rlocate/crates/squozen/src/prepare_pattern.rs

112 lines
3.4 KiB
Rust

/// Every byte that `prepare_pattern` treats as glob-special when it inspects
/// the first byte of a pattern.
const GLOBCHARS: &[u8] = b"?*[]";
/// Bytes that stop the backwards scan for the start of the literal run in
/// `prepare_pattern`. Note that `[` is not in this set.
const GLOBSTARTS: &[u8] = b"?*]";
// prepare_pattern
//
// This function finds the first substring of characters, starting from the end
// of the search string, that does not contain glob-special characters. It
// returns a vector of those characters for comparison. The test cases have all
// been derived from tests performed on the original 1983 `patprep` function
// found in locate.c.
// Unlike the original database, we're going to assume that this slice contains
// only the content of the pattern, and no nulls at either end, relying instead
// on Rust's tracking the size of slices internally.
// One of the biggest changes between this and the original is that the original
// used "not pointing within the legal slice" as a sentinel for exceeding the
// bounds of the searchable space; since Rust doesn't allow that, we have to
// test ahead of time if the condition in which the original would have exceeded
// the legal search space is met, and short-circuit the return value at that
// point.
/// Walks `name` backwards from index `end` down to index 1 and returns the
/// first index whose byte satisfies `comp`.
///
/// Index 0 is never examined: when no byte in `1..=end` matches (or when
/// `end` is 0), `alt` is returned instead.
///
/// # Panics
///
/// Panics if `end` is non-zero and not a valid index into `name`.
fn hunt<F>(name: &[u8], end: usize, alt: usize, comp: F) -> usize
where
    F: Fn(&u8) -> bool,
{
    (1..=end)
        .rev()
        .find(|&idx| comp(&name[idx]))
        .unwrap_or(alt)
}
/// Error returned by `prepare_pattern` when it is handed an empty pattern.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// propagated with `?` into `Box<dyn Error>` and similar error containers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PatternError;

impl std::fmt::Display for PatternError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("search pattern is empty")
    }
}

impl std::error::Error for PatternError {}
/// Extracts the literal run of bytes nearest the end of a glob pattern.
///
/// Trailing `*` and `?` bytes are skipped, as is one trailing `[...]` group.
/// The bytes from there back to the previous `?`, `*` or `]` (exclusive) are
/// returned. When no literal run is found there, the single byte `/` is
/// returned.
///
/// `hunt` never examines index 0, so whenever the backwards scan runs off the
/// front of the pattern only the first byte is considered: it is returned
/// as-is unless it is a glob character, in which case `/` is returned.
///
/// # Errors
///
/// Returns [`PatternError`] if `name` is empty.
pub fn prepare_pattern(name: &[u8]) -> Result<Vec<u8>, PatternError> {
    if name.is_empty() {
        return Err(PatternError);
    }

    // From here on `eol` is the index of the last byte we want to keep, not
    // the index one past it.
    let mut eol = hunt(name, name.len() - 1, 0, |&c| c != b'*' && c != b'?');

    if name[eol] == b']' {
        // `eol` can be 0 here (patterns such as "]" or "]*"); a plain
        // `eol - 1` would underflow and panic, so saturate instead and let the
        // first-byte handling below take over.
        let open = hunt(name, eol.saturating_sub(1), 0, |&c| c == b'[');
        // Step to the byte before the '[' (or stay at 0 if there is none).
        eol = open.saturating_sub(1);
    }

    if eol == 0 {
        let first = name[0];
        let byte = if GLOBCHARS.contains(&first) { b'/' } else { first };
        return Ok(vec![byte]);
    }

    // Find where the literal run begins: just after the nearest glob byte at
    // or before `eol`, or at index 0 if there is none.
    let found = hunt(name, eol, 0, |c| GLOBSTARTS.contains(c));
    let start = if GLOBSTARTS.contains(&name[found]) {
        found + 1
    } else {
        found
    };

    if start > eol {
        // The byte at `eol` is itself a glob byte, so no literal run exists.
        Ok(vec![b'/'])
    } else {
        Ok(name[start..=eol].to_vec())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `prepare_pattern` and unwraps the literal it produced.
    fn literal(pattern: &[u8]) -> Vec<u8> {
        prepare_pattern(pattern).unwrap()
    }

    #[test]
    fn empty_pattern_is_rejected() {
        assert_eq!(prepare_pattern(b""), Err(PatternError));
    }

    #[test]
    fn plain_patterns_are_returned_whole() {
        assert_eq!(literal(b"testing"), b"testing");
        assert_eq!(literal(b"t"), b"t");
    }

    #[test]
    fn trailing_wildcards_are_dropped() {
        assert_eq!(literal(b"test*"), b"test");
        assert_eq!(literal(b"test?"), b"test");
    }

    #[test]
    fn wildcard_only_pattern_yields_slash() {
        assert_eq!(literal(b"*"), b"/");
    }

    #[test]
    fn trailing_bracket_group_is_dropped() {
        assert_eq!(literal(b"/foo/bar/whatever[0-9]"), b"/foo/bar/whatever");
        assert_eq!(literal(b"/foo/bar/whatever[0-9]*"), b"/foo/bar/whatever");
    }

    #[test]
    fn wildcard_before_bracket_group_yields_slash() {
        assert_eq!(literal(b"/foo/bar/whatever*[0-9]"), b"/");
    }

    #[test]
    fn literal_run_starts_after_inner_wildcard() {
        assert_eq!(literal(b"/foo/bar/*whatever[0-9]"), b"whatever");
    }

    #[test]
    fn unmatched_close_bracket_keeps_first_byte_only() {
        // No '[' precedes the ']', so the scan runs off the front of the
        // pattern and only the first byte is kept.
        assert_eq!(literal(b"fooz]"), b"f");
    }
}