Make prepare_pattern more Rust-like.

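This just removes the layer between `prepare_pattern` and
`prepare_pattern_raw`; the function now always allocates and returns
the vector itself, wrapped in a `Result` so that an empty pattern is
reported as a `PatternError` instead of a panic.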

Oddly, it wasn't possible to encode the "ran off the front of the
pattern" case using an Option<> in the `hunt()` function.  Because the
scan variable is a `usize`, it can never legally go below zero (Rust
won't let that happen), so the "if we're at zero we have some special
cases to check" logic had to remain here.  The C version of this code
could say "if this pointer is below the allocated space," which is, to
a Rust developer, hella weird (you're literally pointing at memory you
don't own!).

And despite the allocation, despite the special-case checks, this code
is *still* twice as fast as the C implementation.
This commit is contained in:
Elf M. Sternberg 2022-11-24 12:56:13 -08:00
parent a6d4fda582
commit d13a76f08a
3 changed files with 30 additions and 24 deletions

View File

@ -15,3 +15,7 @@ fnmatch-sys = "1.0.0"
[[bin]]
name = "bench_patprep"
path = "bench/bench_patprep.rs"
[profile.release]
opt-level = 3

View File

@ -1,13 +1,11 @@
use squozen::prepare_pattern::prepare_pattern_raw;
use squozen::prepare_pattern::prepare_pattern;
const COUNT: usize = 5 * 1000 * 1000 * 100;
fn main() {
let mut end = COUNT;
let mut dest = Vec::<u8>::with_capacity(100);
while end > 0 {
dest.clear();
prepare_pattern_raw(b"/foo/bar/whatever[0-9]*", &mut dest);
let _ = prepare_pattern(b"/foo/bar/whatever[0-9]*");
end = end - 1;
}
}

View File

@ -26,17 +26,13 @@ where
}
return alt;
}
#[derive(Debug, PartialEq)]
pub struct PatternError;
pub fn prepare_pattern(name: &[u8]) -> Vec<u8> {
let mut dest = Vec::with_capacity(116);
prepare_pattern_raw(name, &mut dest);
dest
}
pub fn prepare_pattern_raw(name: &[u8], dest: &mut Vec<u8>) {
pub fn prepare_pattern(name: &[u8]) -> Result<Vec<u8>, PatternError> {
let eol = name.len();
if eol == 0 {
panic!("Library error - This function should never be called with an empty string.")
return Err(PatternError);
}
// After this point, eol always points to the index from where we want to
@ -48,13 +44,15 @@ pub fn prepare_pattern_raw(name: &[u8], dest: &mut Vec<u8>) {
eol = if eol > 0 { eol - 1 } else { 0 }
}
let mut dest = Vec::with_capacity(116);
if eol == 0 {
if GLOBCHARS.contains(&name[0]) {
dest.push(b'/');
return;
return Ok(dest);
} else {
dest.push(name[0]);
return;
return Ok(dest);
};
}
@ -69,6 +67,8 @@ pub fn prepare_pattern_raw(name: &[u8], dest: &mut Vec<u8>) {
} else {
dest.extend_from_slice(&name[start..eol + 1]);
}
Ok(dest)
}
#[cfg(test)]
@ -77,24 +77,28 @@ mod tests {
#[test]
fn test_patterns() {
assert_eq!(prepare_pattern(b"testing"), b"testing");
assert_eq!(prepare_pattern(b"t"), b"t");
assert_eq!(prepare_pattern(b"test*"), b"test");
assert_eq!(prepare_pattern(b"test*"), b"test");
assert_eq!(prepare_pattern(b""), Err(PatternError));
assert_eq!(prepare_pattern(b"testing").unwrap(), b"testing");
assert_eq!(prepare_pattern(b"t").unwrap(), b"t");
assert_eq!(prepare_pattern(b"test*").unwrap(), b"test");
assert_eq!(prepare_pattern(b"test*").unwrap(), b"test");
assert_eq!(
prepare_pattern(b"/foo/bar/whatever[0-9]"),
prepare_pattern(b"/foo/bar/whatever[0-9]").unwrap(),
b"/foo/bar/whatever"
);
assert_eq!(prepare_pattern(b"/foo/bar/whatever*[0-9]"), b"/");
assert_eq!(prepare_pattern(b"/foo/bar/whatever*[0-9]").unwrap(), b"/");
assert_eq!(
prepare_pattern(b"/foo/bar/whatever[0-9]"),
prepare_pattern(b"/foo/bar/whatever[0-9]").unwrap(),
b"/foo/bar/whatever"
);
assert_eq!(
prepare_pattern(b"/foo/bar/whatever[0-9]*"),
prepare_pattern(b"/foo/bar/whatever[0-9]*").unwrap(),
b"/foo/bar/whatever"
);
assert_eq!(prepare_pattern(b"/foo/bar/*whatever[0-9]"), b"whatever");
assert_eq!(prepare_pattern(b"fooz]"), b"f");
assert_eq!(
prepare_pattern(b"/foo/bar/*whatever[0-9]").unwrap(),
b"whatever"
);
assert_eq!(prepare_pattern(b"fooz]").unwrap(), b"f");
}
}