From d13a76f08a314a8419a32af6ce073083081ea050 Mon Sep 17 00:00:00 2001 From: "Elf M. Sternberg" Date: Thu, 24 Nov 2022 12:56:13 -0800 Subject: [PATCH] Make prepare_pattern more Rust-like. This just removes the layer between `prepare_pattern` and `prepare_pattern_raw`; the function now always returns the allocated vector. Oddly, it wasn't possible to encode this using an Option<> in the `hunt()` function. The `usize` of the scan variable meant that we'd never go below zero legally (and Rust wouldn't let that happen), so the "if we're at zero we have some special cases to check" had to remain here. The C version of this code could say "If this pointer is below the allocated space" which is, to a Rust developer, hella weird (you're literally pointed at memory you don't own!). And despite the allocation, despite the special case checks, this code is *still* twice as fast as its C implementation. --- crates/squozen/Cargo.toml | 4 +++ crates/squozen/bench/bench_patprep.rs | 6 ++-- crates/squozen/src/prepare_pattern.rs | 44 +++++++++++++++------------ 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/crates/squozen/Cargo.toml b/crates/squozen/Cargo.toml index 37c6f5b..75febcb 100644 --- a/crates/squozen/Cargo.toml +++ b/crates/squozen/Cargo.toml @@ -15,3 +15,7 @@ fnmatch-sys = "1.0.0" [[bin]] name = "bench_patprep" path = "bench/bench_patprep.rs" + +[profile.release] +opt-level = 3 + diff --git a/crates/squozen/bench/bench_patprep.rs b/crates/squozen/bench/bench_patprep.rs index 4e09fd0..d3a11b8 100644 --- a/crates/squozen/bench/bench_patprep.rs +++ b/crates/squozen/bench/bench_patprep.rs @@ -1,13 +1,11 @@ -use squozen::prepare_pattern::prepare_pattern_raw; +use squozen::prepare_pattern::prepare_pattern; const COUNT: usize = 5 * 1000 * 1000 * 100; fn main() { let mut end = COUNT; - let mut dest = Vec::::with_capacity(100); while end > 0 { - dest.clear(); - prepare_pattern_raw(b"/foo/bar/whatever[0-9]*", &mut dest); + let _ = prepare_pattern(b"/foo/bar/whatever[0-9]*"); end = end - 1; } } diff --git a/crates/squozen/src/prepare_pattern.rs b/crates/squozen/src/prepare_pattern.rs index e8a0188..1237f17 100644 --- a/crates/squozen/src/prepare_pattern.rs +++ b/crates/squozen/src/prepare_pattern.rs @@ -26,17 +26,13 @@ where } return alt; } +#[derive(Debug, PartialEq)] +pub struct PatternError; -pub fn prepare_pattern(name: &[u8]) -> Vec { - let mut dest = Vec::with_capacity(116); - prepare_pattern_raw(name, &mut dest); - dest -} - -pub fn prepare_pattern_raw(name: &[u8], dest: &mut Vec) { +pub fn prepare_pattern(name: &[u8]) -> Result, PatternError> { let eol = name.len(); if eol == 0 { - panic!("Library error - This function should never be called with an empty string.") + return Err(PatternError); } // After this point, eol always points to the index from where we want to @@ -48,13 +44,15 @@ pub fn prepare_pattern_raw(name: &[u8], dest: &mut Vec) { eol = if eol > 0 { eol - 1 } else { 0 } } + let mut dest = Vec::with_capacity(116); + if eol == 0 { if GLOBCHARS.contains(&name[0]) { dest.push(b'/'); - return; + return Ok(dest); } else { dest.push(name[0]); - return; + return Ok(dest); }; } @@ -69,6 +67,8 @@ pub fn prepare_pattern_raw(name: &[u8], dest: &mut Vec) { } else { dest.extend_from_slice(&name[start..eol + 1]); } + + Ok(dest) } #[cfg(test)] @@ -77,24 +77,28 @@ mod tests { #[test] fn test_patterns() { - assert_eq!(prepare_pattern(b"testing"), b"testing"); - assert_eq!(prepare_pattern(b"t"), b"t"); - assert_eq!(prepare_pattern(b"test*"), b"test"); - assert_eq!(prepare_pattern(b"test*"), b"test"); + assert_eq!(prepare_pattern(b""), Err(PatternError)); + assert_eq!(prepare_pattern(b"testing").unwrap(), b"testing"); + assert_eq!(prepare_pattern(b"t").unwrap(), b"t"); + assert_eq!(prepare_pattern(b"test*").unwrap(), b"test"); + assert_eq!(prepare_pattern(b"test*").unwrap(), b"test"); assert_eq!( - prepare_pattern(b"/foo/bar/whatever[0-9]"), + prepare_pattern(b"/foo/bar/whatever[0-9]").unwrap(), b"/foo/bar/whatever" ); - assert_eq!(prepare_pattern(b"/foo/bar/whatever*[0-9]"), b"/"); + assert_eq!(prepare_pattern(b"/foo/bar/whatever*[0-9]").unwrap(), b"/"); assert_eq!( - prepare_pattern(b"/foo/bar/whatever[0-9]"), + prepare_pattern(b"/foo/bar/whatever[0-9]").unwrap(), b"/foo/bar/whatever" ); assert_eq!( - prepare_pattern(b"/foo/bar/whatever[0-9]*"), + prepare_pattern(b"/foo/bar/whatever[0-9]*").unwrap(), b"/foo/bar/whatever" ); - assert_eq!(prepare_pattern(b"/foo/bar/*whatever[0-9]"), b"whatever"); - assert_eq!(prepare_pattern(b"fooz]"), b"f"); + assert_eq!( + prepare_pattern(b"/foo/bar/*whatever[0-9]").unwrap(), + b"whatever" + ); + assert_eq!(prepare_pattern(b"fooz]").unwrap(), b"f"); } }