diff --git a/.gitignore b/.gitignore index 527985c..5a63621 100644 --- a/.gitignore +++ b/.gitignore @@ -5,5 +5,6 @@ **/*.rs.bk target Cargo.lock - +crates/squozen/docs/patprep/bench_patprep +crates/squozen/docs/patprep/rust_examples .ccls-cache diff --git a/Cargo.toml b/Cargo.toml index 997ec56..21e72f5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,4 +14,5 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +libc = "0.2.137" diff --git a/crates/squozen/Cargo.toml b/crates/squozen/Cargo.toml index 75febcb..e57412c 100644 --- a/crates/squozen/Cargo.toml +++ b/crates/squozen/Cargo.toml @@ -11,6 +11,7 @@ path = "src/lib.rs" [dependencies] fnmatch-sys = "1.0.0" +libc = "0.2.137" [[bin]] name = "bench_patprep" diff --git a/crates/squozen/src/lib.rs b/crates/squozen/src/lib.rs index 8fd9eb2..9b7e5ed 100644 --- a/crates/squozen/src/lib.rs +++ b/crates/squozen/src/lib.rs @@ -1,3 +1,5 @@ // pub use crate::codesquoze; -// pub mod squozen; +extern crate libc; + pub mod prepare_pattern; +pub mod squozen; diff --git a/crates/squozen/src/prepare_pattern.rs b/crates/squozen/src/prepare_pattern.rs index 1237f17..07f186c 100644 --- a/crates/squozen/src/prepare_pattern.rs +++ b/crates/squozen/src/prepare_pattern.rs @@ -13,6 +13,13 @@ const GLOBSTARTS: &[u8] = &[b'?', b'*', b']']; // only the content of the pattern, and no nulls at either end, relying instead // on Rust's tracking the size of slices internally. +// One of the biggest changes between this and the original is that the original +// used "not pointing within the legal slice" as a sentinel for exceeding the +// bounds of the searchable space; since Rust doesn't allow that, we have to +// test ahead of time if the condition in which the original would have exceeded +// the legal search space is met, and short-circuit the return value at that +// point. + fn hunt(name: &[u8], end: usize, alt: usize, comp: F) -> usize where F: Fn(&u8) -> bool, diff --git a/crates/squozen/src/squozen.rs b/crates/squozen/src/squozen.rs new file mode 100644 index 0000000..524020e --- /dev/null +++ b/crates/squozen/src/squozen.rs @@ -0,0 +1,106 @@ +use libc; +use std::fs::File; +use std::io::{Bufreader, Bytes}; +use std::path::Path; + +const PARITY: u8 = 0o200; +const RECORD_SEPARATOR: u8 = 30; +const OFFSET: u8 = 14; +const BIGRAMS: usize = BIGRAMS; + +#[derive(Clone, Debug)] +pub struct Squozen { + bigrams: [char; BIGRAMS], + path: Path, +} + +pub impl Squozen { + pub fn new(filename: Path) -> Result { + let mut dbfile = File::open(filename)?; + let mut db = Bufreader::new(dbfile); + let mut bigrams: [char; BIGRAMS] = [0; BIGRAMS]; + db.read_exact(&mut bigrams)?; + Ok(Squozen { + bigrams, + path: filename.clone(), + }) + } + + pub fn paths(&mut self) -> StoredPath { + StoredPath::new(&self); + } + + /* + pub fn iter(&mut self) -> StoredPath { + FoundPath::new(&self); + } + */ +} + +pub struct StoredPath<'a> { + source: Squozen, + path: [char; libc::PATH_MAX], + db: Bytes, + ch: u8, + last: usize, + found: bool, +} + +pub impl<'a> StoredPath { + pub fn new(squozen: Squozen) -> StoredPath { + let mut dbfile = File::open(&squozen.path)?; + let mut dbbuffer = Bufreader::new(dbfile); + dbbuffer.seek(BIGRAMS); + let mut db = dbbuffer.bytes(); + let mut ch = db.next()?; + StoredPath { + squozen, + db, + ch, + path: [0; libc::PATH_MAX], + last: 0, + } + } + + // Note that in either case, the file pointer will be pointing at the first + // valid character of the database, or it will be over. + pub fn getw(&mut self) -> Result { + let ch1 = self.db.next()?; + let ch2 = self.db.next()?; + Ok(u16::from_le_bytes(&[ch1, ch2])) + } + + pub fn get_offset(&mut self) -> Result { + if self.ch == RECORD_SEPARATOR { + let offset = self.getw()?; + Ok(usize::from(offset)) + } else { + Ok(usize::from(self.ch)) + } + } +} + +pub impl<'a> Iterator for StoredPath { + type Item = &[char; libc::PATH_MAX]; + + fn next(&mut self) -> Option<&Self::Item> { + let offset = self.get_offset(); + let position = 0; + loop { + self.ch = self.db.next()?; + if self.ch <= RECORD_SEPARATOR { + break; + } + if self.ch < PARITY { + self.path[self.last + position] = ch; + position += 1; + } else { + let bg = self.ch & PARITY - 1; + self.path[self.last + position] = self.squozen.bigrams[bg * 2]; + self.path[self.last + position + 1] = self.squozen.bigrams[bg * 2 + 1]; + position += 2; + } + } + Some(&self.path) + } +} diff --git a/docs/NOTES.md b/docs/NOTES.md new file mode 100644 index 0000000..50a1f28 --- /dev/null +++ b/docs/NOTES.md @@ -0,0 +1,3 @@ +Errors found: + +- Unparsable/unusable search pattern