diff --git a/.gitignore b/.gitignore index 96ef6c0..527985c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,9 @@ -/target +.#* +*~ +*# +*.aux +**/*.rs.bk +target Cargo.lock + +.ccls-cache diff --git a/Cargo.toml b/Cargo.toml index 2a5b8c3..997ec56 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,17 +1,17 @@ [package] -name = "mlocate-rs" -version = "0.1.0" +name = "rlocate" +version = "0.0.1" authors = ["Elf M. Sternberg "] -edition = "2018" -license = "MPL-2.0+" -description = "Rust implementation of the Linux mlocate client, with library." -repository = "https://github.com/elfsternberg/mlocate-rs" -readme = "./README.md" +description = "Unix Locate/Updatedb utility" +license = "Apache-2.0 WITH LLVM-exception" +categories = ["coreutil"] +keywords = ["unix", "utility", "cli"] +repository = "https://git.elfsternberg.com/elf/rlocate" +readme = "README.md" +# default-run = "locate" +edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -globset = "0.4.8" -regex = "1.5.4" -clap = "2.33.3" -structview = "1.1.0" + diff --git a/README.md b/README.md index 1c469aa..7af3246 100644 --- a/README.md +++ b/README.md @@ -25,8 +25,6 @@ Right now, none of this works. The _intended_ feature list is: - Can read the following locatedb formats: - MLOCATE - - LOCATE01 - - LOCATE02 - Provides new locatedb formats: - RLOCR01: Directory path prefixes are built by reference, making for a much smaller database. diff --git a/crates/mlocate/Cargo.toml b/crates/mlocate/Cargo.toml new file mode 100644 index 0000000..2a5b8c3 --- /dev/null +++ b/crates/mlocate/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "mlocate-rs" +version = "0.1.0" +authors = ["Elf M. Sternberg "] +edition = "2018" +license = "MPL-2.0+" +description = "Rust implementation of the Linux mlocate client, with library." 
+repository = "https://github.com/elfsternberg/mlocate-rs" +readme = "./README.md" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +globset = "0.4.8" +regex = "1.5.4" +clap = "2.33.3" +structview = "1.1.0" diff --git a/TODO.md b/crates/mlocate/TODO.md similarity index 78% rename from TODO.md rename to crates/mlocate/TODO.md index 8550e11..4d9c999 100644 --- a/TODO.md +++ b/crates/mlocate/TODO.md @@ -3,4 +3,4 @@ * Implement a "write to rslocate01" feature. * implement a "write to rzlocate01" feature, stealing wildly from dictd. - +* Read up on fanotify for Linux. See what's rustable. diff --git a/crates/mlocate/pop.yaml b/crates/mlocate/pop.yaml new file mode 100644 index 0000000..386acd3 --- /dev/null +++ b/crates/mlocate/pop.yaml @@ -0,0 +1,4 @@ +--- +name: rlocate +description: A version of the locate toolkit, written in rust +keywords: rust, library, suite diff --git a/src/database.rs b/crates/mlocate/src/database.rs similarity index 100% rename from src/database.rs rename to crates/mlocate/src/database.rs diff --git a/crates/mlocate/src/lib.rs b/crates/mlocate/src/lib.rs new file mode 100644 index 0000000..3f0ad99 --- /dev/null +++ b/crates/mlocate/src/lib.rs @@ -0,0 +1,50 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! MLocate +//! +//! The readme has the full explanation, but the `locate` suite of +//! tools present in all Linux distributions is used to locate files +//! on your storage device. Rather than search the device directly, +//! `locate` scans a catalog file created during downtime. +//! +//! `MLocate` is the most popular implementation of the locate system, +//! but it has three annoying flaws: +//! +//! 1. The archive file isn't very compressed. +//! 2. The archive file is always an average of 12 hours out of date. 
+//! 3. The archive is accessible only through a command line program. +//! +//! This program intends to read one of two different formats, the +//! classic mlocate format, or a new format that exploits a few nifty +//! tricks to try and make the database file smaller and access +//! faster. + +extern crate structview; + +pub mod database; +pub mod mlocate_db; + +use crate::database::LocateDb; +use crate::mlocate_db::MlHeader; + +#[cfg(test)] +mod tests { + use super::*; + use std::fs::File; + use std::io::{BufRead, BufReader}; + + #[test] + fn can_read_header() -> Result<(), String> { + let db = File::open("/var/lib/mlocate/mlocate.db").expect("Unable to open file"); + let mut reader = BufReader::new(db); + match reader.fill_buf() { + Ok(buffer) => { + assert!(MlHeader::is(buffer), "Could not read DB"); + Ok(()) + } + Err(_) => Err("The header could not be read".to_owned()), + } + } +} diff --git a/src/mlocate_db/mod.rs b/crates/mlocate/src/mlocate_db/mod.rs similarity index 100% rename from src/mlocate_db/mod.rs rename to crates/mlocate/src/mlocate_db/mod.rs diff --git a/crates/squozen/Cargo.toml b/crates/squozen/Cargo.toml new file mode 100644 index 0000000..37c6f5b --- /dev/null +++ b/crates/squozen/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "squozen" +description = "Decompressor and search engine for the Squozen format (1983)" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +name = "squozen" +path = "src/lib.rs" + +[dependencies] +fnmatch-sys = "1.0.0" + +[[bin]] +name = "bench_patprep" +path = "bench/bench_patprep.rs" diff --git a/crates/squozen/bench/bench_patprep.rs b/crates/squozen/bench/bench_patprep.rs new file mode 100644 index 0000000..c4fd1f2 --- /dev/null +++ b/crates/squozen/bench/bench_patprep.rs @@ -0,0 +1,11 @@ +use squozen::prepare_pattern::prepare_pattern; + +const COUNT: usize = 5 * 1000 * 1000 * 100; + +fn main() { + let mut end = COUNT; + 
while end > 0 { + let _g = prepare_pattern(b"/foo/bar/whatever[0-9]*"); + end = end - 1; + } +} diff --git a/crates/squozen/docs/patprep/Makefile b/crates/squozen/docs/patprep/Makefile new file mode 100644 index 0000000..af1b4c6 --- /dev/null +++ b/crates/squozen/docs/patprep/Makefile @@ -0,0 +1,6 @@ + +bench: + gcc -o bench_patprep patprep.c bench_patprep.c + +rust_examples: + gcc -o rust_examples patprep.c gen_rust_examples.c diff --git a/crates/squozen/docs/patprep/bench_patprep.c b/crates/squozen/docs/patprep/bench_patprep.c new file mode 100644 index 0000000..ab36738 --- /dev/null +++ b/crates/squozen/docs/patprep/bench_patprep.c @@ -0,0 +1,9 @@ +#include "patprep.h" + +const int count = 5 * 1000 * 1000 * 1000; + +void main() { + for (int i = 0; i <= count; i++) { + patprep("/foo/bar/whatever[0-9]*"); + } +} diff --git a/crates/squozen/docs/patprep/gen_rust_examples.c b/crates/squozen/docs/patprep/gen_rust_examples.c new file mode 100644 index 0000000..f99e41a --- /dev/null +++ b/crates/squozen/docs/patprep/gen_rust_examples.c @@ -0,0 +1,36 @@ +#include "patprep.h" +#include + +// Below is a series of test cases that were not present in the original. The +// purpose of this function is to output the test cases that will go into the +// rust version of the project, to assert that they behave correctly. + +char *cases[] = {"testing", + "test*", + "/foo/bar/whatever[0-9]", + "/foo/bar/whatever*[0-9]", + "/foo/bar/whatever[0-9]", + "/foo/bar/whatever[0-9]*", + "/foo/bar/*whatever[0-9]", + "fooz]", + NULL}; + +// Since patprep gives us the END of the array, we need to search for +// the beginning. And both ends are null terminated, because Unix. +// There is a ton of undefined behavior here, all of which is predicated +// on never sending patprep something it can't parse, or a zero-length +// string. 
+ +void main() { + int i; + char *c; + char *g; + for (i = 0, c = cases[0]; cases[i] != NULL; ++i, c = cases[i]) { + g = patprep(c); + while (*g != 0) { + g--; + } + g++; + printf("assert_eq!(prepare_pattern(b\"%s\"), b\"%s\");\n", c, g); + } +} diff --git a/crates/squozen/docs/patprep/patprep.c b/crates/squozen/docs/patprep/patprep.c new file mode 100644 index 0000000..ee3f9a2 --- /dev/null +++ b/crates/squozen/docs/patprep/patprep.c @@ -0,0 +1,74 @@ +#include "patprep.h" +#include +#include +#include + +// This is a copy of the patprep() function from the original 1983 version of +// locate.c. I have annotated it heavily in order to document, in my own mind at +// any rate, exactly what it does, step by step. + +// Globfree is limited to 100 characters. +static char globfree[100]; + +char *patprep(name) +char *name; +{ + register char *endmark, *p, *subp; + + subp = globfree; + *subp++ = '\0'; + + // Go to the very end of the string passed in. + p = name + strlen(name) - 1; + + /* starting from the END of the string, skip trailing metacharacters (and + [] ranges) */ + for (; p >= name; p--) + // Index is mis-named; it returns a pointer to the first + // instance of the character '*p' in the content of the + // static string here. In this case, it's saying that + // if there is no metacharacter, break out? + if (index("*?", *p) == 0) + break; + + if (p < name) + p = name; + + // Skip past a range operator. + if (*p == ']') + for (p--; p >= name; p--) + if (*p == '[') { + p--; + break; + } + + if (p < name) + p = name; + + /* + * if pattern has only metacharacters, check every path (force '/' + * search) + */ + + // We got to the start of the string. At least give it an anchoring '/', + // so the matcher has something it can makes sense of. 
+ if ((p == name) && index("?*[]", *p) != 0) + *subp++ = '/'; + else { + // Okay, from where we were, scan backwards until we find another + // metacharacter or the root of the search string, in which case we have + // a literal substring. + for (endmark = p; p >= name; p--) + if (index("]*?", *p) != 0) + break; + + // From the first non-metacharacter found to the first metacharacter (or + // EOL), copy that substring into the 100-byte reserve. + for (++p; (p <= endmark) && subp < (globfree + sizeof(globfree));) + *subp++ = *p++; + } + // Give a null ending. + *subp = '\0'; + // Return a pointer to the last byte of the pattern reserve. + return (--subp); +} diff --git a/crates/squozen/docs/patprep/patprep.h b/crates/squozen/docs/patprep/patprep.h new file mode 100644 index 0000000..085e262 --- /dev/null +++ b/crates/squozen/docs/patprep/patprep.h @@ -0,0 +1 @@ +char *patprep(char *name); diff --git a/crates/squozen/src/lib.rs b/crates/squozen/src/lib.rs new file mode 100644 index 0000000..8fd9eb2 --- /dev/null +++ b/crates/squozen/src/lib.rs @@ -0,0 +1,3 @@ +// pub use crate::codesquoze; +// pub mod squozen; +pub mod prepare_pattern; diff --git a/crates/squozen/src/prepare_pattern.rs b/crates/squozen/src/prepare_pattern.rs new file mode 100644 index 0000000..6966372 --- /dev/null +++ b/crates/squozen/src/prepare_pattern.rs @@ -0,0 +1,92 @@ +const GLOBCHARS: &[u8] = &[b'?', b'*', b'[', b']']; +const GLOBSTARTS: &[u8] = &[b'?', b'*', b']']; + +// prepare_pattern +// +// This function finds the first substring of characters, starting from the end +// of the search string, that does not contain glob-special characters. It +// returns a vector of those characters for comparison. The test cases have all +// been derived from tests performed on the original 1983 `patprep` function +// found in locate.c. 
// Unlike the original `patprep`, this implementation assumes the slice holds
// only the bytes of the pattern, with no NUL sentinel at either end; the
// slice's own length marks where the pattern stops.

/// Returns `true` for every byte that is special inside a glob pattern
/// (`?`, `*`, `[`, `]`).
fn is_glob_char(c: u8) -> bool {
    matches!(c, b'?' | b'*' | b'[' | b']')
}

/// Returns `true` for the bytes that end a literal run when a pattern is
/// scanned from right to left (`?`, `*`, `]`).
fn is_glob_start(c: u8) -> bool {
    matches!(c, b'?' | b'*' | b']')
}

/// Scans `name` backwards, starting at index `end`, for the first byte that
/// `comp` accepts, and returns that byte's index.
///
/// Index 0 is never tested: the scan stops after index 1, and `alt` is
/// returned when nothing matched. Callers that care about the first byte
/// must inspect `name[0]` themselves.
///
/// # Panics
///
/// Panics if `end` is greater than zero and not a valid index into `name`.
fn hunt<F>(name: &[u8], end: usize, alt: usize, comp: F) -> usize
where
    F: Fn(&u8) -> bool,
{
    (1..=end).rev().find(|&i| comp(&name[i])).unwrap_or(alt)
}

/// Extracts the right-most run of literal (glob-free) bytes from `name`.
///
/// Working from the end of the pattern, trailing `*` and `?` are skipped,
/// then one trailing `[...]` range, and the literal bytes immediately before
/// that point are returned. When no literal run is available the result is
/// `b"/"`.
///
/// # Panics
///
/// Panics if `name` is empty.
pub fn prepare_pattern(name: &[u8]) -> Vec<u8> {
    assert!(
        !name.is_empty(),
        "Library error - This function should never be called with an empty string."
    );

    // From here on `eol` is the index of the last byte of interest, not the
    // position one past it.
    let mut eol = hunt(name, name.len() - 1, 0, |&c| c != b'*' && c != b'?');

    if name[eol] == b']' {
        // Step over the range. `saturating_sub` matters when the `]` sits at
        // index 0: there is nothing before it to scan, and a plain `eol - 1`
        // would underflow.
        let open = hunt(name, eol.saturating_sub(1), 0, |&c| c == b'[');
        eol = open.saturating_sub(1);
    }

    if eol == 0 {
        // Only the first byte is left: use it if it is a literal, otherwise
        // fall back to `/`.
        let first = name[0];
        return vec![if is_glob_char(first) { b'/' } else { first }];
    }

    // Find the metacharacter that bounds the literal run on its left. `hunt`
    // reports 0 both for "found at 0" and "not found", so the byte at the
    // returned index is re-checked before deciding where the run starts.
    let hit = hunt(name, eol, 0, |&c| is_glob_start(c));
    let start = if is_glob_start(name[hit]) { hit + 1 } else { hit };

    if start > eol {
        // A metacharacter sits directly at `eol`: no literal run exists.
        vec![b'/']
    } else {
        name[start..=eol].to_vec()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_patterns() {
        assert_eq!(prepare_pattern(b"testing"), b"testing");
        assert_eq!(prepare_pattern(b"t"), b"t");
        assert_eq!(prepare_pattern(b"test*"), b"test");
        assert_eq!(
            prepare_pattern(b"/foo/bar/whatever[0-9]"),
            b"/foo/bar/whatever"
        );
        assert_eq!(prepare_pattern(b"/foo/bar/whatever*[0-9]"), b"/");
        assert_eq!(
            prepare_pattern(b"/foo/bar/whatever[0-9]*"),
            b"/foo/bar/whatever"
        );
        assert_eq!(prepare_pattern(b"/foo/bar/*whatever[0-9]"), b"whatever");
        assert_eq!(prepare_pattern(b"fooz]"), b"f");
    }

    #[test]
    fn test_leading_close_bracket() {
        assert_eq!(prepare_pattern(b"]"), b"/");
        assert_eq!(prepare_pattern(b"]*"), b"/");
    }
}
--git a/src/lib.rs b/src/lib.rs index 3f0ad99..7d12d9a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,50 +1,14 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at http://mozilla.org/MPL/2.0/. - -//! MLocate -//! -//! The readme has the full explanation, but the `locate` suite of -//! tools present in all Linux distributions is used to locate files -//! on your storage device. Rather than search the device directly, -//! `locate` scans a catalog file created during downtime. -//! -//! `MLocate` is the most popular implementation of the locate system, -//! but it has three annoying flaws: -//! -//! 1. The archive file isn't very compressed. -//! 2. The archive file is always an average of 12 hours out of date. -//! 3. The archive is accessible only through a command line program. -//! -//! This program intends to read one of two different formats, the -//! classic mlocate format, or a new format that exploits a few nifty -//! tricks to try and make the database file smaller and access -//! faster. - -extern crate structview; - -pub mod database; -pub mod mlocate_db; - -use crate::database::LocateDb; -use crate::mlocate_db::MlHeader; +pub fn add(left: usize, right: usize) -> usize { + left + right +} #[cfg(test)] mod tests { use super::*; - use std::fs::File; - use std::io::{BufRead, BufReader}; #[test] - fn can_read_header() -> Result<(), String> { - let db = File::open("/var/lib/mlocate/mlocate.db").expect("Unable to open file"); - let mut reader = BufReader::new(db); - match reader.fill_buf() { - Ok(buffer) => { - assert!(MlHeader::is(buffer), "Could not read DB"); - Ok(()) - } - Err(_) => Err("The header could not be read".to_owned()), - } + fn it_works() { + let result = add(2, 2); + assert_eq!(result, 4); } }