commit c66875e3676a84d1c4912856edac71856633e764
Author: Elf M. Sternberg
Date:   Sun Nov 10 13:02:44 2024 -0800

    Committing and storing the work.

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..d739052
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,32 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "check-bol"
+version = "0.1.0"
+dependencies = [
+ "nom",
+]
+
+[[package]]
+name = "memchr"
+version = "2.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
+
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
+[[package]]
+name = "nom"
+version = "7.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..b19c622
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "check-bol"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+nom = "7.1.3"
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000..caacade
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Elf M. Sternberg
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5b1c871
--- /dev/null
+++ b/README.md
@@ -0,0 +1,12 @@
+# Rust beginning of line match with NOM
+
+This is an experiment to demonstrate that it is possible to detect "the beginning of a line" using
+Nom in Rust, although it's a bit harder than it looks. Essentially we can only match "content that
+includes a beginning-of-line marker," which could be another beginning-of-line marker or any legal
+Nom parser.
+
+It's just an experiment, meant to expand my knowledge. Not a big deal.
+
+# LICENSE
+
+[MIT License](./LICENSE.md)
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..8347e71
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,147 @@
+use nom::{
+    bytes::complete::{tag, take_while},
+    combinator::recognize,
+    sequence::{pair, preceded},
+    IResult,
+};
+
+/**
+ * Using Rust Nom, show how to detect "content that begins at the start of a line." This particular
+ * variant rolls forward until it finds any content *other than* the start of a line, which is
+ * defined as "the input token after any \n".
+ *
+ * On success the result is `(remaining, newlines)`: `newlines` is the (possibly empty) run of
+ * leading "\n" characters and `remaining` is everything after that run. The only failure is
+ * empty input, which is reported as an `Eof` error. A parser only ever sees what is left of the
+ * input, so the start of the input is trusted to be the start of a line; rejecting anything else
+ * is left to the parser that runs next.
+ */
+fn is_beginning_of_line(input: &str) -> IResult<&str, &str> {
+    if input.is_empty() {
+        // It took me an absolutely ridiculous amount of time to find a simple "how do you construct
+        // a standard error in Rust Nom" example. Shout-out to Daniel Imfeld (@dimfeld)
+        // (https://imfeld.dev/writing/parsing_with_nom) for being the *23rd* entry Google offered
+        // to answer that question, and being the *first* one with an example that wasn't "How to
+        // write a custom Nom error" or "How to handle errors in Nom."
+        return Err(nom::Err::Error(nom::error::Error::new(
+            input,
+            nom::error::ErrorKind::Eof,
+        )));
+    }
+
+    // `take_while` is happy with zero matches, so from here on nothing can fail. What is left
+    // after it is either literally where the input started, or the first token *after* one or
+    // more "\n"s (i.e., we rolled forward over line feeds and met nothing else), so we can't be
+    // anywhere but at column 0 and no further check is needed.
+    recognize(preceded(
+        take_while(|c: char| c == '\n'),
+        // The empty string tag always succeeds without consuming any of the input. So we're
+        // skipping the start-of-line markers before "trivially" succeeding, always successfully
+        // checking the empty token, so we don't actually consume the next token.
+        tag(""),
+    ))(input)
+}
+
+// And this is how you use it; you're looking for *content* at the *start* of the line, not the
+// start itself.
+fn pattern_at_beginning_of_line(input: &str) -> IResult<&str, &str> {
+    preceded(is_beginning_of_line, tag("BEGIN"))(input)
+}
+
+// Same idea, but "BEGIN" has to come after at least one *blank* line: one "\n" ends the previous
+// line and a second one is the blank line itself, so fewer than two leading "\n"s is an error.
+#[allow(dead_code)] // Only the unit tests call this; `main` demos the simpler variant.
+fn pattern_at_beginning_after_blank_line(input: &str) -> IResult<&str, &str> {
+    let (remaining, (newlines, matched)) = pair(is_beginning_of_line, tag("BEGIN"))(input)?;
+    // `newlines` holds nothing but one-byte "\n" characters, so its length is their count.
+    if newlines.len() < 2 {
+        Err(nom::Err::Error(nom::error::Error::new(
+            input,
+            nom::error::ErrorKind::Tag,
+        )))
+    } else {
+        Ok((remaining, matched))
+    }
+}
+
+fn main() {
+    let input = "\nBEGINThis is where your input will be next.";
+    // Note that `pattern_at_beginning_of_line` matches the parser *after* `is_beginning_of_line`.
+    // Using `preceded` (above) throws out all the line feeds.
+    match pattern_at_beginning_of_line(input) {
+        Ok((remaining, matched)) => println!("Matched: '{}', Remaining: '{}'", matched, remaining),
+        Err(err) => println!("Error: {:?}", err),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn predicate_empty_is_not_sol() {
+        let result = is_beginning_of_line("");
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn predicate_sol() {
+        let result = is_beginning_of_line("\n");
+        assert_eq!(result, Ok(("", "\n")));
+    }
+
+    #[test]
+    fn predicate_start_of_input_is_sol() {
+        let result = is_beginning_of_line("TEST");
+        assert_eq!(result, Ok(("TEST", "")));
+    }
+
+    #[test]
+    fn predicate_more_than_sol() {
+        let result = is_beginning_of_line("\nTEST");
+        assert_eq!(result, Ok(("TEST", "\n")));
+    }
+
+    #[test]
+    fn predicate_passes_mid_line_content_through() {
+        // Nothing is consumed and nothing fails: it is the next parser that rejects the space.
+        let result = is_beginning_of_line(" \nBEGIN");
+        assert_eq!(result, Ok((" \nBEGIN", "")));
+    }
+
+    #[test]
+    fn sample_test() {
+        let result = pattern_at_beginning_of_line("BEGIN: the rest");
+        assert_eq!(result, Ok((": the rest", "BEGIN")));
+    }
+
+    #[test]
+    fn with_leading_return() {
+        let result = pattern_at_beginning_of_line("\nBEGIN: the rest");
+        assert_eq!(result, Ok((": the rest", "BEGIN")));
+    }
+
+    #[test]
+    fn with_multiple_leading_return() {
+        let result = pattern_at_beginning_of_line("\n\n\nBEGIN: the rest");
+        assert_eq!(result, Ok((": the rest", "BEGIN")));
+    }
+
+    #[test]
+    fn with_space_leading_return() {
+        let result = pattern_at_beginning_of_line(" \nBEGIN: the rest");
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn blank_line_variant_needs_two_returns() {
+        assert!(pattern_at_beginning_after_blank_line("BEGIN: the rest").is_err());
+        assert!(pattern_at_beginning_after_blank_line("\nBEGIN: the rest").is_err());
+    }
+
+    #[test]
+    fn blank_line_variant_matches_after_blank_line() {
+        let result = pattern_at_beginning_after_blank_line("\n\nBEGIN: the rest");
+        assert_eq!(result, Ok((": the rest", "BEGIN")));
+    }
+}