Committing and storing the work.

2024-11-10 13:02:44 -08:00 · 2024-11-10 13:02:44 -08:00 · c66875e367
commit c66875e367
6 changed files with 226 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
+/target
--- a/Cargo.lock
+++ b/Cargo.lock
@ -0,0 +1,32 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "check-bol"
+version = "0.1.0"
+dependencies = [
+ "nom",
+]
+
+[[package]]
+name = "memchr"
+version = "2.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
+
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
+[[package]]
+name = "nom"
+version = "7.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
--- a/Cargo.toml
+++ b/Cargo.toml
@ -0,0 +1,9 @@
+[package]
+name = "check-bol"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+nom = "7.1.3"
--- a/LICENSE.md
+++ b/LICENSE.md
@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Elf M. Sternberg <elf.sternberg@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/README.md
+++ b/README.md
@ -0,0 +1,12 @@
+# Rust beginning-of-line match with Nom
+
+This is an experiment to demonstrate that it is possible to detect "the beginning of a line" using
+Nom in Rust, although it's a bit harder than it looks. Essentially, we can only match "content that
+includes a beginning-of-line marker," where the content after the marker could be another
+beginning-of-line marker or anything matched by a legal Nom parser.
+
+It's just an experiment, meant to expand my knowledge.  Not a big deal.
+
+# LICENSE
+
+[MIT License](./LICENSE.md)
--- a/src/main.rs
+++ b/src/main.rs
@ -0,0 +1,151 @@
+use nom::{
+    bytes::complete::{tag, take_while},
+    combinator::recognize,
+    sequence::{pair, preceded},
+    IResult,
+};
+
+/// Succeeds when `input` is positioned at the start of a line.
+///
+/// Using Rust Nom, this shows how to detect "content that begins at the start of a line." The
+/// parser rolls forward over any run of leading `'\n'` characters and stops, without consuming
+/// it, at the first character that is not a line feed. The start of `input` itself counts as the
+/// start of a line, so non-empty input with no leading line feed succeeds without consuming
+/// anything.
+///
+/// Returns `Ok((remaining, recognized))`, where `recognized` is the (possibly empty) run of
+/// leading line feeds and `remaining` is everything after that run.
+///
+/// # Errors
+///
+/// Returns a recoverable `ErrorKind::Eof` error, anchored at `input`, when `input` is empty.
+fn is_beginning_of_line(input: &str) -> IResult<&str, &str> {
+    if input.is_empty() {
+        // A standard Nom error is built by hand from the offending input and an `ErrorKind`.
+        // Credit to Daniel Imfeld (@dimfeld, https://imfeld.dev/writing/parsing_with_nom) for a
+        // plain example of doing this without defining a custom error type.
+        return Err(nom::Err::Error(nom::error::Error::new(
+            input,
+            nom::error::ErrorKind::Eof,
+        )));
+    }
+
+    // `take_while` skips zero or more start-of-line markers, and the empty-string tag then
+    // "trivially" succeeds without consuming the next token. `recognize` hands back exactly the
+    // line feeds that were skipped.
+    //
+    // No further validation is needed afterwards: either nothing was consumed, so we are still
+    // exactly where the input started, or at least one "\n" was consumed, so the next token
+    // cannot be anywhere but at column 0. Both cases are a beginning of line.
+    recognize(preceded(take_while(|c| c == '\n'), tag("")))(input)
+}
+
+/// Example use of `is_beginning_of_line`: match the literal `BEGIN` at the start of a line.
+///
+/// What you look for is *content* at the start of a line, not the start itself. `preceded` runs
+/// `is_beginning_of_line` first and discards its output (the skipped line feeds), so on success
+/// the result holds the input after `BEGIN` and the matched `BEGIN` slice.
+fn pattern_at_beginning_of_line(input: &str) -> IResult<&str, &str> {
+    let mut begin_at_line_start = preceded(is_beginning_of_line, tag("BEGIN"));
+    begin_at_line_start(input)
+}
+
+/// Matches the literal `BEGIN` only when at least two line feeds precede it.
+///
+/// On success returns the input after `BEGIN` together with the matched `BEGIN` slice. Fails with
+/// a `Tag` error anchored at the original `input` when fewer than two line feeds were skipped.
+/// Failures of the inner parsers are propagated unchanged.
+fn pattern_at_beginning_after_blank_line(input: &str) -> IResult<&str, &str> {
+    let (after_begin, (line_feeds, begin)) = pair(is_beginning_of_line, tag("BEGIN"))(input)?;
+
+    // `line_feeds` is the run of "\n" characters skipped before `BEGIN`.
+    let line_feed_count = line_feeds.matches('\n').count();
+    if line_feed_count >= 2 {
+        return Ok((after_begin, begin));
+    }
+
+    Err(nom::Err::Error(nom::error::Error::new(
+        input,
+        nom::error::ErrorKind::Tag,
+    )))
+}
+
+
+/// Demo entry point: runs `pattern_at_beginning_of_line` on a sample that starts with a line feed
+/// and prints either the match plus the unconsumed input, or the parse error.
+fn main() {
+    // `pattern_at_beginning_of_line` reports only what was matched *after* the start of the line;
+    // the leading line feed is consumed but is not part of the reported match.
+    let outcome = pattern_at_beginning_of_line("\nBEGINThis is where your input will be next.");
+    match outcome {
+        Err(failure) => println!("Error: {:?}", failure),
+        Ok((rest, found)) => println!("Matched: '{}', Remaining: '{}'", found, rest),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Empty input is rejected by the predicate.
+    #[test]
+    fn predicate_empty_in_not_sol() {
+        assert!(is_beginning_of_line("").is_err());
+    }
+
+    // A lone line feed is accepted.
+    #[test]
+    fn predicate_sol() {
+        assert!(is_beginning_of_line("\n").is_ok());
+    }
+
+    // Input without a leading line feed is accepted and nothing is consumed.
+    #[test]
+    fn predicate_not_sol() {
+        let outcome = is_beginning_of_line("TEST");
+        assert!(outcome.is_ok());
+        assert_eq!(outcome.unwrap().0, "TEST");
+    }
+
+    // A leading line feed is consumed, leaving the content that follows it.
+    #[test]
+    fn predicate_more_than_sol() {
+        let outcome = is_beginning_of_line("\nTEST");
+        assert!(outcome.is_ok());
+        assert_eq!(outcome.unwrap().0, "TEST");
+    }
+
+    // The pattern matches at the very start of the input.
+    #[test]
+    fn sample_test() {
+        let outcome = pattern_at_beginning_of_line("BEGIN: the rest");
+        assert!(outcome.is_ok());
+        let (leftover, matched) = outcome.unwrap();
+        println!("{:?}", matched);
+        assert_eq!(leftover, ": the rest");
+    }
+
+    // The pattern matches after a single leading line feed.
+    #[test]
+    fn with_leading_return() {
+        let outcome = pattern_at_beginning_of_line("\nBEGIN: the rest");
+        assert!(outcome.is_ok());
+        assert_eq!(outcome.unwrap().0, ": the rest");
+    }
+
+    // The pattern matches after several leading line feeds.
+    #[test]
+    fn with_multiple_leading_return() {
+        let outcome = pattern_at_beginning_of_line("\n\n\nBEGIN: the rest");
+        assert!(outcome.is_ok());
+        assert_eq!(outcome.unwrap().0, ": the rest");
+    }
+
+    // A space before the line feed means the pattern is not found at the current position.
+    #[test]
+    fn with_space_leading_return() {
+        assert!(pattern_at_beginning_of_line(" \nBEGIN: the rest").is_err());
+    }
+}
+
+