Committing and storing the work.

2024-11-10 13:02:44 -08:00 · 2024-11-10 13:02:44 -08:00 · c66875e367
commit c66875e367
6 changed files with 226 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
 /target
--- a/Cargo.lock
+++ b/Cargo.lock
@ -0,0 +1,32 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
 version = 3
 [[package]]
 name = "check-bol"
 version = "0.1.0"
 dependencies = [
 "nom",
 ]
 [[package]]
 name = "memchr"
 version = "2.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
 [[package]]
 name = "minimal-lexical"
 version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
 [[package]]
 name = "nom"
 version = "7.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
 dependencies = [
 "memchr",
 "minimal-lexical",
 ]
--- a/Cargo.toml
+++ b/Cargo.toml
@ -0,0 +1,9 @@
 [package]
 name = "check-bol"
 version = "0.1.0"
 edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 [dependencies]
 nom = "7.1.3"
--- a/LICENSE.md
+++ b/LICENSE.md
@ -0,0 +1,21 @@
 MIT License
 Copyright (c) 2023 Elf M. Sternberg <elf.sternberg@gmail.com>
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/README.md
+++ b/README.md
@ -0,0 +1,12 @@
 # Rust beginning-of-line match with Nom
 This is an experiment to demonstrate that it is possible to detect "the beginning of a line" using
 Nom in Rust, although it's a bit harder than it looks. Essentially, we can only match "content that
 includes a beginning-of-line marker," where that content could be another beginning-of-line marker
 or anything matched by a legal Nom parser.
 It's just an experiment, meant to expand my knowledge.  Not a big deal.
 # LICENSE
 [MIT License](./LICENSE.md)
--- a/src/main.rs
+++ b/src/main.rs
@ -0,0 +1,151 @@
 use nom::{
    bytes::complete::{tag, take_while},
    combinator::recognize,
    sequence::{pair, preceded},
    IResult,
 };
 /**
 * Using Rust Nom, show how to detect "content that begins at the start of a line."
 *
 * The parser skips every leading `\n` and stops in front of the first character that is not a line
 * feed, without consuming that character. On success it returns `(remaining, matched)`, where
 * `matched` is the (possibly empty) run of line feeds that was skipped and `remaining` is the
 * input that follows it.
 *
 * Note the built-in assumption: a parser only sees the input it is handed, so the position where
 * this parser is invoked is itself treated as a line start. Consequently every non-empty input
 * succeeds -- either no line feed is consumed (we are still where we started) or at least one is
 * consumed (we are right after a `\n`). The useful rejection happens in whatever parser is
 * sequenced *after* this one.
 *
 * Errors: empty input is rejected with `ErrorKind::Eof`, because there is no line to be at the
 * beginning of.
 */
 fn is_beginning_of_line(input: &str) -> IResult<&str, &str> {
    if input.is_empty() {
        // Building a stock Nom error by hand is poorly documented; this construction follows
        // Daniel Imfeld's write-up (https://imfeld.dev/writing/parsing_with_nom).
        return Err(nom::Err::Error(nom::error::Error::new(
            input,
            nom::error::ErrorKind::Eof,
        )));
    }
    // `take_while` consumes zero or more line feeds and cannot fail. The empty-string tag always
    // succeeds without consuming anything, so the token after the line feeds is left untouched,
    // and `recognize` hands back exactly the slice of line feeds that was skipped.
    //
    // No further check is needed afterwards: if nothing was consumed, `remaining` is `input`
    // itself, and if something was consumed, `input` necessarily started with `\n`. One of the two
    // is always true, so a "not at beginning of line" failure can never be produced here.
    recognize(preceded(take_while(|c: char| c == '\n'), tag("")))(input)
 }
 // And this is how you use it; you're looking for *content* at the *start* of the line, not the
 // start itself.
 fn pattern_at_beginning_of_line(input: &str) -> IResult<&str, &str> {
    preceded(
        is_beginning_of_line,
        tag("BEGIN"),
    )(input)
 }
 /// Matches the literal `BEGIN` at the start of a line, but only when the text skipped in front
 /// of it contains at least two `\n` characters (i.e. the keyword follows a blank line).
 ///
 /// On success the result is `(remaining, "BEGIN")`. When fewer than two line feeds were skipped
 /// the parser fails with `ErrorKind::Tag`, reported against the original `input`.
 fn pattern_at_beginning_after_blank_line(input: &str) -> IResult<&str, &str> {
    let (tail, (line_feeds, keyword)) = pair(is_beginning_of_line, tag("BEGIN"))(input)?;
    let line_feed_total = line_feeds.matches('\n').count();
    if line_feed_total >= 2 {
        return Ok((tail, keyword));
    }
    Err(nom::Err::Error(nom::error::Error::new(
        input,
        nom::error::ErrorKind::Tag,
    )))
 }
 /// Demo entry point: runs `pattern_at_beginning_of_line` on a fixed sample and prints the result.
 fn main() {
    let sample = "\nBEGINThis is where your input will be next.";
    // `pattern_at_beginning_of_line` reports what the parser *after* `is_beginning_of_line`
    // matched; the leading line feeds are thrown away along the way.
    let report = match pattern_at_beginning_of_line(sample) {
        Ok((rest, token)) => format!("Matched: '{}', Remaining: '{}'", token, rest),
        Err(failure) => format!("Error: {:?}", failure),
    };
    println!("{}", report);
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    // Empty input has no line to be at the beginning of, so it is rejected with `Eof`.
    #[test]
    fn predicate_empty_in_not_sol() {
        let result = is_beginning_of_line("");
        assert_eq!(
            result,
            Err(nom::Err::Error(nom::error::Error::new(
                "",
                nom::error::ErrorKind::Eof,
            )))
        );
    }
    // A lone line feed is consumed entirely and reported as the matched text.
    #[test]
    fn predicate_sol() {
        let result = is_beginning_of_line("\n");
        assert_eq!(result, Ok(("", "\n")));
    }
    // With no leading line feed nothing is consumed; the invocation point counts as a line start.
    #[test]
    fn predicate_not_sol() {
        let result = is_beginning_of_line("TEST");
        assert_eq!(result, Ok(("TEST", "")));
    }
    // Only the line feed is consumed; the following token is left for the next parser.
    #[test]
    fn predicate_more_than_sol() {
        let result = is_beginning_of_line("\nTEST");
        assert_eq!(result, Ok(("TEST", "\n")));
    }
    #[test]
    fn sample_test() {
        let result = pattern_at_beginning_of_line("BEGIN: the rest");
        assert_eq!(result, Ok((": the rest", "BEGIN")));
    }
    #[test]
    fn with_leading_return() {
        let result = pattern_at_beginning_of_line("\nBEGIN: the rest");
        assert_eq!(result, Ok((": the rest", "BEGIN")));
    }
    #[test]
    fn with_multiple_leading_return() {
        let result = pattern_at_beginning_of_line("\n\n\nBEGIN: the rest");
        assert_eq!(result, Ok((": the rest", "BEGIN")));
    }
    // The space stops the line-feed skip immediately, so the keyword match fails on " \nBEGIN".
    #[test]
    fn with_space_leading_return() {
        let result = pattern_at_beginning_of_line(" \nBEGIN: the rest");
        assert!(result.is_err());
    }
    // The blank-line variant needs at least two skipped line feeds in front of the keyword.
    #[test]
    fn after_blank_line_requires_two_line_feeds() {
        assert_eq!(
            pattern_at_beginning_after_blank_line("\n\nBEGIN: the rest"),
            Ok((": the rest", "BEGIN"))
        );
        assert!(pattern_at_beginning_after_blank_line("\nBEGIN: the rest").is_err());
        assert!(pattern_at_beginning_after_blank_line("BEGIN: the rest").is_err());
    }
 }