Committing and storing the work.

This commit is contained in:
Elf M. Sternberg 2024-11-10 13:02:44 -08:00
commit c66875e367
6 changed files with 226 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

32
Cargo.lock generated Normal file
View File

@ -0,0 +1,32 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "check-bol"
version = "0.1.0"
dependencies = [
"nom",
]
[[package]]
name = "memchr"
version = "2.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]

9
Cargo.toml Normal file
View File

@ -0,0 +1,9 @@
[package]
name = "check-bol"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
nom = "7.1.3"

21
LICENSE.md Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 Elf M. Sternberg <elf.sternberg@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

12
README.md Normal file
View File

@ -0,0 +1,12 @@
# Rust beginning of line match with NOM
This is an experiment to demonstrate that it is possible to detect "the beginning of a line" using
Nom in Rust, although it's a bit harder than it looks. Essentially, we can only match "content that
follows a beginning-of-line marker," where that content could be another beginning-of-line marker
or anything a legal Nom parser accepts.
It's just an experiment, meant to expand my knowledge. Not a big deal.
# LICENSE
[MIT License](./LICENSE.md)

151
src/main.rs Normal file
View File

@ -0,0 +1,151 @@
use nom::{
bytes::complete::{tag, take_while},
combinator::recognize,
sequence::{pair, preceded},
IResult,
};
/// Succeeds when `input` can be treated as sitting at the start of a line, skipping any leading
/// `'\n'` characters so that the remaining input begins at column 0.
///
/// A nom parser only sees the input it is handed and cannot look *behind* it, so "start of a
/// line" is defined purely by what can be observed looking forward:
///
/// * the first character of a non-empty input counts as a start of line, and
/// * after one or more `'\n'` characters have been skipped, the position must be column 0.
///
/// One of those two cases holds for every non-empty input, so this parser only fails on empty
/// input. It cannot detect that a caller invoked it in the middle of a line; pair it with a
/// content parser (for example via `preceded`) and let that parser reject what follows.
///
/// # Returns
///
/// `Ok((remaining, newlines))`, where `newlines` is the (possibly empty) run of leading `'\n'`
/// characters and `remaining` is everything after that run.
///
/// # Errors
///
/// Returns a recoverable `nom::Err::Error` carrying `ErrorKind::Eof` when `input` is empty.
fn is_beginning_of_line(input: &str) -> IResult<&str, &str> {
    if input.is_empty() {
        // Constructing a stock nom error by hand is surprisingly under-documented; credit to
        // Daniel Imfeld (@dimfeld, https://imfeld.dev/writing/parsing_with_nom) for a plain
        // example that is not about writing a custom error type.
        return Err(nom::Err::Error(nom::error::Error::new(
            input,
            nom::error::ErrorKind::Eof,
        )));
    }

    // `take_while` consumes zero or more line feeds and never fails. The empty `tag("")` then
    // succeeds trivially without consuming anything, so the token that follows the line feeds is
    // left for the caller. `recognize` hands back exactly the line feeds that were skipped.
    //
    // No further check is needed: either nothing was consumed (we are still at the start of the
    // input we were given) or at least one '\n' was consumed (we are at column 0).
    recognize(preceded(
        take_while(|c: char| c == '\n'),
        tag(""),
    ))(input)
}
// And this is how you use it; you're looking for *content* at the *start* of the line, not the
// start itself.
fn pattern_at_beginning_of_line(input: &str) -> IResult<&str, &str> {
preceded(
is_beginning_of_line,
tag("BEGIN"),
)(input)
}
/// Matches the literal `BEGIN` at the start of a line, but only when at least two `'\n'`
/// characters were skipped immediately before it.
///
/// On success returns the input following `BEGIN` and the matched `BEGIN` itself. When fewer
/// than two line feeds precede `BEGIN`, a recoverable `ErrorKind::Tag` error is reported against
/// the original `input`. Failures of the underlying parsers are propagated unchanged.
fn pattern_at_beginning_after_blank_line(input: &str) -> IResult<&str, &str> {
    let (tail, (newlines, keyword)) = pair(is_beginning_of_line, tag("BEGIN"))(input)?;

    // Two or more skipped line feeds are required for the match to count.
    if newlines.matches('\n').count() >= 2 {
        return Ok((tail, keyword));
    }

    Err(nom::Err::Error(nom::error::Error::new(
        input,
        nom::error::ErrorKind::Tag,
    )))
}
/// Runs the start-of-line parser against a fixed sample string and prints the outcome.
fn main() {
    let sample = "\nBEGINThis is where your input will be next.";

    // `pattern_at_beginning_of_line` reports what the content parser matched *after*
    // `is_beginning_of_line` has run; the leading line feeds are thrown away along the way.
    match pattern_at_beginning_of_line(sample) {
        Err(failure) => println!("Error: {:?}", failure),
        Ok((rest, keyword)) => println!("Matched: '{}', Remaining: '{}'", keyword, rest),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use nom::error::ErrorKind;

    /// Builds the recoverable nom error the parsers in this file produce, so tests can compare
    /// whole results instead of only checking `is_ok()` / `is_err()`.
    fn recoverable(input: &str, kind: ErrorKind) -> nom::Err<nom::error::Error<&str>> {
        nom::Err::Error(nom::error::Error::new(input, kind))
    }

    // Empty input has no position that could be a start of line.
    #[test]
    fn predicate_empty_is_not_sol() {
        assert_eq!(
            is_beginning_of_line(""),
            Err(recoverable("", ErrorKind::Eof))
        );
    }

    // A lone line feed is consumed and reported as the recognized prefix.
    #[test]
    fn predicate_sol() {
        assert_eq!(is_beginning_of_line("\n"), Ok(("", "\n")));
    }

    // The very start of the input counts as a start of line; nothing is consumed.
    #[test]
    fn predicate_start_of_input_is_sol() {
        assert_eq!(is_beginning_of_line("TEST"), Ok(("TEST", "")));
    }

    // Leading line feeds are skipped and the content after them is left untouched.
    #[test]
    fn predicate_more_than_sol() {
        assert_eq!(is_beginning_of_line("\nTEST"), Ok(("TEST", "\n")));
    }

    #[test]
    fn sample_test() {
        assert_eq!(
            pattern_at_beginning_of_line("BEGIN: the rest"),
            Ok((": the rest", "BEGIN"))
        );
    }

    #[test]
    fn with_leading_return() {
        assert_eq!(
            pattern_at_beginning_of_line("\nBEGIN: the rest"),
            Ok((": the rest", "BEGIN"))
        );
    }

    #[test]
    fn with_multiple_leading_return() {
        assert_eq!(
            pattern_at_beginning_of_line("\n\n\nBEGIN: the rest"),
            Ok((": the rest", "BEGIN"))
        );
    }

    // A space before the line feed means the content parser sees " \nBEGIN…" and rejects it.
    #[test]
    fn with_space_leading_return() {
        let input = " \nBEGIN: the rest";
        assert_eq!(
            pattern_at_beginning_of_line(input),
            Err(recoverable(input, ErrorKind::Tag))
        );
    }

    // Two line feeds before the keyword satisfy the blank-line requirement.
    #[test]
    fn after_blank_line_matches() {
        assert_eq!(
            pattern_at_beginning_after_blank_line("\n\nBEGIN: the rest"),
            Ok((": the rest", "BEGIN"))
        );
    }

    // Fewer than two line feeds is rejected, with the error reported at the original input.
    #[test]
    fn after_blank_line_requires_two_line_feeds() {
        for input in ["BEGIN: the rest", "\nBEGIN: the rest"] {
            assert_eq!(
                pattern_at_beginning_after_blank_line(input),
                Err(recoverable(input, ErrorKind::Tag))
            );
        }
    }

    // Enough line feeds but the wrong content: the failure comes from the `BEGIN` tag itself.
    #[test]
    fn after_blank_line_requires_keyword() {
        assert_eq!(
            pattern_at_beginning_after_blank_line("\n\nOTHER"),
            Err(recoverable("OTHER", ErrorKind::Tag))
        );
    }
}