Committing and storing the work.
This commit is contained in:
commit
c66875e367
|
@ -0,0 +1 @@
|
|||
/target
|
|
@ -0,0 +1,32 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "check-bol"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"nom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.7.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
|
||||
|
||||
[[package]]
|
||||
name = "minimal-lexical"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "7.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"minimal-lexical",
|
||||
]
|
|
@ -0,0 +1,9 @@
|
|||
[package]
|
||||
name = "check-bol"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
nom = "7.1.3"
|
|
@ -0,0 +1,21 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2023 Elf M. Sternberg <elf.sternberg@gmail.com>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
|
@ -0,0 +1,12 @@
|
|||
# Rust beginning-of-line match with Nom
|
||||
|
||||
This is an experiment to demonstrate that it is possible to detect "the beginning of a line" using
|
||||
Nom in Rust, although it's a bit harder than it looks. Essentially we can only match "content that
|
||||
includes a beginning-of-line marker," which could be another beginning-of-line marker or any legal
|
||||
Nom parser.
|
||||
|
||||
It's just an experiment, meant to expand my knowledge. Not a big deal.
|
||||
|
||||
# LICENSE
|
||||
|
||||
[MIT License](./LICENSE.md)
|
|
@ -0,0 +1,151 @@
|
|||
use nom::{
|
||||
bytes::complete::{tag, take_while},
|
||||
combinator::recognize,
|
||||
sequence::{pair, preceded},
|
||||
IResult,
|
||||
};
|
||||
|
||||
/**
|
||||
* Using Rust Nom, show how to detect "content that begins at the start of a line." This particular
|
||||
* variant rolls forward until it finds any content *other than* the start of a line, which is
|
||||
* defined as "the input token after any \n".
|
||||
*
|
||||
*/
|
||||
fn is_beginning_of_line(input: &str) -> IResult<&str, &str> {
|
||||
if input.is_empty() {
|
||||
// It took me an absolutely ridiculous amount of time to find a simple "how do you construct
|
||||
// a standard error in Rust Nom" example. Shout-out to Daniel Imfeld (@dimfeld)
|
||||
// (https://imfeld.dev/writing/parsing_with_nom) for being the *23rd* entry Google offered
|
||||
// to answer that question, and being the *first* one with an example that wasn't "How to
|
||||
// write a custom Nom error" or "How to handle errors in Nom."
|
||||
return Err(nom::Err::Error(nom::error::Error::new(
|
||||
input,
|
||||
nom::error::ErrorKind::Eof,
|
||||
)));
|
||||
}
|
||||
|
||||
let (remaining, recognized) = recognize(preceded(
|
||||
take_while(|c| c == '\n'),
|
||||
// The empty string tag always succeeds without consuming any of the input. So we're
|
||||
// skipping the start-of-line markers before "trivially" succeeding, always successfully
|
||||
// checking the empty token, so we don't actually consume the next token.
|
||||
tag(""),
|
||||
))(input)?;
|
||||
|
||||
// Remaining is now pointing to the first token *after* the line feed, which could be literally
|
||||
// where the input started, or remaining is where the input needs to be next *after* the input
|
||||
// started with "\n". (I.e, we rolled forward one or more "\n"'s, but didn't meet anything else
|
||||
// so we can't be anywhere but at column 0).
|
||||
if std::ptr::eq(input, remaining) || input.starts_with('\n') {
|
||||
Ok((remaining, recognized))
|
||||
} else {
|
||||
// I wasn't really sure what error to return here. This is one of those parsers you'll
|
||||
// probably use in an alt or something and we're consuming a hazy concept of end-of-line
|
||||
// until we're sure we're at the beginning of a line. "Tag" was the closest thing that
|
||||
// seemed right.
|
||||
Err(nom::Err::Error(nom::error::Error::new(
|
||||
input,
|
||||
nom::error::ErrorKind::Tag,
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
// And this is how you use it; you're looking for *content* at the *start* of the line, not the
|
||||
// start itself.
|
||||
fn pattern_at_beginning_of_line(input: &str) -> IResult<&str, &str> {
|
||||
preceded(
|
||||
is_beginning_of_line,
|
||||
tag("BEGIN"),
|
||||
)(input)
|
||||
}
|
||||
|
||||
fn pattern_at_beginning_after_blank_line(input: &str) -> IResult<&str, &str> {
|
||||
let (remaining, (recognized, rest)) = pair(
|
||||
is_beginning_of_line,
|
||||
tag("BEGIN"))(input)?;
|
||||
if recognized.chars().filter(|c| *c == '\n').count() < 2 {
|
||||
Err(nom::Err::Error(nom::error::Error::new(
|
||||
input,
|
||||
nom::error::ErrorKind::Tag,
|
||||
)))
|
||||
} else {
|
||||
Ok((remaining, rest))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn main() {
|
||||
let input = "\nBEGINThis is where your input will be next.";
|
||||
// Note that `pattern_at_beginning_of_line` matches the parser *after* `is_beginning_of_line`.
|
||||
// Using `preceded` (above) throws out all the line feeds.
|
||||
match pattern_at_beginning_of_line(input) {
|
||||
Ok((remaining, matched)) => println!("Matched: '{}', Remaining: '{}'", matched, remaining),
|
||||
Err(err) => println!("Error: {:?}", err),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Empty input has no "next token", so the predicate reports end-of-input.
    #[test]
    fn predicate_empty_is_not_sol() {
        assert_eq!(
            is_beginning_of_line(""),
            Err(nom::Err::Error(nom::error::Error::new(
                "",
                nom::error::ErrorKind::Eof,
            )))
        );
    }

    /// A lone line feed is consumed entirely and returned as the recognized prefix.
    #[test]
    fn predicate_sol() {
        assert_eq!(is_beginning_of_line("\n"), Ok(("", "\n")));
    }

    /// Input without a leading line feed is assumed to already be at a line start: the
    /// predicate succeeds and consumes nothing.
    #[test]
    fn predicate_not_sol() {
        assert_eq!(is_beginning_of_line("TEST"), Ok(("TEST", "")));
    }

    /// A leading line feed is consumed; the content after it is left untouched.
    #[test]
    fn predicate_more_than_sol() {
        assert_eq!(is_beginning_of_line("\nTEST"), Ok(("TEST", "\n")));
    }

    /// The tag is matched right at the start of the input.
    #[test]
    fn sample_test() {
        assert_eq!(
            pattern_at_beginning_of_line("BEGIN: the rest"),
            Ok((": the rest", "BEGIN"))
        );
    }

    /// A single leading line feed is skipped before the tag is matched.
    #[test]
    fn with_leading_return() {
        assert_eq!(
            pattern_at_beginning_of_line("\nBEGIN: the rest"),
            Ok((": the rest", "BEGIN"))
        );
    }

    /// Any number of leading line feeds is skipped before the tag is matched.
    #[test]
    fn with_multiple_leading_return() {
        assert_eq!(
            pattern_at_beginning_of_line("\n\n\nBEGIN: the rest"),
            Ok((": the rest", "BEGIN"))
        );
    }

    /// A space is not a line feed, so nothing is skipped and the tag fails on the space.
    #[test]
    fn with_space_leading_return() {
        let result = pattern_at_beginning_of_line(" \nBEGIN: the rest");
        assert!(result.is_err());
    }

    /// Two or more leading line feeds satisfy the blank-line variant.
    #[test]
    fn after_blank_line_accepts_two_returns() {
        assert_eq!(
            pattern_at_beginning_after_blank_line("\n\nBEGIN: the rest"),
            Ok((": the rest", "BEGIN"))
        );
    }

    /// Fewer than two leading line feeds are rejected by the blank-line variant.
    #[test]
    fn after_blank_line_rejects_fewer_than_two_returns() {
        assert!(pattern_at_beginning_after_blank_line("\nBEGIN: the rest").is_err());
        assert!(pattern_at_beginning_after_blank_line("BEGIN: the rest").is_err());
    }
}
|
||||
|
||||
|
Loading…
Reference in New Issue