use nom::{ bytes::complete::{tag, take_while}, combinator::recognize, sequence::{pair, preceded}, IResult, }; /** * Using Rust Nom, show how to detect "content that begins at the start of a line." This particular * variant rolls forward until it finds any content *other than* the start of a line, which is * defined as "the input token after any \n". * */ fn is_beginning_of_line(input: &str) -> IResult<&str, &str> { if input.is_empty() { // It took me an absolutely ridiculous amount of time to find a simple "how do you construct // a standard error in Rust Nom" example. Shout-out to Daniel Imfeld (@dimfeld) // (https://imfeld.dev/writing/parsing_with_nom) for being the *23rd* entry Google offered // to answer that question, and being the *first* one with an example that wasn't "How to // write a custom Nom error" or "How to handle errors in Nom." return Err(nom::Err::Error(nom::error::Error::new( input, nom::error::ErrorKind::Eof, ))); } let (remaining, recognized) = recognize(preceded( take_while(|c| c == '\n'), // The empty string tag always succeeds without consuming any of the input. So we're // skipping the start-of-line markers before "trivially" succeeding, always successfully // checking the empty token, so we don't actually consume the next token. tag(""), ))(input)?; // Remaining is now pointing to the first token *after* the line feed, which could be literally // where the input started, or remaining is where the input needs to be next *after* the input // started with "\n". (I.e, we rolled forward one or more "\n"'s, but didn't meet anything else // so we can't be anywhere but at column 0). if std::ptr::eq(input, remaining) || input.starts_with('\n') { Ok((remaining, recognized)) } else { // I wasn't really sure what error to return here. This is one of those parsers you'll // probably use in an alt or something and we're consuming a hazy concept of end-of-line // until we're sure we're at the beginning of a line. "Tag" was the closest thing that // seemed right. Err(nom::Err::Error(nom::error::Error::new( input, nom::error::ErrorKind::Tag, ))) } } // And this is how you use it; you're looking for *content* at the *start* of the line, not the // start itself. fn pattern_at_beginning_of_line(input: &str) -> IResult<&str, &str> { preceded( is_beginning_of_line, tag("BEGIN"), )(input) } fn pattern_at_beginning_after_blank_line(input: &str) -> IResult<&str, &str> { let (remaining, (recognized, rest)) = pair( is_beginning_of_line, tag("BEGIN"))(input)?; if recognized.chars().filter(|c| *c == '\n').count() < 2 { Err(nom::Err::Error(nom::error::Error::new( input, nom::error::ErrorKind::Tag, ))) } else { Ok((remaining, rest)) } } fn main() { let input = "\nBEGINThis is where your input will be next."; // Note that `pattern_at_beginning_of_line` matches the parser *after* `is_beginning_of_line`. // Using `preceded` (above) throws out all the line feeds. match pattern_at_beginning_of_line(input) { Ok((remaining, matched)) => println!("Matched: '{}', Remaining: '{}'", matched, remaining), Err(err) => println!("Error: {:?}", err), } } #[cfg(test)] mod tests { use super::*; #[test] fn predicate_empty_in_not_sol() { let result = is_beginning_of_line(""); assert!(result.is_err()); } #[test] fn predicate_sol() { let result = is_beginning_of_line("\n"); assert!(result.is_ok()); } #[test] fn predicate_not_sol() { let result = is_beginning_of_line("TEST"); assert!(result.is_ok()); let (remaining, _) = result.unwrap(); assert_eq!(remaining, "TEST"); } #[test] fn predicate_more_than_sol() { let result = is_beginning_of_line("\nTEST"); assert!(result.is_ok()); let (remaining, _) = result.unwrap(); assert_eq!(remaining, "TEST"); } #[test] fn sample_test() { let result = pattern_at_beginning_of_line("BEGIN: the rest"); assert!(result.is_ok()); let (remaining, input) = result.unwrap(); println!("{:?}", input); assert_eq!(remaining, ": the rest"); } #[test] fn with_leading_return() { let result = pattern_at_beginning_of_line("\nBEGIN: the rest"); assert!(result.is_ok()); let (remaining, _) = result.unwrap(); assert_eq!(remaining, ": the rest"); } #[test] fn with_multiple_leading_return() { let result = pattern_at_beginning_of_line("\n\n\nBEGIN: the rest"); assert!(result.is_ok()); let (remaining, _) = result.unwrap(); assert_eq!(remaining, ": the rest"); } #[test] fn with_space_leading_return() { let result = pattern_at_beginning_of_line(" \nBEGIN: the rest"); assert!(result.is_err()); } }