Committing and storing the work.
This commit is contained in:
		
						commit
						c66875e367
					
				|  | @ -0,0 +1 @@ | ||||||
|  | /target | ||||||
|  | @ -0,0 +1,32 @@ | ||||||
|  | # This file is automatically @generated by Cargo. | ||||||
|  | # It is not intended for manual editing. | ||||||
|  | version = 3 | ||||||
|  | 
 | ||||||
|  | [[package]] | ||||||
|  | name = "check-bol" | ||||||
|  | version = "0.1.0" | ||||||
|  | dependencies = [ | ||||||
|  |  "nom", | ||||||
|  | ] | ||||||
|  | 
 | ||||||
|  | [[package]] | ||||||
|  | name = "memchr" | ||||||
|  | version = "2.7.2" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" | ||||||
|  | 
 | ||||||
|  | [[package]] | ||||||
|  | name = "minimal-lexical" | ||||||
|  | version = "0.2.1" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" | ||||||
|  | 
 | ||||||
|  | [[package]] | ||||||
|  | name = "nom" | ||||||
|  | version = "7.1.3" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" | ||||||
|  | dependencies = [ | ||||||
|  |  "memchr", | ||||||
|  |  "minimal-lexical", | ||||||
|  | ] | ||||||
|  | @ -0,0 +1,9 @@ | ||||||
|  | [package] | ||||||
|  | name = "check-bol" | ||||||
|  | version = "0.1.0" | ||||||
|  | edition = "2021" | ||||||
|  | 
 | ||||||
|  | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||||||
|  | 
 | ||||||
|  | [dependencies] | ||||||
|  | nom = "7.1.3" | ||||||
|  | @ -0,0 +1,21 @@ | ||||||
|  | MIT License | ||||||
|  | 
 | ||||||
|  | Copyright (c) 2023 Elf M. Sternberg <elf.sternberg@gmail.com> | ||||||
|  | 
 | ||||||
|  | Permission is hereby granted, free of charge, to any person obtaining a copy | ||||||
|  | of this software and associated documentation files (the "Software"), to deal | ||||||
|  | in the Software without restriction, including without limitation the rights | ||||||
|  | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||||||
|  | copies of the Software, and to permit persons to whom the Software is | ||||||
|  | furnished to do so, subject to the following conditions: | ||||||
|  | 
 | ||||||
|  | The above copyright notice and this permission notice shall be included in all | ||||||
|  | copies or substantial portions of the Software. | ||||||
|  | 
 | ||||||
|  | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||||
|  | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||||
|  | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||||
|  | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||||
|  | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||||||
|  | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||||
|  | SOFTWARE. | ||||||
|  | @ -0,0 +1,12 @@ | ||||||
|  | # Rust beginning-of-line match with Nom | ||||||
|  | 
 | ||||||
|  | This is an experiment to demonstrate that it is possible to detect "the beginning of a line" using | ||||||
|  | Nom in Rust, although it's a bit harder than it looks. Essentially, we can only match "content that | ||||||
|  | includes a beginning-of-line marker," where the content after the marker may be another | ||||||
|  | beginning-of-line marker or anything that a legal Nom parser accepts. | ||||||
|  | 
 | ||||||
|  | It's just an experiment, meant to expand my knowledge.  Not a big deal. | ||||||
|  | 
 | ||||||
|  | # LICENSE | ||||||
|  | 
 | ||||||
|  | [MIT License](./LICENSE.md) | ||||||
|  | @ -0,0 +1,151 @@ | ||||||
|  | use nom::{ | ||||||
|  |     bytes::complete::{tag, take_while}, | ||||||
|  |     combinator::recognize, | ||||||
|  |     sequence::{pair, preceded}, | ||||||
|  |     IResult, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * Using Rust Nom, show how to detect "content that begins at the start of a line." This particular | ||||||
|  |  * variant rolls forward until it finds any content *other than* the start of a line, which is | ||||||
|  |  * defined as "the input token after any \n". | ||||||
|  |  * | ||||||
|  |  */ | ||||||
|  | fn is_beginning_of_line(input: &str) -> IResult<&str, &str> { | ||||||
|  |     if input.is_empty() { | ||||||
|  |         // It took me an absolutely ridiculous amount of time to find a simple "how do you construct
 | ||||||
|  |         // a standard error in Rust Nom" example. Shout-out to Daniel Imfeld (@dimfeld)
 | ||||||
|  |         // (https://imfeld.dev/writing/parsing_with_nom) for being the *23rd* entry Google offered
 | ||||||
|  |         // to answer that question, and being the *first* one with an example that wasn't "How to
 | ||||||
|  |         // write a custom Nom error" or "How to handle errors in Nom."
 | ||||||
|  |         return Err(nom::Err::Error(nom::error::Error::new( | ||||||
|  |             input, | ||||||
|  |             nom::error::ErrorKind::Eof, | ||||||
|  |         ))); | ||||||
|  |     } | ||||||
|  |     
 | ||||||
|  |     let (remaining, recognized) = recognize(preceded( | ||||||
|  |         take_while(|c| c == '\n'), | ||||||
|  |         // The empty string tag always succeeds without consuming any of the input. So we're
 | ||||||
|  |         // skipping the start-of-line markers before "trivially" succeeding, always successfully
 | ||||||
|  |         // checking the empty token, so we don't actually consume the next token.
 | ||||||
|  |         tag(""), | ||||||
|  |     ))(input)?; | ||||||
|  | 
 | ||||||
|  |     // Remaining is now pointing to the first token *after* the line feed, which could be literally
 | ||||||
|  |     // where the input started, or remaining is where the input needs to be next *after* the input
 | ||||||
|  |     // started with "\n". (I.e, we rolled forward one or more "\n"'s, but didn't meet anything else
 | ||||||
|  |     // so we can't be anywhere but at column 0).
 | ||||||
|  |     if std::ptr::eq(input, remaining) || input.starts_with('\n') { | ||||||
|  |         Ok((remaining, recognized)) | ||||||
|  |     } else { | ||||||
|  |         // I wasn't really sure what error to return here. This is one of those parsers you'll
 | ||||||
|  |         // probably use in an alt or something and we're consuming a hazy concept of end-of-line
 | ||||||
|  |         // until we're sure we're at the beginning of a line. "Tag" was the closest thing that
 | ||||||
|  |         // seemed right.
 | ||||||
|  |         Err(nom::Err::Error(nom::error::Error::new( | ||||||
|  |             input, | ||||||
|  |             nom::error::ErrorKind::Tag, | ||||||
|  |         ))) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // And this is how you use it; you're looking for *content* at the *start* of the line, not the
 | ||||||
|  | // start itself.
 | ||||||
|  | fn pattern_at_beginning_of_line(input: &str) -> IResult<&str, &str> { | ||||||
|  |     preceded( | ||||||
|  |         is_beginning_of_line, | ||||||
|  |         tag("BEGIN"), | ||||||
|  |     )(input) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | fn pattern_at_beginning_after_blank_line(input: &str) -> IResult<&str, &str> { | ||||||
|  |     let (remaining, (recognized, rest)) = pair( | ||||||
|  |         is_beginning_of_line, | ||||||
|  |         tag("BEGIN"))(input)?; | ||||||
|  |     if recognized.chars().filter(|c| *c == '\n').count() < 2 { | ||||||
|  |         Err(nom::Err::Error(nom::error::Error::new( | ||||||
|  |             input, | ||||||
|  |             nom::error::ErrorKind::Tag, | ||||||
|  |         ))) | ||||||
|  |     } else { | ||||||
|  |         Ok((remaining, rest)) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | fn main() { | ||||||
|  |     let input = "\nBEGINThis is where your input will be next."; | ||||||
|  |     // Note that `pattern_at_beginning_of_line` matches the parser *after* `is_beginning_of_line`.
 | ||||||
|  |     // Using `preceded` (above) throws out all the line feeds.
 | ||||||
|  |     match pattern_at_beginning_of_line(input) { | ||||||
|  |         Ok((remaining, matched)) => println!("Matched: '{}', Remaining: '{}'", matched, remaining), | ||||||
|  |         Err(err) => println!("Error: {:?}", err), | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the recoverable `Tag` error the parsers report at `at`.
    fn tag_error(at: &str) -> nom::Err<nom::error::Error<&str>> {
        nom::Err::Error(nom::error::Error::new(at, nom::error::ErrorKind::Tag))
    }

    // Empty input is rejected with an `Eof` error.
    #[test]
    fn predicate_empty_in_not_sol() {
        assert_eq!(
            is_beginning_of_line(""),
            Err(nom::Err::Error(nom::error::Error::new("", nom::error::ErrorKind::Eof)))
        );
    }

    // A lone line feed is consumed and reported as the recognized marker.
    #[test]
    fn predicate_sol() {
        assert_eq!(is_beginning_of_line("\n"), Ok(("", "\n")));
    }

    // With no leading line feed the parser still succeeds: it accepts the position it was given
    // and consumes nothing.
    #[test]
    fn predicate_not_sol() {
        assert_eq!(is_beginning_of_line("TEST"), Ok(("TEST", "")));
    }

    // The content after the line feed is left untouched for the next parser.
    #[test]
    fn predicate_more_than_sol() {
        assert_eq!(is_beginning_of_line("\nTEST"), Ok(("TEST", "\n")));
    }

    #[test]
    fn sample_test() {
        assert_eq!(
            pattern_at_beginning_of_line("BEGIN: the rest"),
            Ok((": the rest", "BEGIN"))
        );
    }

    #[test]
    fn with_leading_return() {
        assert_eq!(
            pattern_at_beginning_of_line("\nBEGIN: the rest"),
            Ok((": the rest", "BEGIN"))
        );
    }

    #[test]
    fn with_multiple_leading_return() {
        assert_eq!(
            pattern_at_beginning_of_line("\n\n\nBEGIN: the rest"),
            Ok((": the rest", "BEGIN"))
        );
    }

    // A space before the line feed means `BEGIN` is not the next token, so the tag parser fails
    // at the untouched input.
    #[test]
    fn with_space_leading_return() {
        let input = " \nBEGIN: the rest";
        assert_eq!(pattern_at_beginning_of_line(input), Err(tag_error(input)));
    }

    #[test]
    fn blank_line_variant_accepts_two_or_more_line_feeds() {
        assert_eq!(
            pattern_at_beginning_after_blank_line("\n\nBEGIN: the rest"),
            Ok((": the rest", "BEGIN"))
        );
        assert_eq!(
            pattern_at_beginning_after_blank_line("\n\n\nBEGIN: the rest"),
            Ok((": the rest", "BEGIN"))
        );
    }

    // Fewer than two line feeds is reported as a `Tag` error anchored at the original input.
    #[test]
    fn blank_line_variant_rejects_fewer_than_two_line_feeds() {
        let no_feed = "BEGIN: the rest";
        assert_eq!(pattern_at_beginning_after_blank_line(no_feed), Err(tag_error(no_feed)));

        let one_feed = "\nBEGIN: the rest";
        assert_eq!(pattern_at_beginning_after_blank_line(one_feed), Err(tag_error(one_feed)));
    }

    // When the tag itself is missing, the error points at the text after the skipped line feeds.
    #[test]
    fn blank_line_variant_requires_the_tag() {
        assert_eq!(
            pattern_at_beginning_after_blank_line("\n\nEND"),
            Err(tag_error("END"))
        );
    }
}
|  | 
 | ||||||
|  |     
 | ||||||
		Loading…
	
		Reference in New Issue