Committing and storing the work.
This commit is contained in:
		
						commit
						c66875e367
					
				|  | @ -0,0 +1 @@ | |||
| /target | ||||
|  | @ -0,0 +1,32 @@ | |||
| # This file is automatically @generated by Cargo. | ||||
| # It is not intended for manual editing. | ||||
| version = 3 | ||||
| 
 | ||||
| [[package]] | ||||
| name = "check-bol" | ||||
| version = "0.1.0" | ||||
| dependencies = [ | ||||
|  "nom", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "memchr" | ||||
| version = "2.7.2" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" | ||||
| 
 | ||||
| [[package]] | ||||
| name = "minimal-lexical" | ||||
| version = "0.2.1" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" | ||||
| 
 | ||||
| [[package]] | ||||
| name = "nom" | ||||
| version = "7.1.3" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" | ||||
| dependencies = [ | ||||
|  "memchr", | ||||
|  "minimal-lexical", | ||||
| ] | ||||
|  | @ -0,0 +1,9 @@ | |||
| [package] | ||||
| name = "check-bol" | ||||
| version = "0.1.0" | ||||
| edition = "2021" | ||||
| 
 | ||||
| # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||||
| 
 | ||||
| [dependencies] | ||||
| nom = "7.1.3" | ||||
|  | @ -0,0 +1,21 @@ | |||
| MIT License | ||||
| 
 | ||||
| Copyright (c) 2023 Elf M. Sternberg <elf.sternberg@gmail.com> | ||||
| 
 | ||||
| Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
| of this software and associated documentation files (the "Software"), to deal | ||||
| in the Software without restriction, including without limitation the rights | ||||
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||||
| copies of the Software, and to permit persons to whom the Software is | ||||
| furnished to do so, subject to the following conditions: | ||||
| 
 | ||||
| The above copyright notice and this permission notice shall be included in all | ||||
| copies or substantial portions of the Software. | ||||
| 
 | ||||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
| SOFTWARE. | ||||
|  | @ -0,0 +1,12 @@ | |||
| # Rust beginning of line match with NOM | ||||
| 
 | ||||
| This is an experiment to demonstrate that it is possible to detect "the beginning of a line" using | ||||
| Nom in Rust, although it's a bit harder than it looks. Essentially we can only match "content that | ||||
| includes a beginning-of-line marker," which could be another beginning-of-line marker or any legal | ||||
| Nom parser. | ||||
| 
 | ||||
| It's just an experiment, meant to expand my knowledge.  Not a big deal. | ||||
| 
 | ||||
| # LICENSE | ||||
| 
 | ||||
| [MIT License](./LICENSE.md) | ||||
|  | @ -0,0 +1,151 @@ | |||
| use nom::{ | ||||
|     bytes::complete::{tag, take_while}, | ||||
|     combinator::recognize, | ||||
|     sequence::{pair, preceded}, | ||||
|     IResult, | ||||
| }; | ||||
| 
 | ||||
| /**
 | ||||
|  * Using Rust Nom, show how to detect "content that begins at the start of a line." This particular | ||||
|  * variant rolls forward until it finds any content *other than* the start of a line, which is | ||||
|  * defined as "the input token after any \n". | ||||
|  * | ||||
|  */ | ||||
| fn is_beginning_of_line(input: &str) -> IResult<&str, &str> { | ||||
|     if input.is_empty() { | ||||
|         // It took me an absolutely ridiculous amount of time to find a simple "how do you construct
 | ||||
|         // a standard error in Rust Nom" example. Shout-out to Daniel Imfeld (@dimfeld)
 | ||||
|         // (https://imfeld.dev/writing/parsing_with_nom) for being the *23rd* entry Google offered
 | ||||
|         // to answer that question, and being the *first* one with an example that wasn't "How to
 | ||||
|         // write a custom Nom error" or "How to handle errors in Nom."
 | ||||
|         return Err(nom::Err::Error(nom::error::Error::new( | ||||
|             input, | ||||
|             nom::error::ErrorKind::Eof, | ||||
|         ))); | ||||
|     } | ||||
|     
 | ||||
|     let (remaining, recognized) = recognize(preceded( | ||||
|         take_while(|c| c == '\n'), | ||||
|         // The empty string tag always succeeds without consuming any of the input. So we're
 | ||||
|         // skipping the start-of-line markers before "trivially" succeeding, always successfully
 | ||||
|         // checking the empty token, so we don't actually consume the next token.
 | ||||
|         tag(""), | ||||
|     ))(input)?; | ||||
| 
 | ||||
|     // Remaining is now pointing to the first token *after* the line feed, which could be literally
 | ||||
|     // where the input started, or remaining is where the input needs to be next *after* the input
 | ||||
|     // started with "\n". (I.e, we rolled forward one or more "\n"'s, but didn't meet anything else
 | ||||
|     // so we can't be anywhere but at column 0).
 | ||||
|     if std::ptr::eq(input, remaining) || input.starts_with('\n') { | ||||
|         Ok((remaining, recognized)) | ||||
|     } else { | ||||
|         // I wasn't really sure what error to return here. This is one of those parsers you'll
 | ||||
|         // probably use in an alt or something and we're consuming a hazy concept of end-of-line
 | ||||
|         // until we're sure we're at the beginning of a line. "Tag" was the closest thing that
 | ||||
|         // seemed right.
 | ||||
|         Err(nom::Err::Error(nom::error::Error::new( | ||||
|             input, | ||||
|             nom::error::ErrorKind::Tag, | ||||
|         ))) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| // And this is how you use it; you're looking for *content* at the *start* of the line, not the
 | ||||
| // start itself.
 | ||||
| fn pattern_at_beginning_of_line(input: &str) -> IResult<&str, &str> { | ||||
|     preceded( | ||||
|         is_beginning_of_line, | ||||
|         tag("BEGIN"), | ||||
|     )(input) | ||||
| } | ||||
| 
 | ||||
| fn pattern_at_beginning_after_blank_line(input: &str) -> IResult<&str, &str> { | ||||
|     let (remaining, (recognized, rest)) = pair( | ||||
|         is_beginning_of_line, | ||||
|         tag("BEGIN"))(input)?; | ||||
|     if recognized.chars().filter(|c| *c == '\n').count() < 2 { | ||||
|         Err(nom::Err::Error(nom::error::Error::new( | ||||
|             input, | ||||
|             nom::error::ErrorKind::Tag, | ||||
|         ))) | ||||
|     } else { | ||||
|         Ok((remaining, rest)) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| fn main() { | ||||
|     let input = "\nBEGINThis is where your input will be next."; | ||||
|     // Note that `pattern_at_beginning_of_line` matches the parser *after* `is_beginning_of_line`.
 | ||||
|     // Using `preceded` (above) throws out all the line feeds.
 | ||||
|     match pattern_at_beginning_of_line(input) { | ||||
|         Ok((remaining, matched)) => println!("Matched: '{}', Remaining: '{}'", matched, remaining), | ||||
|         Err(err) => println!("Error: {:?}", err), | ||||
|     } | ||||
| } | ||||
| 
 | ||||
#[cfg(test)]
mod tests {
    use super::*;

    // ---- is_beginning_of_line -------------------------------------------------------------

    /// Empty input is rejected with an end-of-input error.
    #[test]
    fn predicate_empty_in_not_sol() {
        let result = is_beginning_of_line("");
        assert_eq!(
            result,
            Err(nom::Err::Error(nom::error::Error::new(
                "",
                nom::error::ErrorKind::Eof,
            )))
        );
    }

    /// A lone line feed is consumed entirely.
    #[test]
    fn predicate_sol() {
        let result = is_beginning_of_line("\n");
        assert_eq!(result, Ok(("", "\n")));
    }

    /// Input without a leading line feed still succeeds, consuming nothing.
    #[test]
    fn predicate_not_sol() {
        let result = is_beginning_of_line("TEST");
        assert_eq!(result, Ok(("TEST", "")));
    }

    /// A leading line feed is consumed and the content after it is left untouched.
    #[test]
    fn predicate_more_than_sol() {
        let result = is_beginning_of_line("\nTEST");
        assert_eq!(result, Ok(("TEST", "\n")));
    }

    // ---- pattern_at_beginning_of_line -----------------------------------------------------

    /// The pattern is found when the input starts with it directly.
    #[test]
    fn sample_test() {
        let result = pattern_at_beginning_of_line("BEGIN: the rest");
        assert_eq!(result, Ok((": the rest", "BEGIN")));
    }

    /// A single leading line feed is skipped before matching.
    #[test]
    fn with_leading_return() {
        let result = pattern_at_beginning_of_line("\nBEGIN: the rest");
        assert_eq!(result, Ok((": the rest", "BEGIN")));
    }

    /// Several leading line feeds are all skipped before matching.
    #[test]
    fn with_multiple_leading_return() {
        let result = pattern_at_beginning_of_line("\n\n\nBEGIN: the rest");
        assert_eq!(result, Ok((": the rest", "BEGIN")));
    }

    /// Anything other than line feeds before the pattern makes the match fail.
    #[test]
    fn with_space_leading_return() {
        let result = pattern_at_beginning_of_line(" \nBEGIN: the rest");
        assert!(result.is_err());
    }

    // ---- pattern_at_beginning_after_blank_line --------------------------------------------

    /// Two or more line feeds before the pattern are accepted.
    #[test]
    fn blank_line_then_pattern_matches() {
        let result = pattern_at_beginning_after_blank_line("\n\nBEGIN: the rest");
        assert_eq!(result, Ok((": the rest", "BEGIN")));

        let result = pattern_at_beginning_after_blank_line("\n\n\nBEGIN: the rest");
        assert_eq!(result, Ok((": the rest", "BEGIN")));
    }

    /// Fewer than two line feeds before the pattern is rejected.
    #[test]
    fn blank_line_requires_two_line_feeds() {
        assert!(pattern_at_beginning_after_blank_line("BEGIN: the rest").is_err());
        assert!(pattern_at_beginning_after_blank_line("\nBEGIN: the rest").is_err());
    }

    /// Enough line feeds but the wrong content is still a failure.
    #[test]
    fn blank_line_with_wrong_pattern_fails() {
        assert!(pattern_at_beginning_after_blank_line("\n\nEND: the rest").is_err());
    }
}
| 
 | ||||
|     
 | ||||
		Loading…
	
		Reference in New Issue