From 4e04bb47d53970dc0f7c8763d5f5c6ec45306d49 Mon Sep 17 00:00:00 2001 From: "Elf M. Sternberg" Date: Tue, 13 Oct 2020 17:23:24 -0700 Subject: [PATCH] FEAT: Reference parser is now working. It's probably not the fastest thing in the world, but it's going to be enough for now. --- server/nm-store/Cargo.toml | 1 + server/nm-store/src/lib.rs | 1 + server/nm-store/src/reference_parser.rs | 119 ++++++++++++++++++++++++ 3 files changed, 121 insertions(+) create mode 100644 server/nm-store/src/reference_parser.rs diff --git a/server/nm-store/Cargo.toml b/server/nm-store/Cargo.toml index d0d9960..795fa77 100644 --- a/server/nm-store/Cargo.toml +++ b/server/nm-store/Cargo.toml @@ -15,6 +15,7 @@ friendly_id = "0.3.0" thiserror = "1.0.20" derive_builder = "0.9.0" lazy_static = "1.4.0" +comrak = "0.8.2" regex = "1.3.9" slug = "0.1.4" tokio = { version = "0.2.22", features = ["rt-threaded", "blocking"] } diff --git a/server/nm-store/src/lib.rs b/server/nm-store/src/lib.rs index 0fa19c4..0ce5da3 100644 --- a/server/nm-store/src/lib.rs +++ b/server/nm-store/src/lib.rs @@ -2,6 +2,7 @@ mod errors; mod row_structs; mod store; mod structs; +mod reference_parser; pub use crate::errors::NoteStoreError; pub use crate::store::NoteStore; diff --git a/server/nm-store/src/reference_parser.rs b/server/nm-store/src/reference_parser.rs new file mode 100644 index 0000000..0dbd19b --- /dev/null +++ b/server/nm-store/src/reference_parser.rs @@ -0,0 +1,119 @@ +use comrak::nodes::{AstNode, NodeValue}; +use comrak::{parse_document, Arena, ComrakOptions}; +use lazy_static::lazy_static; +use regex::bytes::Regex as BytesRegex; +use regex::Regex; + +pub struct Finder(pub Vec); + +impl Finder { + pub fn new() -> Self { + Finder(Vec::new()) + } + + fn iter_nodes<'a, F>(&mut self, node: &'a AstNode<'a>, f: &F) + where + F: Fn(&'a AstNode<'a>) -> Option>, + { + if let Some(mut v) = f(node) { + self.0.append(&mut v); + } + for c in node.children() { + self.iter_nodes(c, f); + } + } +} + +/// Given a content block, return a list of all the page references found +/// within the block. The references may need further massaging. +pub(crate) fn find_links(document: &str) -> Vec { + let arena = Arena::new(); + let mut finder = Finder::new(); + let root = parse_document(&arena, document, &ComrakOptions::default()); + + finder.iter_nodes(root, &|node| { + lazy_static! { + static ref RE_REFERENCES: BytesRegex = BytesRegex::new(r"(\[\[([^\]]+)\]\]|(\#[\w\-]+))").unwrap(); + } + + match &node.data.borrow().value { + &NodeValue::Text(ref text) => Some( + RE_REFERENCES + .captures_iter(text) + .map(|t| String::from_utf8_lossy(&t.get(1).unwrap().as_bytes()).to_string()) + .collect(), + ), + _ => None, + } + }); + + finder.0 +} + +fn recase(title: &str) -> String { + lazy_static! { + static ref RE_PASS1: Regex = Regex::new(r"(?P.)(?P[A-Z][a-z]+)").unwrap(); + static ref RE_PASS2: Regex = Regex::new(r"(?P[[:lower:]]|\d)(?P[[:upper:]])").unwrap(); + static ref RE_PASS4: Regex = Regex::new(r"(?P[a-z])(?P\d)").unwrap(); + static ref RE_PASS3: Regex = Regex::new(r"(_|-| )+").unwrap(); + } + + // This should panic if misused, so... :-) + let pass = title.to_string(); + let pass = pass.strip_prefix("#").unwrap(); + + let pass = RE_PASS1.replace_all(&pass, "$s $n"); + let pass = RE_PASS4.replace_all(&pass, "$s $n"); + let pass = RE_PASS2.replace_all(&pass, "$s $n"); + RE_PASS3.replace_all(&pass, " ").to_string() +} + +fn build_page_titles(references: &Vec) -> Vec { + references + .iter() + .map(|s| { + let c = s.chars().nth(0); + match c { + Some('#') => recase(s), + Some('[') => s.strip_prefix("[[").unwrap().strip_suffix("]]").unwrap().to_string(), + Some(_) => s.clone(), + _ => "".to_string(), + } + }) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn finds_expected() { + let sample = r###" +# Header +- NotATest 1 +- [[Test 2]] +- #Test3 +- #TestFourAndSo +- #Test-six-is-six +- # +- [[]] + +But *[[Test Seven]]* isn't. And *#Test_Eight____is_Messed-up* +And [[Test Bite Me]] is the worst. +Right? [[ +]] +"###; + let res = build_page_titles(&find_links(sample)); + let expected = vec![ + "Test 2", + "Test 3", + "Test Four And So", + "Test six is six", + "Test Seven", + "Test Eight is Messed up", + "Test Bite Me", + ]; + assert!(res.iter().eq(expected.iter())); + } +}