FEAT Parsing the content for references

This supports all of the reference types that I commonly use,
including the Org-mode [[Title]], #CamelCase, #lisp-case, and #colon:case.
There are still edge cases around capitalization and the mixing of symbols
and numbers, and I'll have to hack on those until I'm satisfied.
This commit is contained in:
Elf M. Sternberg 2020-10-13 18:01:20 -07:00
parent 4e04bb47d5
commit 417320b27c
1 changed file with 10 additions and 4 deletions

View File

@ -33,7 +33,7 @@ pub(crate) fn find_links(document: &str) -> Vec<String> {
finder.iter_nodes(root, &|node| { finder.iter_nodes(root, &|node| {
lazy_static! { lazy_static! {
static ref RE_REFERENCES: BytesRegex = BytesRegex::new(r"(\[\[([^\]]+)\]\]|(\#[\w\-]+))").unwrap(); static ref RE_REFERENCES: BytesRegex = BytesRegex::new(r"(\[\[([^\]]+)\]\]|(\#[:\w\-]+))").unwrap();
} }
match &node.data.borrow().value { match &node.data.borrow().value {
@ -55,7 +55,7 @@ fn recase(title: &str) -> String {
static ref RE_PASS1: Regex = Regex::new(r"(?P<s>.)(?P<n>[A-Z][a-z]+)").unwrap(); static ref RE_PASS1: Regex = Regex::new(r"(?P<s>.)(?P<n>[A-Z][a-z]+)").unwrap();
static ref RE_PASS2: Regex = Regex::new(r"(?P<s>[[:lower:]]|\d)(?P<n>[[:upper:]])").unwrap(); static ref RE_PASS2: Regex = Regex::new(r"(?P<s>[[:lower:]]|\d)(?P<n>[[:upper:]])").unwrap();
static ref RE_PASS4: Regex = Regex::new(r"(?P<s>[a-z])(?P<n>\d)").unwrap(); static ref RE_PASS4: Regex = Regex::new(r"(?P<s>[a-z])(?P<n>\d)").unwrap();
static ref RE_PASS3: Regex = Regex::new(r"(_|-| )+").unwrap(); static ref RE_PASS3: Regex = Regex::new(r"(:|_|-| )+").unwrap();
} }
// This should panic if misused, so... :-) // This should panic if misused, so... :-)
@ -65,7 +65,7 @@ fn recase(title: &str) -> String {
let pass = RE_PASS1.replace_all(&pass, "$s $n"); let pass = RE_PASS1.replace_all(&pass, "$s $n");
let pass = RE_PASS4.replace_all(&pass, "$s $n"); let pass = RE_PASS4.replace_all(&pass, "$s $n");
let pass = RE_PASS2.replace_all(&pass, "$s $n"); let pass = RE_PASS2.replace_all(&pass, "$s $n");
RE_PASS3.replace_all(&pass, " ").to_string() RE_PASS3.replace_all(&pass, " ").trim().to_string()
} }
fn build_page_titles(references: &Vec<String>) -> Vec<String> { fn build_page_titles(references: &Vec<String>) -> Vec<String> {
@ -80,6 +80,7 @@ fn build_page_titles(references: &Vec<String>) -> Vec<String> {
_ => "".to_string(), _ => "".to_string(),
} }
}) })
.filter(|s| s.len() > 0)
.collect() .collect()
} }
@ -96,7 +97,10 @@ mod tests {
- #Test3 - #Test3
- #TestFourAndSo - #TestFourAndSo
- #Test-six-is-six - #Test-six-is-six
- #recipe:entree
- # - #
- #-_-
- #--Prefixed
- [[]] - [[]]
But *[[Test Seven]]* isn't. And *#Test_Eight____is_Messed-up* But *[[Test Seven]]* isn't. And *#Test_Eight____is_Messed-up*
@ -110,10 +114,12 @@ Right? [[
"Test 3", "Test 3",
"Test Four And So", "Test Four And So",
"Test six is six", "Test six is six",
"recipe entree",
"Prefixed",
"Test Seven", "Test Seven",
"Test Eight is Messed up", "Test Eight is Messed up",
"Test Bite Me", "Test Bite Me",
]; ];
assert!(res.iter().eq(expected.iter())); assert!(res.iter().eq(expected.iter()), "{:?}", res);
} }
} }