FEAT: Reference parser is now working.
It's probably not the fastest thing in the world, but it's going to be enough for now.
This commit is contained in:
parent
1c0f3abd6c
commit
4e04bb47d5
|
@ -15,6 +15,7 @@ friendly_id = "0.3.0"
|
||||||
thiserror = "1.0.20"
|
thiserror = "1.0.20"
|
||||||
derive_builder = "0.9.0"
|
derive_builder = "0.9.0"
|
||||||
lazy_static = "1.4.0"
|
lazy_static = "1.4.0"
|
||||||
|
comrak = "0.8.2"
|
||||||
regex = "1.3.9"
|
regex = "1.3.9"
|
||||||
slug = "0.1.4"
|
slug = "0.1.4"
|
||||||
tokio = { version = "0.2.22", features = ["rt-threaded", "blocking"] }
|
tokio = { version = "0.2.22", features = ["rt-threaded", "blocking"] }
|
||||||
|
|
|
@ -2,6 +2,7 @@ mod errors;
|
||||||
mod row_structs;
|
mod row_structs;
|
||||||
mod store;
|
mod store;
|
||||||
mod structs;
|
mod structs;
|
||||||
|
mod reference_parser;
|
||||||
|
|
||||||
pub use crate::errors::NoteStoreError;
|
pub use crate::errors::NoteStoreError;
|
||||||
pub use crate::store::NoteStore;
|
pub use crate::store::NoteStore;
|
||||||
|
|
|
@ -0,0 +1,119 @@
|
||||||
|
use comrak::nodes::{AstNode, NodeValue};
|
||||||
|
use comrak::{parse_document, Arena, ComrakOptions};
|
||||||
|
use lazy_static::lazy_static;
|
||||||
|
use regex::bytes::Regex as BytesRegex;
|
||||||
|
use regex::Regex;
|
||||||
|
|
||||||
|
pub struct Finder(pub Vec<String>);
|
||||||
|
|
||||||
|
impl Finder {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Finder(Vec::new())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn iter_nodes<'a, F>(&mut self, node: &'a AstNode<'a>, f: &F)
|
||||||
|
where
|
||||||
|
F: Fn(&'a AstNode<'a>) -> Option<Vec<String>>,
|
||||||
|
{
|
||||||
|
if let Some(mut v) = f(node) {
|
||||||
|
self.0.append(&mut v);
|
||||||
|
}
|
||||||
|
for c in node.children() {
|
||||||
|
self.iter_nodes(c, f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Given a content block, return a list of all the page references found
|
||||||
|
/// within the block. The references may need further massaging.
|
||||||
|
pub(crate) fn find_links(document: &str) -> Vec<String> {
|
||||||
|
let arena = Arena::new();
|
||||||
|
let mut finder = Finder::new();
|
||||||
|
let root = parse_document(&arena, document, &ComrakOptions::default());
|
||||||
|
|
||||||
|
finder.iter_nodes(root, &|node| {
|
||||||
|
lazy_static! {
|
||||||
|
static ref RE_REFERENCES: BytesRegex = BytesRegex::new(r"(\[\[([^\]]+)\]\]|(\#[\w\-]+))").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
match &node.data.borrow().value {
|
||||||
|
&NodeValue::Text(ref text) => Some(
|
||||||
|
RE_REFERENCES
|
||||||
|
.captures_iter(text)
|
||||||
|
.map(|t| String::from_utf8_lossy(&t.get(1).unwrap().as_bytes()).to_string())
|
||||||
|
.collect(),
|
||||||
|
),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
finder.0
|
||||||
|
}
|
||||||
|
|
||||||
|
fn recase(title: &str) -> String {
|
||||||
|
lazy_static! {
|
||||||
|
static ref RE_PASS1: Regex = Regex::new(r"(?P<s>.)(?P<n>[A-Z][a-z]+)").unwrap();
|
||||||
|
static ref RE_PASS2: Regex = Regex::new(r"(?P<s>[[:lower:]]|\d)(?P<n>[[:upper:]])").unwrap();
|
||||||
|
static ref RE_PASS4: Regex = Regex::new(r"(?P<s>[a-z])(?P<n>\d)").unwrap();
|
||||||
|
static ref RE_PASS3: Regex = Regex::new(r"(_|-| )+").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
// This should panic if misused, so... :-)
|
||||||
|
let pass = title.to_string();
|
||||||
|
let pass = pass.strip_prefix("#").unwrap();
|
||||||
|
|
||||||
|
let pass = RE_PASS1.replace_all(&pass, "$s $n");
|
||||||
|
let pass = RE_PASS4.replace_all(&pass, "$s $n");
|
||||||
|
let pass = RE_PASS2.replace_all(&pass, "$s $n");
|
||||||
|
RE_PASS3.replace_all(&pass, " ").to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_page_titles(references: &Vec<String>) -> Vec<String> {
|
||||||
|
references
|
||||||
|
.iter()
|
||||||
|
.map(|s| {
|
||||||
|
let c = s.chars().nth(0);
|
||||||
|
match c {
|
||||||
|
Some('#') => recase(s),
|
||||||
|
Some('[') => s.strip_prefix("[[").unwrap().strip_suffix("]]").unwrap().to_string(),
|
||||||
|
Some(_) => s.clone(),
|
||||||
|
_ => "".to_string(),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end check: references are extracted from a markdown sample and
    /// converted to titles. Plain words, a bare `#`, an empty `[[]]` and a
    /// `[[ ]]` pair split across two lines must not produce titles.
    #[test]
    fn finds_expected() {
        let sample = r###"
# Header
- NotATest 1
- [[Test 2]]
- #Test3
- #TestFourAndSo
- #Test-six-is-six
- #
- [[]]

But *[[Test Seven]]* isn't. And *#Test_Eight____is_Messed-up*
And [[Test Bite Me]] is the worst.
Right? [[
]]
"###;
        let res = build_page_titles(&find_links(sample));
        let expected = vec![
            "Test 2",
            "Test 3",
            "Test Four And So",
            "Test six is six",
            "Test Seven",
            "Test Eight is Messed up",
            "Test Bite Me",
        ];
        // assert_eq! prints both vectors on failure, unlike a bare assert!.
        assert_eq!(res, expected);
    }
}
|
Loading…
Reference in New Issue