FEAT: Reference parser is now working.
It's probably not the fastest thing in the world, but it's going to be enough for now.
This commit is contained in:
parent
1c0f3abd6c
commit
4e04bb47d5
|
@ -15,6 +15,7 @@ friendly_id = "0.3.0"
|
|||
thiserror = "1.0.20"
|
||||
derive_builder = "0.9.0"
|
||||
lazy_static = "1.4.0"
|
||||
comrak = "0.8.2"
|
||||
regex = "1.3.9"
|
||||
slug = "0.1.4"
|
||||
tokio = { version = "0.2.22", features = ["rt-threaded", "blocking"] }
|
||||
|
|
|
@ -2,6 +2,7 @@ mod errors;
|
|||
mod row_structs;
|
||||
mod store;
|
||||
mod structs;
|
||||
mod reference_parser;
|
||||
|
||||
pub use crate::errors::NoteStoreError;
|
||||
pub use crate::store::NoteStore;
|
||||
|
|
|
@ -0,0 +1,119 @@
|
|||
use comrak::nodes::{AstNode, NodeValue};
|
||||
use comrak::{parse_document, Arena, ComrakOptions};
|
||||
use lazy_static::lazy_static;
|
||||
use regex::bytes::Regex as BytesRegex;
|
||||
use regex::Regex;
|
||||
|
||||
pub struct Finder(pub Vec<String>);
|
||||
|
||||
impl Finder {
|
||||
pub fn new() -> Self {
|
||||
Finder(Vec::new())
|
||||
}
|
||||
|
||||
fn iter_nodes<'a, F>(&mut self, node: &'a AstNode<'a>, f: &F)
|
||||
where
|
||||
F: Fn(&'a AstNode<'a>) -> Option<Vec<String>>,
|
||||
{
|
||||
if let Some(mut v) = f(node) {
|
||||
self.0.append(&mut v);
|
||||
}
|
||||
for c in node.children() {
|
||||
self.iter_nodes(c, f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Given a content block, return a list of all the page references found
|
||||
/// within the block. The references may need further massaging.
|
||||
pub(crate) fn find_links(document: &str) -> Vec<String> {
|
||||
let arena = Arena::new();
|
||||
let mut finder = Finder::new();
|
||||
let root = parse_document(&arena, document, &ComrakOptions::default());
|
||||
|
||||
finder.iter_nodes(root, &|node| {
|
||||
lazy_static! {
|
||||
static ref RE_REFERENCES: BytesRegex = BytesRegex::new(r"(\[\[([^\]]+)\]\]|(\#[\w\-]+))").unwrap();
|
||||
}
|
||||
|
||||
match &node.data.borrow().value {
|
||||
&NodeValue::Text(ref text) => Some(
|
||||
RE_REFERENCES
|
||||
.captures_iter(text)
|
||||
.map(|t| String::from_utf8_lossy(&t.get(1).unwrap().as_bytes()).to_string())
|
||||
.collect(),
|
||||
),
|
||||
_ => None,
|
||||
}
|
||||
});
|
||||
|
||||
finder.0
|
||||
}
|
||||
|
||||
fn recase(title: &str) -> String {
|
||||
lazy_static! {
|
||||
static ref RE_PASS1: Regex = Regex::new(r"(?P<s>.)(?P<n>[A-Z][a-z]+)").unwrap();
|
||||
static ref RE_PASS2: Regex = Regex::new(r"(?P<s>[[:lower:]]|\d)(?P<n>[[:upper:]])").unwrap();
|
||||
static ref RE_PASS4: Regex = Regex::new(r"(?P<s>[a-z])(?P<n>\d)").unwrap();
|
||||
static ref RE_PASS3: Regex = Regex::new(r"(_|-| )+").unwrap();
|
||||
}
|
||||
|
||||
// This should panic if misused, so... :-)
|
||||
let pass = title.to_string();
|
||||
let pass = pass.strip_prefix("#").unwrap();
|
||||
|
||||
let pass = RE_PASS1.replace_all(&pass, "$s $n");
|
||||
let pass = RE_PASS4.replace_all(&pass, "$s $n");
|
||||
let pass = RE_PASS2.replace_all(&pass, "$s $n");
|
||||
RE_PASS3.replace_all(&pass, " ").to_string()
|
||||
}
|
||||
|
||||
fn build_page_titles(references: &Vec<String>) -> Vec<String> {
|
||||
references
|
||||
.iter()
|
||||
.map(|s| {
|
||||
let c = s.chars().nth(0);
|
||||
match c {
|
||||
Some('#') => recase(s),
|
||||
Some('[') => s.strip_prefix("[[").unwrap().strip_suffix("]]").unwrap().to_string(),
|
||||
Some(_) => s.clone(),
|
||||
_ => "".to_string(),
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end check of `find_links` followed by `build_page_titles`.
    ///
    /// The sample mixes valid references with text that must be ignored:
    /// a word without any marker, a bare `#`, an empty `[[]]`, and a
    /// `[[` / `]]` pair split across a line break.
    // NOTE(review): the sample lines are assumed to start at column 0;
    // confirm against the original file if the whitespace matters.
    #[test]
    fn finds_expected() {
        let sample = r###"
# Header
- NotATest 1
- [[Test 2]]
- #Test3
- #TestFourAndSo
- #Test-six-is-six
- #
- [[]]

But *[[Test Seven]]* isn't. And *#Test_Eight____is_Messed-up*
And [[Test Bite Me]] is the worst.
Right? [[
]]
"###;
        let res = build_page_titles(&find_links(sample));
        let expected = vec![
            "Test 2",
            "Test 3",
            "Test Four And So",
            "Test six is six",
            "Test Seven",
            "Test Eight is Messed up",
            "Test Bite Me",
        ];
        // `assert_eq!` prints both lists on failure, unlike a bare `assert!`.
        assert_eq!(res, expected);
    }
}
|
Loading…
Reference in New Issue