154 lines
4.5 KiB
Rust
154 lines
4.5 KiB
Rust
use comrak::nodes::{AstNode, NodeValue};
|
|
use comrak::{parse_document, Arena, ComrakOptions};
|
|
use lazy_static::lazy_static;
|
|
use regex::bytes::Regex as BytesRegex;
|
|
use regex::Regex;
|
|
|
|
/// Accumulator used while walking a parsed document tree: the wrapped vector
/// holds every string collected so far, in the order it was appended.
struct Finder(pub Vec<String>);
|
|
|
|
impl Finder {
|
|
pub fn new() -> Self {
|
|
Finder(Vec::new())
|
|
}
|
|
|
|
fn iter_nodes<'a, F>(&mut self, node: &'a AstNode<'a>, f: &F)
|
|
where
|
|
F: Fn(&'a AstNode<'a>) -> Option<Vec<String>>,
|
|
{
|
|
if let Some(mut v) = f(node) {
|
|
self.0.append(&mut v);
|
|
}
|
|
for c in node.children() {
|
|
self.iter_nodes(c, f);
|
|
}
|
|
}
|
|
}
|
|
|
|
pub(super) fn find_links(document: &str) -> Vec<String> {
|
|
let arena = Arena::new();
|
|
let mut finder = Finder::new();
|
|
let root = parse_document(&arena, document, &ComrakOptions::default());
|
|
|
|
finder.iter_nodes(root, &|node| {
|
|
lazy_static! {
|
|
static ref RE_REFERENCES: BytesRegex = BytesRegex::new(r"(\[\[([^\]]+)\]\]|(\#[:\w\-]+))").unwrap();
|
|
}
|
|
|
|
match &node.data.borrow().value {
|
|
NodeValue::Text(ref text) => Some(
|
|
RE_REFERENCES
|
|
.captures_iter(text)
|
|
.filter_map(|t| t.get(1))
|
|
.map(|t| String::from_utf8_lossy(t.as_bytes()).to_string())
|
|
.filter(|s| !s.is_empty())
|
|
.collect(),
|
|
),
|
|
_ => None,
|
|
}
|
|
});
|
|
|
|
finder.0
|
|
}
|
|
|
|
// This function is for the camel and snake case handers.
|
|
fn recase(title: &str) -> String {
|
|
lazy_static! {
|
|
// Take every word that has a pattern of a capital letter
|
|
// followed by a lower case, and put a space between the
|
|
// capital and anything that preceeds it.
|
|
|
|
// TODO: Make Unicode aware.
|
|
static ref RE_PASS1: Regex = Regex::new(r"(?P<s>.)(?P<n>[A-Z][a-z]+)").unwrap();
|
|
|
|
// Take every instance of a lower case letter or number,
|
|
// followed by a capital letter, and put a space between them.
|
|
|
|
// TODO: Make Unicode aware. [[:lower:]] is an ASCII-ism.
|
|
static ref RE_PASS2: Regex = Regex::new(r"(?P<s>[[:lower:]]|\d)(?P<n>[[:upper:]])").unwrap();
|
|
|
|
// Take every instance of a word suffixed by a number and put
|
|
// a space between them.
|
|
|
|
// TODO: Make Unicode aware. [[:lower:]] is an ASCII-ism.
|
|
static ref RE_PASS4: Regex = Regex::new(r"(?P<s>[[:lower:]])(?P<n>\d)").unwrap();
|
|
|
|
// Take every instance of the one-or-more-of the symbols listed, and
|
|
// replace them with a space. This function is Unicode-irrelevant,
|
|
// although there is a list of symbols in the backreference parser
|
|
// that may disagree.
|
|
|
|
// TODO: Examime backreference parser and determine if this is
|
|
// sufficient.
|
|
static ref RE_PASS3: Regex = Regex::new(r"(:|_|-| )+").unwrap();
|
|
}
|
|
|
|
// This should panic if misused, so... :-)
|
|
let pass = title.to_string();
|
|
let pass = pass.strip_prefix("#").unwrap();
|
|
let pass = RE_PASS1.replace_all(&pass, "$s $n");
|
|
let pass = RE_PASS4.replace_all(&pass, "$s $n");
|
|
let pass = RE_PASS2.replace_all(&pass, "$s $n");
|
|
RE_PASS3.replace_all(&pass, " ").trim().to_string()
|
|
}
|
|
|
|
pub(super) fn build_page_titles(references: &[String]) -> Vec<String> {
|
|
references
|
|
.iter()
|
|
.filter_map(|s| match s.chars().next() {
|
|
Some('#') => Some(recase(s)),
|
|
Some('[') => Some(s.strip_prefix("[[").unwrap().strip_suffix("]]").unwrap().to_string()),
|
|
Some(_) => Some(s.clone()),
|
|
_ => None,
|
|
})
|
|
.filter(|s| !s.is_empty())
|
|
.collect()
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the whole pipeline: extract the references from `markdown`, then
    /// convert them into page titles.
    fn titles_in(markdown: &str) -> Vec<String> {
        build_page_titles(&find_links(markdown))
    }

    /// A document mixing valid references, degenerate ones (`#`, `#-_-`,
    /// `[[]]`, brackets split across lines) and plain text yields only the
    /// valid titles, in document order.
    #[test]
    fn finds_expected() {
        let sample = r###"
# Header
- NotATest 1
- [[Test 2]]
- #Test3
- #TestFourAndSo
- #Test-six-is-six
- #recipe:entree
- #
- #-_-
- #--Prefixed
- [[]]

But *[[Test Seven]]* isn't. And *#Test_Eight____is_Messed-up*
And [[Test Bite Me]] is the worst.
Right? [[
]]
"###;
        let expected = vec![
            "Test 2",
            "Test 3",
            "Test Four And So",
            "Test six is six",
            "recipe entree",
            "Prefixed",
            "Test Seven",
            "Test Eight is Messed up",
            "Test Bite Me",
        ];

        let res = titles_in(sample);
        assert!(res.iter().eq(expected.iter()), "{:?}", res);
    }

    /// An empty document produces no titles and does not panic.
    #[test]
    fn doesnt_crash_on_empty() {
        let expected: Vec<String> = Vec::new();

        let res = titles_in("");
        assert!(res.iter().eq(expected.iter()), "{:?}", res);
    }
}
|