120 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			Rust
		
	
	
	
			
		
		
	
	
			120 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			Rust
		
	
	
	
| use comrak::nodes::{AstNode, NodeValue};
 | |
| use comrak::{parse_document, Arena, ComrakOptions};
 | |
| use lazy_static::lazy_static;
 | |
| use regex::bytes::Regex as BytesRegex;
 | |
| use regex::Regex;
 | |
| 
 | |
/// Accumulator for the reference strings gathered while walking a parsed
/// document.
///
/// The single public field holds the collected strings in the order in
/// which they were found.
#[derive(Debug, Default)]
pub struct Finder(pub Vec<String>);
 | |
| 
 | |
| impl Finder {
 | |
|     pub fn new() -> Self {
 | |
|         Finder(Vec::new())
 | |
|     }
 | |
| 
 | |
|     fn iter_nodes<'a, F>(&mut self, node: &'a AstNode<'a>, f: &F)
 | |
|     where
 | |
|         F: Fn(&'a AstNode<'a>) -> Option<Vec<String>>,
 | |
|     {
 | |
|         if let Some(mut v) = f(node) {
 | |
|             self.0.append(&mut v);
 | |
|         }
 | |
|         for c in node.children() {
 | |
|             self.iter_nodes(c, f);
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| /// Given a content block, return a list of all the page references found
 | |
| /// within the block.  The references may need further massaging.
 | |
| pub(crate) fn find_links(document: &str) -> Vec<String> {
 | |
|     let arena = Arena::new();
 | |
|     let mut finder = Finder::new();
 | |
|     let root = parse_document(&arena, document, &ComrakOptions::default());
 | |
| 
 | |
|     finder.iter_nodes(root, &|node| {
 | |
|         lazy_static! {
 | |
|             static ref RE_REFERENCES: BytesRegex = BytesRegex::new(r"(\[\[([^\]]+)\]\]|(\#[\w\-]+))").unwrap();
 | |
|         }
 | |
| 
 | |
|         match &node.data.borrow().value {
 | |
|             &NodeValue::Text(ref text) => Some(
 | |
|                 RE_REFERENCES
 | |
|                     .captures_iter(text)
 | |
|                     .map(|t| String::from_utf8_lossy(&t.get(1).unwrap().as_bytes()).to_string())
 | |
|                     .collect(),
 | |
|             ),
 | |
|             _ => None,
 | |
|         }
 | |
|     });
 | |
| 
 | |
|     finder.0
 | |
| }
 | |
| 
 | |
| fn recase(title: &str) -> String {
 | |
|     lazy_static! {
 | |
|         static ref RE_PASS1: Regex = Regex::new(r"(?P<s>.)(?P<n>[A-Z][a-z]+)").unwrap();
 | |
|         static ref RE_PASS2: Regex = Regex::new(r"(?P<s>[[:lower:]]|\d)(?P<n>[[:upper:]])").unwrap();
 | |
|         static ref RE_PASS4: Regex = Regex::new(r"(?P<s>[a-z])(?P<n>\d)").unwrap();
 | |
|         static ref RE_PASS3: Regex = Regex::new(r"(_|-| )+").unwrap();
 | |
|     }
 | |
| 
 | |
|     // This should panic if misused, so... :-)
 | |
|     let pass = title.to_string();
 | |
|     let pass = pass.strip_prefix("#").unwrap();
 | |
| 
 | |
|     let pass = RE_PASS1.replace_all(&pass, "$s $n");
 | |
|     let pass = RE_PASS4.replace_all(&pass, "$s $n");
 | |
|     let pass = RE_PASS2.replace_all(&pass, "$s $n");
 | |
|     RE_PASS3.replace_all(&pass, " ").to_string()
 | |
| }
 | |
| 
 | |
| fn build_page_titles(references: &Vec<String>) -> Vec<String> {
 | |
|     references
 | |
|         .iter()
 | |
|         .map(|s| {
 | |
|             let c = s.chars().nth(0);
 | |
|             match c {
 | |
|                 Some('#') => recase(s),
 | |
|                 Some('[') => s.strip_prefix("[[").unwrap().strip_suffix("]]").unwrap().to_string(),
 | |
|                 Some(_) => s.clone(),
 | |
|                 _ => "".to_string(),
 | |
|             }
 | |
|         })
 | |
|         .collect()
 | |
| }
 | |
| 
 | |
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end check: references are extracted from list items, emphasis
    /// and running text, then converted into titles. A bare `#`, an empty
    /// `[[]]` and a link split across lines must not produce any title.
    #[test]
    fn finds_expected() {
        let sample = r###"
# Header
- NotATest 1
- [[Test 2]]
- #Test3
- #TestFourAndSo
- #Test-six-is-six
- #
- [[]]

But *[[Test Seven]]* isn't.  And *#Test_Eight____is_Messed-up*
And [[Test Bite Me]] is the worst.
Right? [[
]]
"###;
        let res = build_page_titles(&find_links(sample));
        let expected = vec![
            "Test 2",
            "Test 3",
            "Test Four And So",
            "Test six is six",
            "Test Seven",
            "Test Eight is Messed up",
            "Test Bite Me",
        ];
        // `assert_eq!` prints both lists on failure, unlike a bare `assert!`.
        assert_eq!(res, expected);
    }
}
 |