From 0f9cf7057a8a955fb17868089720af2d72701a8b Mon Sep 17 00:00:00 2001 From: ef3d0c3e Date: Wed, 30 Oct 2024 11:17:35 +0100 Subject: [PATCH] Refactor TOC --- docs/blocks/code.nml | 2 +- docs/template.nml | 2 + src/compiler/compiler.rs | 93 ------------- src/elements/media.rs | 49 ++++--- src/elements/mod.rs | 1 + src/elements/style.rs | 2 +- src/elements/toc.rs | 294 +++++++++++++++++++++++++++++++++++++++ src/lsp/semantic.rs | 8 ++ src/parser/rule.rs | 1 + 9 files changed, 341 insertions(+), 111 deletions(-) create mode 100644 src/elements/toc.rs diff --git a/docs/blocks/code.nml b/docs/blocks/code.nml index 372fdab..9b3c132 100644 --- a/docs/blocks/code.nml +++ b/docs/blocks/code.nml @@ -1,6 +1,6 @@ @import ../template.nml @nav.previous = Code -%% +%% # Blockquotes diff --git a/docs/template.nml b/docs/template.nml index bb19ec4..7c541ec 100644 --- a/docs/template.nml +++ b/docs/template.nml @@ -30,3 +30,5 @@ end "link_pos": "Before", "link": ["", "🔗 ", " "] } + +#+TABLE_OF_CONTENT Table of Content diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index fcdcd5a..e778c78 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -9,8 +9,6 @@ use crate::document::document::CrossReference; use crate::document::document::Document; use crate::document::document::ElemReference; use crate::document::variable::Variable; -use crate::elements::section::section_kind; -use crate::elements::section::Section; use super::postprocess::PostProcess; @@ -209,7 +207,6 @@ impl<'a> Compiler<'a> { } result += r#"
"#; - // TODO: TOC // TODO: Author, Date, Title, Div } Target::LATEX => {} @@ -228,93 +225,6 @@ impl<'a> Compiler<'a> { result } - pub fn toc(&self, document: &dyn Document) -> String { - let toc_title = if let Some(title) = document.get_variable("toc.title") { - title - } else { - return String::new(); - }; - - let mut result = String::new(); - let mut sections: Vec<(&Section, usize)> = vec![]; - // Find last section with given depth - fn last_matching(depth: usize, sections: &Vec<(&Section, usize)>) -> Option { - for (idx, (section, _number)) in sections.iter().rev().enumerate() { - if section.depth < depth { - return None; - } else if section.depth == depth { - return Some(sections.len() - idx - 1); - } - } - - None - } - let content_borrow = document.content().borrow(); - for elem in content_borrow.iter() { - if let Some(section) = elem.downcast_ref::
() { - if section.kind & section_kind::NO_TOC != 0 { - continue; - } - let last = last_matching(section.depth, §ions); - if let Some(last) = last { - if sections[last].0.kind & section_kind::NO_NUMBER != 0 { - sections.push((section, sections[last].1)); - } else { - sections.push((section, sections[last].1 + 1)) - } - } else { - sections.push((section, 1)); - } - } - } - - match self.target() { - Target::HTML => { - let match_depth = |current: usize, target: usize| -> String { - let mut result = String::new(); - for _ in current..target { - result += "
    "; - } - for _ in target..current { - result += "
"; - } - result - }; - result += "
"; - result += format!( - "{}", - Compiler::sanitize(self.target(), toc_title.to_string()) - ) - .as_str(); - let mut current_depth = 0; - for (section, number) in sections { - result += match_depth(current_depth, section.depth).as_str(); - if section.kind & section_kind::NO_NUMBER != 0 { - result += format!( - "
  • {}
  • ", - Compiler::refname(self.target(), section.title.as_str()), - Compiler::sanitize(self.target(), section.title.as_str()) - ) - .as_str(); - } else { - result += format!( - "
  • {}
  • ", - Compiler::refname(self.target(), section.title.as_str()), - Compiler::sanitize(self.target(), section.title.as_str()) - ) - .as_str(); - } - - current_depth = section.depth; - } - match_depth(current_depth, 0); - result += "
    "; - } - _ => todo!(""), - } - result - } - pub fn compile(&self, document: &dyn Document) -> (CompiledDocument, PostProcess) { let borrow = document.content().borrow(); @@ -324,9 +234,6 @@ impl<'a> Compiler<'a> { // Body let mut body = r#"
    "#.to_string(); - // Table of content - body += self.toc(document).as_str(); - for i in 0..borrow.len() { let elem = &borrow[i]; diff --git a/src/elements/media.rs b/src/elements/media.rs index c46dc9f..32c1795 100644 --- a/src/elements/media.rs +++ b/src/elements/media.rs @@ -1,5 +1,4 @@ use std::collections::HashMap; -use std::rc::Rc; use std::str::FromStr; use ariadne::Fmt; @@ -24,9 +23,7 @@ use crate::parser::parser::ParserState; use crate::parser::reports::macros::*; use crate::parser::reports::*; use crate::parser::rule::RegexRule; -use crate::parser::source::Source; use crate::parser::source::Token; -use crate::parser::source::VirtualSource; use crate::parser::util; use crate::parser::util::parse_paragraph; use crate::parser::util::Property; @@ -451,30 +448,48 @@ impl RegexRule for MediaRule { .ok() .map(|(_, value)| value); - if let Some((sems, tokens)) = - Semantics::from_source(token.source(), &state.shared.lsp) - { - sems.add(matches.get(0).unwrap().start()..matches.get(0).unwrap().start()+1, tokens.media_sep); + if let Some((sems, tokens)) = Semantics::from_source(token.source(), &state.shared.lsp) { + sems.add( + matches.get(0).unwrap().start()..matches.get(0).unwrap().start() + 1, + tokens.media_sep, + ); // Refname - sems.add(matches.get(0).unwrap().start()+1..matches.get(0).unwrap().start()+2, tokens.media_refname_sep); + sems.add( + matches.get(0).unwrap().start() + 1..matches.get(0).unwrap().start() + 2, + tokens.media_refname_sep, + ); sems.add(matches.get(1).unwrap().range(), tokens.media_refname); - sems.add(matches.get(1).unwrap().end()..matches.get(1).unwrap().end()+1, tokens.media_refname_sep); + sems.add( + matches.get(1).unwrap().end()..matches.get(1).unwrap().end() + 1, + tokens.media_refname_sep, + ); // Uri - sems.add(matches.get(2).unwrap().start()-1..matches.get(2).unwrap().start(), tokens.media_uri_sep); + sems.add( + matches.get(2).unwrap().start() - 1..matches.get(2).unwrap().start(), + tokens.media_uri_sep, + ); sems.add(matches.get(2).unwrap().range(), tokens.media_uri); - sems.add(matches.get(2).unwrap().end()..matches.get(2).unwrap().end()+1, tokens.media_uri_sep); + sems.add( + matches.get(2).unwrap().end()..matches.get(2).unwrap().end() + 1, + tokens.media_uri_sep, + ); // Props - if let Some(props) = matches.get(3) - { - sems.add(props.start()-1..props.start(), tokens.media_props_sep); + if let Some(props) = matches.get(3) { + sems.add(props.start() - 1..props.start(), tokens.media_props_sep); sems.add(props.range(), tokens.media_props); - sems.add(props.end()..props.end()+1, tokens.media_props_sep); + sems.add(props.end()..props.end() + 1, tokens.media_props_sep); } } let description = match matches.get(4) { Some(content) => { - let source = escape_source(token.source(), content.range(), format!("Media[{refname}] description"), '\\', "\n"); + let source = escape_source( + token.source(), + content.range(), + format!("Media[{refname}] description"), + '\\', + "\n", + ); if source.content().is_empty() { None } else { @@ -536,6 +551,8 @@ impl RegexRule for MediaRule { #[cfg(test)] mod tests { + use std::rc::Rc; + use crate::parser::langparser::LangParser; use crate::parser::parser::Parser; use crate::parser::source::SourceFile; diff --git a/src/elements/mod.rs b/src/elements/mod.rs index 0493e9a..25f106d 100644 --- a/src/elements/mod.rs +++ b/src/elements/mod.rs @@ -18,3 +18,4 @@ pub mod style; pub mod tex; pub mod text; pub mod variable; +pub mod toc; diff --git a/src/elements/style.rs b/src/elements/style.rs index 101df6f..0adc8ce 100644 --- a/src/elements/style.rs +++ b/src/elements/style.rs @@ -151,7 +151,7 @@ static STATE_NAME: &str = "elements.style"; impl RegexRule for StyleRule { fn name(&self) -> &'static str { "Style" } - fn previous(&self) -> Option<&'static str> { Some("Layout") } + fn previous(&self) -> Option<&'static str> { Some("Toc") } fn regexes(&self) -> &[regex::Regex] { &self.re } diff --git a/src/elements/toc.rs b/src/elements/toc.rs new file mode 100644 index 0000000..4ff7f32 --- /dev/null +++ b/src/elements/toc.rs @@ -0,0 +1,294 @@ +use regex::Captures; +use regex::Regex; +use regex::RegexBuilder; + +use crate::compiler::compiler::Compiler; +use crate::compiler::compiler::Target; +use crate::document::document::Document; +use crate::document::element::ElemKind; +use crate::document::element::Element; +use crate::elements::section::section_kind; +use crate::elements::section::Section; +use crate::lsp::semantic::Semantics; +use crate::lua::kernel::CTX; +use crate::parser::parser::ParseMode; +use crate::parser::parser::ParserState; +use crate::parser::reports::Report; +use crate::parser::rule::RegexRule; +use crate::parser::source::Token; + +#[derive(Debug)] +struct Toc { + pub(self) location: Token, + pub(self) title: Option, +} + +impl Element for Toc { + fn location(&self) -> &Token { &self.location } + fn kind(&self) -> ElemKind { ElemKind::Block } + fn element_name(&self) -> &'static str { "Toc" } + fn compile( + &self, + compiler: &Compiler, + document: &dyn Document, + _cursor: usize, + ) -> Result { + let mut result = String::new(); + let mut sections: Vec<(&Section, usize)> = vec![]; + // Find last section with given depth + fn last_matching(depth: usize, sections: &Vec<(&Section, usize)>) -> Option { + for (idx, (section, _number)) in sections.iter().rev().enumerate() { + if section.depth < depth { + return None; + } else if section.depth == depth { + return Some(sections.len() - idx - 1); + } + } + + None + } + let content_borrow = document.content().borrow(); + for elem in content_borrow.iter() { + if let Some(section) = elem.downcast_ref::
    () { + if section.kind & section_kind::NO_TOC != 0 { + continue; + } + let last = last_matching(section.depth, §ions); + if let Some(last) = last { + if sections[last].0.kind & section_kind::NO_NUMBER != 0 { + sections.push((section, sections[last].1)); + } else { + sections.push((section, sections[last].1 + 1)) + } + } else { + sections.push((section, 1)); + } + } + } + + match compiler.target() { + Target::HTML => { + let match_depth = |current: usize, target: usize| -> String { + let mut result = String::new(); + for _ in current..target { + result += "
      "; + } + for _ in target..current { + result += "
    "; + } + result + }; + result += "
    "; + result += format!( + "{}", + Compiler::sanitize( + compiler.target(), + self.title.as_ref().unwrap_or(&String::new()) + ) + ) + .as_str(); + let mut current_depth = 0; + for (section, number) in sections { + result += match_depth(current_depth, section.depth).as_str(); + if section.kind & section_kind::NO_NUMBER != 0 { + result += format!( + "
  • {}
  • ", + Compiler::refname(compiler.target(), section.title.as_str()), + Compiler::sanitize(compiler.target(), section.title.as_str()) + ) + .as_str(); + } else { + result += format!( + "
  • {}
  • ", + Compiler::refname(compiler.target(), section.title.as_str()), + Compiler::sanitize(compiler.target(), section.title.as_str()) + ) + .as_str(); + } + + current_depth = section.depth; + } + match_depth(current_depth, 0); + result += "
    "; + } + _ => todo!(""), + } + Ok(result) + } +} + +#[auto_registry::auto_registry(registry = "rules", path = "crate::elements::toc")] +pub struct TocRule { + re: [Regex; 1], +} + +impl TocRule { + pub fn new() -> Self { + Self { + re: [ + RegexBuilder::new(r"(?:^|\n)(?:[^\S\n]*)#\+TABLE_OF_CONTENT(.*)") + .multi_line(true) + .build() + .unwrap(), + ], + } + } +} + +impl RegexRule for TocRule { + fn name(&self) -> &'static str { "Toc" } + + fn previous(&self) -> Option<&'static str> { Some("Layout") } + + fn regexes(&self) -> &[regex::Regex] { &self.re } + + fn enabled(&self, mode: &ParseMode, _id: usize) -> bool { !mode.paragraph_only } + + fn on_regex_match( + &self, + _index: usize, + state: &ParserState, + document: &dyn Document, + token: Token, + matches: Captures, + ) -> Vec { + let mut reports = vec![]; + + let name = matches.get(1).unwrap().as_str().trim_start().trim_end(); + + state.push( + document, + Box::new(Toc { + location: token.clone(), + title: (!name.is_empty()).then_some(name.to_string()), + }), + ); + + if let Some((sems, tokens)) = Semantics::from_source(token.source(), &state.shared.lsp) { + let start = matches + .get(0) + .map(|m| m.start() + token.source().content()[m.start()..].find('#').unwrap()) + .unwrap(); + sems.add(start..start + 2, tokens.toc_sep); + sems.add( + start + 2..start + 2 + "TABLE_OF_CONTENT".len(), + tokens.toc_token, + ); + sems.add(matches.get(1).unwrap().range(), tokens.toc_title); + } + + reports + } + + fn register_bindings<'lua>(&self, lua: &'lua mlua::Lua) -> Vec<(String, mlua::Function<'lua>)> { + let mut bindings = vec![]; + bindings.push(( + "push".to_string(), + lua.create_function(|_, title: Option| { + CTX.with_borrow(|ctx| { + ctx.as_ref().map(|ctx| { + ctx.state.push( + ctx.document, + Box::new(Toc { + location: ctx.location.clone(), + title, + }), + ) + }); + }); + Ok(()) + }) + .unwrap(), + )); + bindings + } +} + +#[cfg(test)] +mod tests { + use std::rc::Rc; + + use crate::parser::langparser::LangParser; + use crate::parser::parser::Parser; + use crate::parser::source::SourceFile; + use crate::validate_document; + use crate::validate_semantics; + + use super::*; + + #[test] + fn parser() { + let source = Rc::new(SourceFile::with_content( + "".to_string(), + r#" +#+TABLE_OF_CONTENT TOC +# Section1 +## SubSection +"# + .to_string(), + None, + )); + let parser = LangParser::default(); + let (doc, _) = parser.parse( + ParserState::new(&parser, None), + source, + None, + ParseMode::default(), + ); + + validate_document!(doc.content().borrow(), 0, + Toc { title == Some("TOC".to_string()) }; + Section; + Section; + ); + } + + #[test] + fn lua() { + let source = Rc::new(SourceFile::with_content( + "".to_string(), + r#" +%% +%% +"# + .to_string(), + None, + )); + let parser = LangParser::default(); + let (doc, _) = parser.parse( + ParserState::new(&parser, None), + source, + None, + ParseMode::default(), + ); + + validate_document!(doc.content().borrow(), 0, + Toc { title == Some("TOC".to_string()) }; + Toc { title == Option::::None }; + ); + } + + #[test] + fn semantic() { + let source = Rc::new(SourceFile::with_content( + "".to_string(), + r#" +#+TABLE_OF_CONTENT TOC + "# + .to_string(), + None, + )); + let parser = LangParser::default(); + let (_, state) = parser.parse( + ParserState::new_with_semantics(&parser, None), + source.clone(), + None, + ParseMode::default(), + ); + validate_semantics!(state, source.clone(), 0, + toc_sep { delta_line == 1, delta_start == 0, length == 2 }; + toc_token { delta_line == 0, delta_start == 2, length == 16 }; + toc_title { delta_line == 0, delta_start == 16, length == 4 }; + ); + } +} diff --git a/src/lsp/semantic.rs b/src/lsp/semantic.rs index ea28258..e61e9dc 100644 --- a/src/lsp/semantic.rs +++ b/src/lsp/semantic.rs @@ -175,6 +175,10 @@ pub struct Tokens { pub layout_props: (u32, u32), pub layout_type: (u32, u32), + pub toc_sep: (u32, u32), + pub toc_token: (u32, u32), + pub toc_title: (u32, u32), + pub media_sep: (u32, u32), pub media_refname_sep: (u32, u32), pub media_refname: (u32, u32), @@ -271,6 +275,10 @@ impl Tokens { layout_props: token!("enum"), layout_type: token!("function"), + toc_sep: token!("number"), + toc_token: token!("number"), + toc_title: token!("function"), + media_sep: token!("macro"), media_refname_sep: token!("macro"), media_refname: token!("enum"), diff --git a/src/parser/rule.rs b/src/parser/rule.rs index 60e2c54..7fbd2a3 100644 --- a/src/parser/rule.rs +++ b/src/parser/rule.rs @@ -224,6 +224,7 @@ mod tests { "Graphviz", "Media", "Layout", + "Toc", "Style", "Custom Style", "Section",