diff --git a/src/document/references.rs b/src/document/references.rs index 49a14b7..0a7e75e 100644 --- a/src/document/references.rs +++ b/src/document/references.rs @@ -55,7 +55,7 @@ pub mod tests { None, )); let parser = LangParser::default(); - let doc = parser.parse(ParserState::new(&parser, None), source, None); + let (doc, _) = parser.parse(ParserState::new(&parser, None), source, None); assert_eq!(validate_refname(&*doc, " abc ", true), Ok("abc")); assert_eq!( diff --git a/src/document/variable.rs b/src/document/variable.rs index 2bfff30..e40172d 100644 --- a/src/document/variable.rs +++ b/src/document/variable.rs @@ -63,7 +63,9 @@ impl Variable for BaseVariable { self.to_string(), )); - state.with_state(|new_state| new_state.parser.parse_into(new_state, source, document)) + state.with_state(|new_state| { + let _ = new_state.parser.parse_into(new_state, source, document); + }); } } diff --git a/src/elements/code.rs b/src/elements/code.rs index d192605..05921c6 100644 --- a/src/elements/code.rs +++ b/src/elements/code.rs @@ -680,7 +680,7 @@ fn fact(n: usize) -> usize None, )); let parser = LangParser::default(); - let doc = parser.parse(ParserState::new(&parser, None), source, None); + let (doc, _) = parser.parse(ParserState::new(&parser, None), source, None); let borrow = doc.content().borrow(); let found = borrow @@ -726,7 +726,7 @@ fn fact(n: usize) -> usize None, )); let parser = LangParser::default(); - let doc = parser.parse(ParserState::new(&parser, None), source, None); + let (doc, _) = parser.parse(ParserState::new(&parser, None), source, None); let borrow = doc.content().borrow(); let found = borrow diff --git a/src/elements/comment.rs b/src/elements/comment.rs index c1b0761..982f7d4 100644 --- a/src/elements/comment.rs +++ b/src/elements/comment.rs @@ -123,7 +123,7 @@ COMMENT ::Test None, )); let parser = LangParser::default(); - let doc = parser.parse(ParserState::new(&parser, None), source, None); + let (doc, _) = 
parser.parse(ParserState::new(&parser, None), source, None); validate_document!(doc.content().borrow(), 0, Paragraph { diff --git a/src/elements/customstyle.rs b/src/elements/customstyle.rs index 4489a60..e06f596 100644 --- a/src/elements/customstyle.rs +++ b/src/elements/customstyle.rs @@ -399,6 +399,8 @@ impl Rule for CustomStyleRule { .custom_styles .borrow_mut() .insert(Rc::new(style)); + + ctx.state.reset_match("Custom Style").unwrap(); }); }); @@ -454,6 +456,8 @@ impl Rule for CustomStyleRule { return; } ctx.state.shared.custom_styles.borrow_mut().insert(Rc::new(style)); + + ctx.state.reset_match("Custom Style").unwrap(); }); }); @@ -505,7 +509,7 @@ pre |styled| post °Hello°. None, )); let parser = LangParser::default(); - let doc = parser.parse(ParserState::new(&parser, None), source, None); + let (doc, _) = parser.parse(ParserState::new(&parser, None), source, None); validate_document!(doc.content().borrow(), 0, Paragraph { @@ -549,7 +553,7 @@ pre [styled] post (Hello). None, )); let parser = LangParser::default(); - let doc = parser.parse(ParserState::new(&parser, None), source, None); + let (doc, _) = parser.parse(ParserState::new(&parser, None), source, None); validate_document!(doc.content().borrow(), 0, Paragraph { diff --git a/src/elements/import.rs b/src/elements/import.rs index 2407d15..dc7c682 100644 --- a/src/elements/import.rs +++ b/src/elements/import.rs @@ -161,7 +161,7 @@ impl RegexRule for ImportRule { }; state.with_state(|new_state| { - let import_doc = new_state.parser.parse(new_state, import, Some(document)); + let (import_doc, _) = new_state.parser.parse(new_state, import, Some(document)); document.merge(import_doc.content(), import_doc.scope(), Some(&import_as)); }); diff --git a/src/elements/layout.rs b/src/elements/layout.rs index 710768e..05262e2 100644 --- a/src/elements/layout.rs +++ b/src/elements/layout.rs @@ -895,7 +895,7 @@ mod tests { None, )); let parser = LangParser::default(); - let doc = 
parser.parse(ParserState::new(&parser, None), source, None); + let (doc, _) = parser.parse(ParserState::new(&parser, None), source, None); validate_document!(doc.content().borrow(), 0, Layout { token == LayoutToken::Begin, id == 0 }; @@ -947,7 +947,7 @@ mod tests { None, )); let parser = LangParser::default(); - let doc = parser.parse(ParserState::new(&parser, None), source, None); + let (doc, _) = parser.parse(ParserState::new(&parser, None), source, None); validate_document!(doc.content().borrow(), 0, Layout { token == LayoutToken::Begin, id == 0 }; diff --git a/src/elements/link.rs b/src/elements/link.rs index e56a6db..cced54c 100644 --- a/src/elements/link.rs +++ b/src/elements/link.rs @@ -284,7 +284,7 @@ Some [link](url). None, )); let parser = LangParser::default(); - let doc = parser.parse(ParserState::new(&parser, None), source, None); + let (doc, _) = parser.parse(ParserState::new(&parser, None), source, None); validate_document!(doc.content().borrow(), 0, Paragraph { @@ -314,7 +314,7 @@ nml.link.push("**BOLD link**", "another url") None, )); let parser = LangParser::default(); - let doc = parser.parse(ParserState::new(&parser, None), source, None); + let (doc, _) = parser.parse(ParserState::new(&parser, None), source, None); validate_document!(doc.content().borrow(), 0, Paragraph { diff --git a/src/elements/list.rs b/src/elements/list.rs index cfd8623..f896952 100644 --- a/src/elements/list.rs +++ b/src/elements/list.rs @@ -463,7 +463,7 @@ mod tests { )); let parser = LangParser::default(); let state = ParserState::new(&parser, None); - let doc = parser.parse(state, source, None); + let (doc, _) = parser.parse(state, source, None); validate_document!(doc.content().borrow(), 0, ListMarker { numbered == false, kind == MarkerKind::Open }; diff --git a/src/elements/media.rs b/src/elements/media.rs index 2f2dd56..a59a6ce 100644 --- a/src/elements/media.rs +++ b/src/elements/media.rs @@ -548,7 +548,7 @@ mod tests { None, )); let parser = LangParser::default(); 
- let doc = parser.parse(ParserState::new(&parser, None), source, None); + let (doc, _) = parser.parse(ParserState::new(&parser, None), source, None); let borrow = doc.content().borrow(); let group = borrow.first().as_ref().unwrap().as_container().unwrap(); diff --git a/src/elements/paragraph.rs b/src/elements/paragraph.rs index e480b2b..24b401c 100644 --- a/src/elements/paragraph.rs +++ b/src/elements/paragraph.rs @@ -165,7 +165,7 @@ Last paragraph None, )); let parser = LangParser::default(); - let doc = parser.parse(ParserState::new(&parser, None), source, None); + let (doc, _) = parser.parse(ParserState::new(&parser, None), source, None); validate_document!(doc.content().borrow(), 0, Paragraph { diff --git a/src/elements/raw.rs b/src/elements/raw.rs index 1328b1f..ba939de 100644 --- a/src/elements/raw.rs +++ b/src/elements/raw.rs @@ -283,7 +283,7 @@ Break{?[kind=block] Raw?}NewParagraph{??} None, )); let parser = LangParser::default(); - let doc = parser.parse(ParserState::new(&parser, None), source, None); + let (doc, _) = parser.parse(ParserState::new(&parser, None), source, None); validate_document!(doc.content().borrow(), 0, Paragraph; @@ -306,7 +306,7 @@ Break%%NewParagraph%") None, )); let parser = LangParser::default(); - let doc = parser.parse(ParserState::new(&parser, None), source, None); + let (doc, _) = parser.parse(ParserState::new(&parser, None), source, None); validate_document!(doc.content().borrow(), 0, Paragraph; diff --git a/src/elements/script.rs b/src/elements/script.rs index 1eab65d..374ea1e 100644 --- a/src/elements/script.rs +++ b/src/elements/script.rs @@ -320,7 +320,7 @@ Evaluation: %% None, )); let parser = LangParser::default(); - let doc = parser.parse(ParserState::new(&parser, None), source, None); + let (doc, _) = parser.parse(ParserState::new(&parser, None), source, None); validate_document!(doc.content().borrow(), 0, Paragraph; diff --git a/src/elements/section.rs b/src/elements/section.rs index 8a24961..52ff5cc 100644 --- 
a/src/elements/section.rs +++ b/src/elements/section.rs @@ -438,7 +438,7 @@ mod tests { None, )); let parser = LangParser::default(); - let doc = parser.parse(ParserState::new(&parser, None), source, None); + let (doc, _) = parser.parse(ParserState::new(&parser, None), source, None); validate_document!(doc.content().borrow(), 0, Section { depth == 1, title == "1" }; @@ -468,7 +468,7 @@ nml.section.push("6", 6, "", "refname") None, )); let parser = LangParser::default(); - let doc = parser.parse(ParserState::new(&parser, None), source, None); + let (doc, _) = parser.parse(ParserState::new(&parser, None), source, None); validate_document!(doc.content().borrow(), 0, Section { depth == 1, title == "1" }; @@ -495,18 +495,16 @@ nml.section.push("6", 6, "", "refname") )); let parser = LangParser::default(); let state = ParserState::new(&parser, None); - let _ = parser.parse(state, source, None); + let (_, state) = parser.parse(state, source, None); - // TODO2 - /* let style = state.shared .styles - .current_style(section_style::STYLE_KEY) + .borrow() + .current(section_style::STYLE_KEY) .downcast_rc::() .unwrap(); assert_eq!(style.link_pos, SectionLinkPos::None); assert_eq!(style.link, ["a".to_string(), "b".to_string(), "c".to_string()]); - */ } } diff --git a/src/elements/style.rs b/src/elements/style.rs index 0291fa2..cd65631 100644 --- a/src/elements/style.rs +++ b/src/elements/style.rs @@ -230,7 +230,7 @@ __`UNDERLINE+EM`__ None, )); let parser = LangParser::default(); - let doc = parser.parse(ParserState::new(&parser, None), source, None); + let (doc, _) = parser.parse(ParserState::new(&parser, None), source, None); validate_document!(doc.content().borrow(), 0, Paragraph { diff --git a/src/elements/tex.rs b/src/elements/tex.rs index 117a654..b6b7fb8 100644 --- a/src/elements/tex.rs +++ b/src/elements/tex.rs @@ -450,7 +450,7 @@ $[kind=block,env=another] e^{i\pi}=-1$ None, )); let parser = LangParser::default(); - let doc = parser.parse(ParserState::new(&parser, None), 
source, None); + let (doc, _) = parser.parse(ParserState::new(&parser, None), source, None); validate_document!(doc.content().borrow(), 0, Tex { mathmode == true, tex == "1+1=2", env == "main", caption == Some("Some, text\\".to_string()) }; @@ -472,7 +472,7 @@ $[env=another] e^{i\pi}=-1$ None, )); let parser = LangParser::default(); - let doc = parser.parse(ParserState::new(&parser, None), source, None); + let (doc, _) = parser.parse(ParserState::new(&parser, None), source, None); validate_document!(doc.content().borrow(), 0, Paragraph { diff --git a/src/main.rs b/src/main.rs index 14348ba..57099d4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -56,7 +56,7 @@ fn parse( // Parse let source = SourceFile::new(input.to_string(), None).unwrap(); - let doc = parser.parse(ParserState::new(parser, None), Rc::new(source), None); + let (doc, _) = parser.parse(ParserState::new(parser, None), Rc::new(source), None); if debug_opts.contains(&"ast".to_string()) { println!("-- BEGIN AST DEBUGGING --"); diff --git a/src/parser/langparser.rs b/src/parser/langparser.rs index b0ea1d2..3ba7e21 100644 --- a/src/parser/langparser.rs +++ b/src/parser/langparser.rs @@ -50,12 +50,12 @@ impl Parser for LangParser { fn has_error(&self) -> bool { *self.err_flag.borrow() } - fn parse<'a>( - &self, - state: ParserState, + fn parse<'p, 'a, 'doc>( + &'p self, + state: ParserState<'p, 'a>, source: Rc, - parent: Option<&'a dyn Document<'a>>, - ) -> Box + 'a> { + parent: Option<&'doc dyn Document<'doc>>, + ) -> (Box + 'doc>, ParserState<'p, 'a>) { let doc = LangDocument::new(source.clone(), parent); let content = source.content(); @@ -105,7 +105,6 @@ impl Parser for LangParser { } // Rule States - self.handle_reports(state.shared.rule_state.borrow_mut().on_scope_end( &state, &doc, @@ -120,15 +119,15 @@ impl Parser for LangParser { ))), ); - return Box::new(doc); + return (Box::new(doc), state); } - fn parse_into<'a>( - &self, - state: ParserState, + fn parse_into<'p, 'a, 'doc>( + &'p self, + state: 
ParserState<'p, 'a>, source: Rc, document: &'doc dyn Document<'doc>, + ) -> ParserState<'p, 'a> { let content = source.content(); let mut cursor = Cursor::new(0usize, source.clone()); @@ -164,6 +163,7 @@ impl Parser for LangParser { } } + return state; // State //self.handle_reports(source.clone(), // self.state_mut().on_scope_end(&self, &document, super::state::Scope::DOCUMENT)); diff --git a/src/parser/parser.rs b/src/parser/parser.rs index 7a78823..c76f583 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -1,10 +1,10 @@ +use ariadne::Label; +use ariadne::Report; use std::any::Any; use std::cell::RefCell; use std::collections::HashSet; use std::ops::Range; use std::rc::Rc; -use ariadne::Label; -use ariadne::Report; use unicode_segmentation::UnicodeSegmentation; use super::customstyle::CustomStyleHolder; @@ -55,7 +55,7 @@ impl ReportColors { } } -/// The state that is shared with the state's children +/// The state that is shared with the state's children pub struct SharedState { pub rule_state: RefCell, @@ -84,9 +84,11 @@ impl SharedState { }; // Register default kernel - s.kernels.borrow_mut() + s.kernels + .borrow_mut() .insert("main".to_string(), Kernel::new(parser)); + // Default styles & layouts parser.rules().iter().for_each(|rule| { rule.register_styles(&mut *s.styles.borrow_mut()); rule.register_layouts(&mut *s.layouts.borrow_mut()); @@ -118,9 +120,9 @@ pub struct ParserState<'a, 'b> { impl<'a, 'b> ParserState<'a, 'b> { /// Constructs a new state for a given parser with an optional parent /// - /// Parent should be None when parsing a brand new document. - /// If you have to set the parent to Some(..) (e.g for imports or sub-document), - /// be sure to use the [`ParserState::with_state`] method instead, this create a + /// Parent should be None when parsing a brand new document. If you have to + /// set the parent to Some(..) 
(e.g for imports or sub-document), be sure + /// to use the [`ParserState::with_state`] method instead, this creates a /// RAII lived state for use within bounded lifetime. pub fn new(parser: &'a dyn Parser, parent: Option<&'a ParserState<'a, 'b>>) -> Self { let matches = parser.rules().iter().map(|_| (0, None)).collect::>(); @@ -153,24 +155,37 @@ impl<'a, 'b> ParserState<'a, 'b> { /// Updates matches from a given start position e.g [`Cursor`] /// /// # Return + /// /// 1. The cursor position after updating the matches /// 2. (Optional) The winning match with it's match data + /// If the winning match is None, it means that the document has no more + /// rules to match, i.e. the rest of the content should be added as a + /// [`Text`] element. + /// The match data should be passed to the [`Rule::on_match`] method. /// - /// If the winning match is None, it means that the document has no more rule to match - /// I.E The rest of the content should be added as a [`Text`] element. - /// The match data should be passed to the [`Rule::on_match`] method - pub fn update_matches( - &self, - cursor: &Cursor, - ) -> (Cursor, Option<(usize, Box)>) { + /// # Strategy + /// + /// This function calls [`Rule::next_match`] on the rules defined for the + /// parser. It then takes the rule that has the closest `next_match` and + /// returns it. If next_match starts on an escaped character i.e `\\`, + /// then it starts over to find another match for that rule. + /// In case multiple rules have the same `next_match`, the rules that are + /// defined first in the parser are prioritized. See [`Parser::add_rule`] for + /// information on how to prioritize rules. + /// + /// Note that the result of every call to [`Rule::next_match`] gets stored + /// in a table: [`ParserState::matches`]. Until the cursor steps over a + /// position in the table, `next_match` won't be called. 
+ pub fn update_matches(&self, cursor: &Cursor) -> (Cursor, Option<(usize, Box)>) { let mut matches_borrow = self.matches.borrow_mut(); - self.parser.rules() + self.parser + .rules() .iter() .zip(matches_borrow.iter_mut()) .for_each(|(rule, (matched_at, match_data))| { // Don't upate if not stepped over yet - if *matched_at > cursor.pos && rule.downcast_ref::().is_none() { + if *matched_at > cursor.pos { // TODO: maybe we should expose matches() so it becomes possible to dynamically register a new rule return; } @@ -215,15 +230,18 @@ impl<'a, 'b> ParserState<'a, 'b> { .map(|(winner, (pos, _))| (winner, *pos)) .unwrap(); - if next_pos == usize::MAX // No rule has matched + if next_pos == usize::MAX + // No rule has matched { let content = cursor.source.content(); // No winners, i.e no matches left return (cursor.at(content.len()), None); } - return (cursor.at(next_pos), - Some((winner, matches_borrow[winner].1.take().unwrap()))) + return ( + cursor.at(next_pos), + Some((winner, matches_borrow[winner].1.take().unwrap())), + ); } /// Add an [`Element`] to the [`Document`] @@ -244,15 +262,53 @@ impl<'a, 'b> ParserState<'a, 'b> { } else { // Process paragraph events if doc.last_element::().is_some_and(|_| true) { - self.parser.handle_reports( - self.shared.rule_state.borrow_mut() - .on_scope_end(&self, doc, super::state::Scope::PARAGRAPH), - ); + self.parser + .handle_reports(self.shared.rule_state.borrow_mut().on_scope_end( + &self, + doc, + super::state::Scope::PARAGRAPH, + )); } doc.push(elem); } } + + /// Resets the position and the match_data for a given rule. This is used + /// in order to have 'dynamic' rules that may not match at first, but their + /// matching rule is modified through the parsing process. + /// + /// This function also recursively calls itself on it's `parent`, in order + /// to fully reset the match. + /// + /// See [`CustomStyleRule`] for an example of how this is used. 
+ /// + /// # Errors + /// + /// Returns an error if `rule_name` was not found in the parser's ruleset. + pub fn reset_match(&self, rule_name: &str) -> Result<(), String> + { + if self.parser.rules().iter() + .zip(self.matches.borrow_mut().iter_mut()) + .try_for_each(|(rule, (match_pos, match_data))| { + if rule.name() != rule_name { return Ok(()) } + + *match_pos = 0; + match_data.take(); + Err(()) + }).is_ok() + { + return Err(format!("Could not find rule: {rule_name}")); + } + + // Recursively reset + if let Some(parent) = self.parent + { + return parent.reset_match(rule_name); + } + + Ok(()) + } } pub trait Parser { @@ -268,37 +324,52 @@ pub trait Parser { /// Whether the parser emitted an error during it's parsing process fn has_error(&self) -> bool; - + /// Parse [`Source`] into a new [`Document`] /// /// # Errors /// - /// This method will not fail because we try to optimistically recover from parsing errors. - /// However the resulting document should not get compiled if an error has happened - /// see [`Parser::has_error()`] for reference - fn parse<'a>( - &self, - state: ParserState, + /// This method will not fail because we try to optimistically recover from + /// parsing errors. However the resulting document should not get compiled + /// if an error has happened, see [`Parser::has_error()`] for reference + /// + /// # Returns + /// + /// This method returns the resulting [`Document`] after parsing `source`, + /// note that the [`ParserState`] is only meant to perform testing and not + /// meant to be reused. + fn parse<'p, 'a, 'doc>( + &'p self, + state: ParserState<'p, 'a>, source: Rc, - parent: Option<&'a dyn Document<'a>>, - ) -> Box + 'a>; + parent: Option<&'doc dyn Document<'doc>>, + ) -> (Box + 'doc>, ParserState<'p, 'a>); /// Parse [`Source`] into an already existing [`Document`] /// /// # Errors /// - /// This method will not fail because we try to optimistically recover from parsing errors. 
- /// However the resulting document should not get compiled if an error has happened - /// see [`Parser::has_error()`] for reference - fn parse_into<'a>(&self, - state: ParserState, - source: Rc, document: &'a dyn Document<'a>); + /// This method will not fail because we try to optimistically recover from + /// parsing errors. However the resulting document should not get compiled + /// if an error has happened see [`Parser::has_error()`] for reference + /// + /// # Returns + /// + /// The returned [`ParserState`] is not meant to be reused, it's meant for + /// testing. + fn parse_into<'p, 'a, 'doc>( + &'p self, + state: ParserState<'p, 'a>, + source: Rc, + document: &'doc dyn Document<'doc>, + ) -> ParserState<'p, 'a>; - fn add_rule( - &mut self, - rule: Box, - after: Option<&'static str>, - ) -> Result<(), String> { + /// Adds a rule to the parser. + /// + /// # Warning + /// + /// This method must not be called if a [`ParserState`] for this parser exists. + fn add_rule(&mut self, rule: Box, after: Option<&'static str>) -> Result<(), String> { if let Some(_) = self .rules() .iter() @@ -312,12 +383,12 @@ pub trait Parser { // Try to insert after if let Some(after) = after { - let index = - self.rules() - .iter() - .enumerate() - .find(|(_, rule)| rule.name() == after) - .map(|(idx, _)| idx); + let index = self + .rules() + .iter() + .enumerate() + .find(|(_, rule)| rule.name() == after) + .map(|(idx, _)| idx); if let Some(index) = index { self.rules_mut().insert(index, rule); @@ -331,10 +402,9 @@ pub trait Parser { Ok(()) } - fn handle_reports( - &self, - reports: Vec, Range)>>, - ) { + /// Handles the reports produced by parsing. The default is to output them + /// to stderr, but you are free to modify it. 
+ fn handle_reports(&self, reports: Vec, Range)>>) { for mut report in reports { let mut sources: HashSet> = HashSet::new(); fn recurse_source(sources: &mut HashSet>, source: Rc) { @@ -364,8 +434,8 @@ pub trait Parser { if let Some(_s) = source.downcast_ref::() { report.labels.push( Label::new((location.source(), location.start() + 1..location.end())) - .with_message("In file included from here") - .with_order(-1), + .with_message("In file included from here") + .with_order(-1), ); }; @@ -373,12 +443,12 @@ pub trait Parser { let start = location.start() + (location.source().content().as_bytes()[location.start()] == '\n' as u8) - .then_some(1) - .unwrap_or(0); + .then_some(1) + .unwrap_or(0); report.labels.push( Label::new((location.source(), start..location.end())) - .with_message("In evaluation of") - .with_order(-1), + .with_message("In evaluation of") + .with_order(-1), ); }; } diff --git a/src/parser/util.rs b/src/parser/util.rs index f8d9668..8c73daa 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -144,6 +144,7 @@ pub fn parse_paragraph<'a>( new_state .parser .parse(new_state, source.clone(), Some(document)) + .0 }); if parsed.content().borrow().len() > 1 { return Err("Parsed document contains more than a single paragraph"); @@ -422,20 +423,23 @@ mod tests { (&doc as &dyn Document) .last_element_mut::() .unwrap() - .push(Box::new(Comment::new(tok.clone(), "COMMENT".to_string()))); + .push(Box::new(Comment::new(tok.clone(), "COMMENT".to_string()))) + .unwrap(); assert_eq!(process_text(&doc, "\na"), "a"); // A space is appended as previous element is inline (&doc as &dyn Document) .last_element_mut::() .unwrap() - .push(Box::new(Text::new(tok.clone(), "TEXT".to_string()))); + .push(Box::new(Text::new(tok.clone(), "TEXT".to_string()))) + .unwrap(); assert_eq!(process_text(&doc, "\na"), " a"); (&doc as &dyn Document) .last_element_mut::() .unwrap() - .push(Box::new(Style::new(tok.clone(), 0, false))); + .push(Box::new(Style::new(tok.clone(), 0, 
false))) + .unwrap(); assert_eq!(process_text(&doc, "\na"), " a"); }