2024-07-25 13:13:12 +02:00
|
|
|
use std::any::Any;
|
2024-07-19 11:52:12 +02:00
|
|
|
|
|
|
|
use regex::Regex;
|
|
|
|
|
2024-07-25 13:13:12 +02:00
|
|
|
use crate::compiler::compiler::Compiler;
|
|
|
|
use crate::compiler::compiler::Target;
|
|
|
|
use crate::document::document::Document;
|
2024-07-26 20:01:10 +02:00
|
|
|
use crate::document::element::ContainerElement;
|
2024-07-25 13:13:12 +02:00
|
|
|
use crate::document::element::ElemKind;
|
|
|
|
use crate::document::element::Element;
|
2024-10-20 19:38:15 +02:00
|
|
|
use crate::parser::parser::ParseMode;
|
2024-08-05 18:40:17 +02:00
|
|
|
use crate::parser::parser::ParserState;
|
2024-10-24 10:18:49 +02:00
|
|
|
use crate::parser::reports::*;
|
2024-07-25 13:13:12 +02:00
|
|
|
use crate::parser::rule::Rule;
|
|
|
|
use crate::parser::source::Cursor;
|
|
|
|
use crate::parser::source::Token;
|
2024-07-19 11:52:12 +02:00
|
|
|
|
|
|
|
// TODO: Full refactor
// The problem is that documents parsed from other sources (e.g. by variables)
// are not merged correctly into the existing paragraph.
// One solution would be to use the "(\n){2,}" regex to split paragraphs, which would reduce the work needed for process_text.
// Another fix would be to keep parsing (recursively) into the same document (like the previous version).
// The issue is that this would break the current `Token` implementation,
// which would need to be reworked.
|
|
|
|
#[derive(Debug)]
|
2024-07-25 13:13:12 +02:00
|
|
|
pub struct Paragraph {
|
2024-07-26 20:01:10 +02:00
|
|
|
pub location: Token,
|
2024-07-25 13:13:12 +02:00
|
|
|
pub content: Vec<Box<dyn Element>>,
|
2024-07-19 11:52:12 +02:00
|
|
|
}
|
|
|
|
|
2024-07-25 13:13:12 +02:00
|
|
|
impl Paragraph {
|
|
|
|
pub fn find_back<P: FnMut(&&Box<dyn Element + 'static>) -> bool>(
|
|
|
|
&self,
|
|
|
|
predicate: P,
|
|
|
|
) -> Option<&Box<dyn Element>> {
|
|
|
|
self.content.iter().rev().find(predicate)
|
2024-07-19 11:52:12 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-07-25 13:13:12 +02:00
|
|
|
impl Element for Paragraph {
|
|
|
|
fn location(&self) -> &Token { &self.location }
|
2024-07-19 11:52:12 +02:00
|
|
|
|
2024-07-25 13:13:12 +02:00
|
|
|
fn kind(&self) -> ElemKind { ElemKind::Special }
|
2024-07-19 11:52:12 +02:00
|
|
|
|
2024-07-25 13:13:12 +02:00
|
|
|
fn element_name(&self) -> &'static str { "Paragraph" }
|
2024-07-19 11:52:12 +02:00
|
|
|
|
2024-10-20 19:38:15 +02:00
|
|
|
fn compile(
|
|
|
|
&self,
|
|
|
|
compiler: &Compiler,
|
|
|
|
document: &dyn Document,
|
|
|
|
cursor: usize,
|
|
|
|
) -> Result<String, String> {
|
2024-07-25 13:13:12 +02:00
|
|
|
if self.content.is_empty() {
|
|
|
|
return Ok(String::new());
|
|
|
|
}
|
2024-07-19 11:52:12 +02:00
|
|
|
|
2024-07-25 13:13:12 +02:00
|
|
|
match compiler.target() {
|
|
|
|
Target::HTML => {
|
2024-08-02 13:36:04 +02:00
|
|
|
if self.content.is_empty() {
|
|
|
|
return Ok(String::new());
|
2024-07-25 13:13:12 +02:00
|
|
|
}
|
|
|
|
|
2024-08-02 13:36:04 +02:00
|
|
|
let mut result = String::new();
|
|
|
|
result.push_str("<p>");
|
|
|
|
|
|
|
|
for elems in &self.content {
|
2024-10-20 19:38:15 +02:00
|
|
|
result += elems
|
|
|
|
.compile(compiler, document, cursor + result.len())?
|
|
|
|
.as_str();
|
2024-07-19 11:52:12 +02:00
|
|
|
}
|
2024-08-02 13:36:04 +02:00
|
|
|
|
|
|
|
result.push_str("</p>");
|
|
|
|
Ok(result)
|
2024-07-25 13:13:12 +02:00
|
|
|
}
|
2024-08-02 13:36:04 +02:00
|
|
|
_ => todo!("Unimplemented compiler"),
|
2024-07-25 13:13:12 +02:00
|
|
|
}
|
|
|
|
}
|
2024-07-26 20:01:10 +02:00
|
|
|
|
|
|
|
fn as_container(&self) -> Option<&dyn ContainerElement> { Some(self) }
|
|
|
|
}
|
|
|
|
|
|
|
|
impl ContainerElement for Paragraph {
|
|
|
|
fn contained(&self) -> &Vec<Box<dyn Element>> { &self.content }
|
|
|
|
|
|
|
|
fn push(&mut self, elem: Box<dyn Element>) -> Result<(), String> {
|
|
|
|
if elem.location().source() == self.location().source() {
|
|
|
|
self.location.range = self.location.start()..elem.location().end();
|
|
|
|
}
|
2024-08-01 16:15:10 +02:00
|
|
|
if elem.kind() == ElemKind::Block {
|
|
|
|
return Err("Attempted to push block element inside a paragraph".to_string());
|
|
|
|
}
|
2024-07-26 20:01:10 +02:00
|
|
|
self.content.push(elem);
|
|
|
|
Ok(())
|
|
|
|
}
|
2024-07-19 11:52:12 +02:00
|
|
|
}
|
|
|
|
|
2024-08-08 17:11:32 +02:00
|
|
|
#[auto_registry::auto_registry(registry = "rules", path = "crate::elements::paragraph")]
|
2024-07-25 13:13:12 +02:00
|
|
|
pub struct ParagraphRule {
|
2024-07-19 11:52:12 +02:00
|
|
|
re: Regex,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl ParagraphRule {
|
2024-07-25 13:13:12 +02:00
|
|
|
pub fn new() -> Self {
|
|
|
|
Self {
|
|
|
|
re: Regex::new(r"\n{2,}").unwrap(),
|
2024-07-19 11:52:12 +02:00
|
|
|
}
|
2024-07-25 13:13:12 +02:00
|
|
|
}
|
2024-07-19 11:52:12 +02:00
|
|
|
}
|
|
|
|
|
2024-07-25 13:13:12 +02:00
|
|
|
impl Rule for ParagraphRule {
|
2024-08-08 14:12:16 +02:00
|
|
|
fn name(&self) -> &'static str { "Paragraph" }
|
2024-10-20 19:38:15 +02:00
|
|
|
|
2024-08-08 14:12:16 +02:00
|
|
|
fn previous(&self) -> Option<&'static str> { Some("Comment") }
|
2024-07-19 11:52:12 +02:00
|
|
|
|
2024-10-20 19:38:15 +02:00
|
|
|
fn next_match(
|
|
|
|
&self,
|
|
|
|
_mode: &ParseMode,
|
|
|
|
_state: &ParserState,
|
|
|
|
cursor: &Cursor,
|
|
|
|
) -> Option<(usize, Box<dyn Any>)> {
|
2024-07-25 13:13:12 +02:00
|
|
|
self.re
|
2024-10-20 19:38:15 +02:00
|
|
|
.find_at(cursor.source.content(), cursor.pos)
|
|
|
|
.map(|m| (m.start(), Box::new([false; 0]) as Box<dyn Any>))
|
2024-07-25 13:13:12 +02:00
|
|
|
}
|
2024-07-19 11:52:12 +02:00
|
|
|
|
2024-07-25 13:13:12 +02:00
|
|
|
fn on_match(
|
|
|
|
&self,
|
2024-08-06 18:58:41 +02:00
|
|
|
state: &ParserState,
|
2024-07-25 13:13:12 +02:00
|
|
|
document: &dyn Document,
|
|
|
|
cursor: Cursor,
|
2024-08-06 18:58:41 +02:00
|
|
|
_match_data: Box<dyn Any>,
|
2024-10-24 09:54:52 +02:00
|
|
|
) -> (Cursor, Vec<Report>) {
|
2024-07-25 13:13:12 +02:00
|
|
|
let end_cursor = match self.re.captures_at(cursor.source.content(), cursor.pos) {
|
2024-07-19 11:52:12 +02:00
|
|
|
None => panic!("Unknown error"),
|
2024-07-25 13:13:12 +02:00
|
|
|
Some(capture) => cursor.at(capture.get(0).unwrap().end() - 1),
|
2024-07-19 11:52:12 +02:00
|
|
|
};
|
|
|
|
|
2024-08-06 18:58:41 +02:00
|
|
|
state.push(
|
2024-07-25 13:13:12 +02:00
|
|
|
document,
|
2024-07-26 20:01:10 +02:00
|
|
|
Box::new(Paragraph {
|
|
|
|
location: Token::new(cursor.pos..end_cursor.pos, cursor.source.clone()),
|
|
|
|
content: Vec::new(),
|
|
|
|
}),
|
2024-07-25 13:13:12 +02:00
|
|
|
);
|
2024-07-19 11:52:12 +02:00
|
|
|
|
|
|
|
(end_cursor, Vec::new())
|
2024-07-25 13:13:12 +02:00
|
|
|
}
|
2024-07-19 11:52:12 +02:00
|
|
|
}
|
2024-08-01 16:15:10 +02:00
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::elements::paragraph::Paragraph;
    use crate::elements::text::Text;
    use crate::parser::langparser::LangParser;
    use crate::parser::parser::Parser;
    use crate::parser::source::SourceFile;
    use crate::validate_document;
    use std::rc::Rc;

    /// Parses a source containing three paragraphs separated by blank lines
    /// and checks the text held by each resulting `Paragraph`.
    #[test]
    fn parse() {
        // The raw string's layout (line breaks, blank lines) is the test input
        // itself, so its lines deliberately start at column 0.
        let content = r#"
First paragraph
Second line

Second paragraph\
<- literal \\n


Last paragraph
"#
        .to_string();
        let source = Rc::new(SourceFile::with_content("".to_string(), content, None));

        let parser = LangParser::default();
        let state = ParserState::new(&parser, None);
        let (doc, _) = parser.parse(state, source, None, ParseMode::default());

        validate_document!(doc.content().borrow(), 0,
            Paragraph {
                Text { content == "First paragraph Second line" };
            };
            Paragraph {
                Text { content == "Second paragraph\n<- literal \\n" };
            };
            Paragraph {
                Text { content == "Last paragraph " };
            };
        );
    }
}
|