From 6bd7bf40da0effe35a6d0ded6ea54e2c1d6a8808 Mon Sep 17 00:00:00 2001 From: ef3d0c3e Date: Thu, 25 Jul 2024 13:13:12 +0200 Subject: [PATCH] Media --- src/document/mod.rs | 1 + src/document/references.rs | 28 ++ src/elements/code.rs | 554 ++++++++++++++++++++++--------------- src/elements/graphviz.rs | 5 +- src/elements/media.rs | 462 +++++++++++++++++++++++++++++++ src/elements/mod.rs | 1 + src/elements/paragraph.rs | 156 ++++++----- src/elements/registrar.rs | 2 + src/parser/util.rs | 28 +- 9 files changed, 942 insertions(+), 295 deletions(-) create mode 100644 src/document/references.rs create mode 100644 src/elements/media.rs diff --git a/src/document/mod.rs b/src/document/mod.rs index d80a6b3..bff3340 100644 --- a/src/document/mod.rs +++ b/src/document/mod.rs @@ -1,4 +1,5 @@ pub mod document; +pub mod references; pub mod langdocument; pub mod element; pub mod variable; diff --git a/src/document/references.rs b/src/document/references.rs new file mode 100644 index 0000000..b9f0b8b --- /dev/null +++ b/src/document/references.rs @@ -0,0 +1,28 @@ +pub fn validate_refname(name: &str) -> Result<&str, String> { + let trimmed = name.trim_start().trim_end(); + if trimmed.is_empty() { + return Err("Refname cannot be empty".to_string()); + } + + for c in trimmed.chars() { + if c.is_ascii_punctuation() { + return Err(format!( + "Refname `{trimmed}` cannot contain punctuation codepoint: `{c}`" + )); + } + + if c.is_whitespace() { + return Err(format!( + "Refname `{trimmed}` cannot contain whitespaces: `{c}`" + )); + } + + if c.is_control() { + return Err(format!( + "Refname `{trimmed}` cannot contain control codepoint: `{c}`" + )); + } + } + + Ok(trimmed) +} diff --git a/src/elements/code.rs b/src/elements/code.rs index 164f109..f2d6cc4 100644 --- a/src/elements/code.rs +++ b/src/elements/code.rs @@ -1,25 +1,48 @@ -use std::{collections::HashMap, ops::Range, rc::Rc, sync::Once}; +use std::collections::HashMap; +use std::ops::Range; +use std::rc::Rc; +use std::sync::Once; 
-use ariadne::{Fmt, Label, Report, ReportKind}; -use crypto::{digest::Digest, sha2::Sha512}; -use mlua::{Function, Lua}; -use regex::{Captures, Regex}; -use syntect::{easy::HighlightLines, highlighting::ThemeSet, parsing::SyntaxSet}; +use ariadne::Fmt; +use ariadne::Label; +use ariadne::Report; +use ariadne::ReportKind; +use crypto::digest::Digest; +use crypto::sha2::Sha512; +use mlua::Function; +use mlua::Lua; +use regex::Captures; +use regex::Regex; +use syntect::easy::HighlightLines; +use syntect::highlighting::ThemeSet; +use syntect::parsing::SyntaxSet; -use crate::{cache::cache::{Cached, CachedError}, compiler::compiler::{Compiler, Target}, document::{document::Document, element::{ElemKind, Element}}, parser::{parser::Parser, rule::RegexRule, source::{Source, Token}, util::{self, Property, PropertyMapError, PropertyParser}}}; +use crate::cache::cache::Cached; +use crate::cache::cache::CachedError; +use crate::compiler::compiler::Compiler; +use crate::compiler::compiler::Target; +use crate::document::document::Document; +use crate::document::element::ElemKind; +use crate::document::element::Element; +use crate::parser::parser::Parser; +use crate::parser::rule::RegexRule; +use crate::parser::source::Source; +use crate::parser::source::Token; +use crate::parser::util::Property; +use crate::parser::util::PropertyMapError; +use crate::parser::util::PropertyParser; +use crate::parser::util::{self}; use lazy_static::lazy_static; #[derive(Clone, Copy, Debug, PartialEq, Eq)] -enum CodeKind -{ +enum CodeKind { FullBlock, MiniBlock, Inline, } #[derive(Debug)] -struct Code -{ +struct Code { location: Token, block: CodeKind, language: String, @@ -30,57 +53,91 @@ struct Code } impl Code { - fn new(location: Token, block: CodeKind, language: String, name: Option, code: String, theme: Option, line_offset: usize) -> Self { - Self { location, block, language, name, code, theme, line_offset } - } - - fn highlight_html(&self, compiler: &Compiler) -> Result - { - lazy_static! 
{ - static ref syntax_set : SyntaxSet = SyntaxSet::load_defaults_newlines(); - static ref theme_set : ThemeSet = ThemeSet::load_defaults(); + fn new( + location: Token, + block: CodeKind, + language: String, + name: Option, + code: String, + theme: Option, + line_offset: usize, + ) -> Self { + Self { + location, + block, + language, + name, + code, + theme, + line_offset, } - let syntax = match syntax_set.find_syntax_by_name(self.language.as_str()) - { + } + + fn highlight_html(&self, compiler: &Compiler) -> Result { + lazy_static! { + static ref syntax_set: SyntaxSet = SyntaxSet::load_defaults_newlines(); + static ref theme_set: ThemeSet = ThemeSet::load_defaults(); + } + let syntax = match syntax_set.find_syntax_by_name(self.language.as_str()) { Some(syntax) => syntax, - None => return Err(format!("Unable to find syntax for language: {}", self.language)) + None => { + return Err(format!( + "Unable to find syntax for language: {}", + self.language + )) + } }; - let theme_string = match self.theme.as_ref() - { + let theme_string = match self.theme.as_ref() { Some(theme) => theme.as_str(), None => "base16-ocean.dark", }; let mut h = HighlightLines::new(syntax, &theme_set.themes[theme_string]); let mut result = String::new(); - if self.block == CodeKind::FullBlock - { + if self.block == CodeKind::FullBlock { result += "
"; - if let Some(name) = &self.name - { - result += format!("
{}
", - compiler.sanitize(name.as_str())).as_str(); + if let Some(name) = &self.name { + result += format!( + "
{}
", + compiler.sanitize(name.as_str()) + ) + .as_str(); } - result += format!("
").as_str(); - for (line_id, line) in self.code.split(|c| c == '\n').enumerate() - { + result += + format!("
").as_str(); + for (line_id, line) in self.code.split(|c| c == '\n').enumerate() { result += "
"; // Line number - result += format!("
{}
", line_id+self.line_offset).as_str(); + result += + format!("
{}
", line_id + self.line_offset).as_str(); // Code result += "
";
-				match h.highlight_line(line, &syntax_set)
-				{
-					Err(e) => return Err(format!("Error highlighting line `{line}`: {}", e.to_string())),
+				match h.highlight_line(line, &syntax_set) {
+					Err(e) => {
+						return Err(format!(
+							"Error highlighting line `{line}`: {}",
+							e.to_string()
+						))
+					}
 					Ok(regions) => {
-						match syntect::html::styled_line_to_highlighted_html(&regions[..], syntect::html::IncludeBackground::No)
-						{
-							Err(e) => return Err(format!("Error highlighting code: {}", e.to_string())),
-							Ok(highlighted) => result += if highlighted.is_empty() { "
" } else { highlighted.as_str() } + match syntect::html::styled_line_to_highlighted_html( + &regions[..], + syntect::html::IncludeBackground::No, + ) { + Err(e) => { + return Err(format!("Error highlighting code: {}", e.to_string())) + } + Ok(highlighted) => { + result += if highlighted.is_empty() { + "
" + } else { + highlighted.as_str() + } + } } } } @@ -88,41 +145,59 @@ impl Code { } result += "
"; - } - else if self.block == CodeKind::MiniBlock - { + } else if self.block == CodeKind::MiniBlock { result += "
"; - for line in self.code.split(|c| c == '\n') - { + for line in self.code.split(|c| c == '\n') { result += ""; } result += "
";
 				// Code
-				match h.highlight_line(line, &syntax_set)
-				{
-					Err(e) => return Err(format!("Error highlighting line `{line}`: {}", e.to_string())),
+				match h.highlight_line(line, &syntax_set) {
+					Err(e) => {
+						return Err(format!(
+							"Error highlighting line `{line}`: {}",
+							e.to_string()
+						))
+					}
 					Ok(regions) => {
-						match syntect::html::styled_line_to_highlighted_html(&regions[..], syntect::html::IncludeBackground::No)
-						{
-							Err(e) => return Err(format!("Error highlighting code: {}", e.to_string())),
-							Ok(highlighted) => result += if highlighted.is_empty() { "
" } else { highlighted.as_str() } + match syntect::html::styled_line_to_highlighted_html( + &regions[..], + syntect::html::IncludeBackground::No, + ) { + Err(e) => { + return Err(format!("Error highlighting code: {}", e.to_string())) + } + Ok(highlighted) => { + result += if highlighted.is_empty() { + "
" + } else { + highlighted.as_str() + } + } } } } result += "
"; - } - else if self.block == CodeKind::Inline - { + } else if self.block == CodeKind::Inline { result += ""; - match h.highlight_line(self.code.as_str(), &syntax_set) - { - Err(e) => return Err(format!("Error highlighting line `{}`: {}", self.code, e.to_string())), + match h.highlight_line(self.code.as_str(), &syntax_set) { + Err(e) => { + return Err(format!( + "Error highlighting line `{}`: {}", + self.code, + e.to_string() + )) + } Ok(regions) => { - match syntect::html::styled_line_to_highlighted_html(&regions[..], syntect::html::IncludeBackground::No) - { - Err(e) => return Err(format!("Error highlighting code: {}", e.to_string())), - Ok(highlighted) => result += highlighted.as_str() + match syntect::html::styled_line_to_highlighted_html( + &regions[..], + syntect::html::IncludeBackground::No, + ) { + Err(e) => { + return Err(format!("Error highlighting code: {}", e.to_string())) + } + Ok(highlighted) => result += highlighted.as_str(), } } } @@ -133,81 +208,83 @@ impl Code { } } -impl Cached for Code -{ - type Key = String; - type Value = String; +impl Cached for Code { + type Key = String; + type Value = String; - fn sql_table() -> &'static str { + fn sql_table() -> &'static str { "CREATE TABLE IF NOT EXISTS cached_code ( digest TEXT PRIMARY KEY, highlighted BLOB NOT NULL);" - } + } - fn sql_get_query() -> &'static str { - "SELECT highlighted FROM cached_code WHERE digest = (?1)" - } + fn sql_get_query() -> &'static str { "SELECT highlighted FROM cached_code WHERE digest = (?1)" } - fn sql_insert_query() -> &'static str { + fn sql_insert_query() -> &'static str { "INSERT INTO cached_code (digest, highlighted) VALUES (?1, ?2)" - } + } - fn key(&self) -> ::Key { + fn key(&self) -> ::Key { let mut hasher = Sha512::new(); hasher.input((self.block as usize).to_be_bytes().as_slice()); hasher.input((self.line_offset as usize).to_be_bytes().as_slice()); - self.theme.as_ref().map(|theme| hasher.input(theme.as_bytes())); + self.theme + .as_ref() + .map(|theme| 
hasher.input(theme.as_bytes())); self.name.as_ref().map(|name| hasher.input(name.as_bytes())); hasher.input(self.language.as_bytes()); hasher.input(self.code.as_bytes()); hasher.result_str() - } + } } impl Element for Code { - fn location(&self) -> &Token { &self.location } + fn location(&self) -> &Token { &self.location } - fn kind(&self) -> ElemKind { if self.block == CodeKind::Inline { ElemKind::Inline } else { ElemKind::Block } } + fn kind(&self) -> ElemKind { + if self.block == CodeKind::Inline { + ElemKind::Inline + } else { + ElemKind::Block + } + } - fn element_name(&self) -> &'static str { "Code Block" } + fn element_name(&self) -> &'static str { "Code Block" } - fn to_string(&self) -> String { format!("{self:#?}") } + fn to_string(&self) -> String { format!("{self:#?}") } - fn compile(&self, compiler: &Compiler, _document: &dyn Document) - -> Result { - - match compiler.target() - { + fn compile(&self, compiler: &Compiler, _document: &dyn Document) -> Result { + match compiler.target() { Target::HTML => { - static CACHE_INIT : Once = Once::new(); - CACHE_INIT.call_once(|| if let Some(mut con) = compiler.cache() { - if let Err(e) = Code::init(&mut con) - { - eprintln!("Unable to create cache table: {e}"); + static CACHE_INIT: Once = Once::new(); + CACHE_INIT.call_once(|| { + if let Some(mut con) = compiler.cache() { + if let Err(e) = Code::init(&mut con) { + eprintln!("Unable to create cache table: {e}"); + } } }); - if let Some(mut con) = compiler.cache() - { - match self.cached(&mut con, |s| s.highlight_html(compiler)) - { + if let Some(mut con) = compiler.cache() { + match self.cached(&mut con, |s| s.highlight_html(compiler)) { Ok(s) => Ok(s), - Err(e) => match e - { - CachedError::SqlErr(e) => Err(format!("Querying the cache failed: {e}")), - CachedError::GenErr(e) => Err(e) - } + Err(e) => match e { + CachedError::SqlErr(e) => { + Err(format!("Querying the cache failed: {e}")) + } + CachedError::GenErr(e) => Err(e), + }, } - } - else - { + } else { 
self.highlight_html(compiler) } } - Target::LATEX => { todo!("") } + Target::LATEX => { + todo!("") + } } - } + } } pub struct CodeRule { @@ -218,132 +295,154 @@ pub struct CodeRule { impl CodeRule { pub fn new() -> Self { let mut props = HashMap::new(); - props.insert("line_offset".to_string(), + props.insert( + "line_offset".to_string(), Property::new( true, "Line number offset".to_string(), - Some("1".to_string()))); + Some("1".to_string()), + ), + ); Self { re: [ - Regex::new(r"(?:^|\n)```(?:\[((?:\\.|[^\\\\])*?)\])?(.*?)(?:,(.*))?\n((?:\\(?:.|\n)|[^\\\\])*?)```").unwrap(), - Regex::new(r"``(?:\[((?:\\.|[^\[\]\\])*?)\])?(?:(.*?)(?:\n|,))?((?:\\(?:.|\n)|[^\\\\])*?)``").unwrap(), + Regex::new( + r"(?:^|\n)```(?:\[((?:\\.|[^\\\\])*?)\])?(.*?)(?:,(.*))?\n((?:\\(?:.|\n)|[^\\\\])*?)```", + ) + .unwrap(), + Regex::new( + r"``(?:\[((?:\\.|[^\[\]\\])*?)\])?(?:(.*?)(?:\n|,))?((?:\\(?:.|\n)|[^\\\\])*?)``", + ) + .unwrap(), ], - properties: PropertyParser::new(props) + properties: PropertyParser::new(props), } } } -impl RegexRule for CodeRule -{ - fn name(&self) -> &'static str { "Code" } +impl RegexRule for CodeRule { + fn name(&self) -> &'static str { "Code" } - fn regexes(&self) -> &[regex::Regex] { &self.re } + fn regexes(&self) -> &[regex::Regex] { &self.re } - fn on_regex_match<'a>(&self, index: usize, parser: &dyn Parser, document: &'a dyn Document, token: Token, matches: Captures) - -> Vec, Range)>> { + fn on_regex_match<'a>( + &self, + index: usize, + parser: &dyn Parser, + document: &'a dyn Document, + token: Token, + matches: Captures, + ) -> Vec, Range)>> { let mut reports = vec![]; - let properties = match matches.get(1) - { + let properties = match matches.get(1) { None => match self.properties.default() { Ok(properties) => properties, Err(e) => { reports.push( Report::build(ReportKind::Error, token.source(), token.start()) - .with_message("Invalid code") - .with_label( - Label::new((token.source().clone(), token.range.clone())) - .with_message(format!("Code 
is missing properties: {e}")) - .with_color(parser.colors().error)) - .finish()); - return reports; - }, - } + .with_message("Invalid code") + .with_label( + Label::new((token.source().clone(), token.range.clone())) + .with_message(format!("Code is missing properties: {e}")) + .with_color(parser.colors().error), + ) + .finish(), + ); + return reports; + } + }, Some(props) => { - let processed = util::process_escaped('\\', "]", - props.as_str().trim_start().trim_end()); - match self.properties.parse(processed.as_str()) - { + let processed = + util::process_escaped('\\', "]", props.as_str().trim_start().trim_end()); + match self.properties.parse(processed.as_str()) { Err(e) => { reports.push( Report::build(ReportKind::Error, token.source(), props.start()) - .with_message("Invalid Code Properties") - .with_label( - Label::new((token.source().clone(), props.range())) - .with_message(e) - .with_color(parser.colors().error)) - .finish()); + .with_message("Invalid Code Properties") + .with_label( + Label::new((token.source().clone(), props.range())) + .with_message(e) + .with_color(parser.colors().error), + ) + .finish(), + ); return reports; } - Ok(properties) => properties + Ok(properties) => properties, } } }; - - let code_lang = match matches.get(2) - { + + let code_lang = match matches.get(2) { None => "Plain Text".to_string(), Some(lang) => { let code_lang = lang.as_str().trim_end().trim_start().to_string(); - if code_lang.is_empty() - { + if code_lang.is_empty() { reports.push( Report::build(ReportKind::Error, token.source(), lang.start()) - .with_message("Missing code language") - .with_label( - Label::new((token.source().clone(), lang.range())) - .with_message("No language specified") - .with_color(parser.colors().error)) - .finish()); + .with_message("Missing code language") + .with_label( + Label::new((token.source().clone(), lang.range())) + .with_message("No language specified") + .with_color(parser.colors().error), + ) + .finish(), + ); return reports; } - + 
// TODO: validate language code_lang } }; - let mut code_content = if index == 0 - { util::process_escaped('\\',"```", matches.get(4).unwrap().as_str()) } - else - { util::process_escaped('\\',"``", matches.get(3).unwrap().as_str()) }; - if code_content.bytes().last() == Some('\n' as u8) // Remove newline + let mut code_content = if index == 0 { + util::process_escaped('\\', "```", matches.get(4).unwrap().as_str()) + } else { + util::process_escaped('\\', "``", matches.get(3).unwrap().as_str()) + }; + if code_content.bytes().last() == Some('\n' as u8) + // Remove newline { code_content.pop(); } - if code_content.is_empty() - { + if code_content.is_empty() { reports.push( Report::build(ReportKind::Error, token.source(), token.start()) - .with_message("Missing code content") - .with_label( - Label::new((token.source().clone(), token.range.clone())) - .with_message("Code content cannot be empty") - .with_color(parser.colors().error)) - .finish()); + .with_message("Missing code content") + .with_label( + Label::new((token.source().clone(), token.range.clone())) + .with_message("Code content cannot be empty") + .with_color(parser.colors().error), + ) + .finish(), + ); return reports; } - let theme = document.get_variable("code.theme") + let theme = document + .get_variable("code.theme") .and_then(|var| Some(var.to_string())); - if index == 0 // Block + if index == 0 + // Block { - let code_name = matches.get(3) - .and_then(|name| { - let code_name = name.as_str().trim_end().trim_start().to_string(); - (!code_name.is_empty()).then_some(code_name) - }); - let line_offset = match properties.get("line_offset", - |prop, value| value.parse::().map_err(|e| (prop, e))) - { - Ok((_prop, offset)) => offset, - Err(e) => match e { - PropertyMapError::ParseError((prop, err)) => { - reports.push( + let code_name = matches.get(3).and_then(|name| { + let code_name = name.as_str().trim_end().trim_start().to_string(); + (!code_name.is_empty()).then_some(code_name) + }); + let line_offset 
= + match properties.get("line_offset", |prop, value| { + value.parse::().map_err(|e| (prop, e)) + }) { + Ok((_prop, offset)) => offset, + Err(e) => { + match e { + PropertyMapError::ParseError((prop, err)) => { + reports.push( Report::build(ReportKind::Error, token.source(), token.start()) .with_message("Invalid Code Property") .with_label( @@ -353,55 +452,68 @@ impl RegexRule for CodeRule err.fg(parser.colors().error))) .with_color(parser.colors().warning)) .finish()); - return reports; - }, - PropertyMapError::NotFoundError(err) => { - reports.push( - Report::build(ReportKind::Error, token.source(), token.start()) - .with_message("Invalid Code Property") - .with_label( - Label::new((token.source().clone(), token.start()+1..token.end())) - .with_message(format!("Property `{}` doesn't exist", - err.fg(parser.colors().info))) - .with_color(parser.colors().warning)) - .finish()); - return reports; + return reports; + } + PropertyMapError::NotFoundError(err) => { + reports.push( + Report::build(ReportKind::Error, token.source(), token.start()) + .with_message("Invalid Code Property") + .with_label( + Label::new(( + token.source().clone(), + token.start() + 1..token.end(), + )) + .with_message(format!( + "Property `{}` doesn't exist", + err.fg(parser.colors().info) + )) + .with_color(parser.colors().warning), + ) + .finish(), + ); + return reports; + } + } } - } + }; + + parser.push( + document, + Box::new(Code::new( + token.clone(), + CodeKind::FullBlock, + code_lang, + code_name, + code_content, + theme, + line_offset, + )), + ); + } else + // Maybe inline + { + let block = if code_content.contains('\n') { + CodeKind::MiniBlock + } else { + CodeKind::Inline }; - parser.push(document, Box::new( - Code::new( - token.clone(), - CodeKind::FullBlock, - code_lang, - code_name, - code_content, - theme, - line_offset - ) - )); - } - else // Maybe inline - { - let block = if code_content.contains('\n') { CodeKind::MiniBlock } - else { CodeKind::Inline }; - - 
parser.push(document, Box::new( - Code::new( - token.clone(), - block, - code_lang, - None, - code_content, - theme, - 1, - ) - )); + parser.push( + document, + Box::new(Code::new( + token.clone(), + block, + code_lang, + None, + code_content, + theme, + 1, + )), + ); } reports - } + } // TODO fn lua_bindings<'lua>(&self, _lua: &'lua Lua) -> Vec<(String, Function<'lua>)> { vec![] } diff --git a/src/elements/graphviz.rs b/src/elements/graphviz.rs index 6af8b1a..0dfaafe 100644 --- a/src/elements/graphviz.rs +++ b/src/elements/graphviz.rs @@ -314,10 +314,7 @@ impl RegexRule for GraphRule { token.source().clone(), token.start() + 1..token.end(), )) - .with_message(format!( - "Property `{}` is missing", - err.fg(parser.colors().info) - )) + .with_message(err) .with_color(parser.colors().warning), ) .finish(), diff --git a/src/elements/media.rs b/src/elements/media.rs new file mode 100644 index 0000000..f5eb62d --- /dev/null +++ b/src/elements/media.rs @@ -0,0 +1,462 @@ +use std::collections::HashMap; +use std::ops::Range; +use std::rc::Rc; +use std::str::FromStr; + +use ariadne::Fmt; +use ariadne::Label; +use ariadne::Report; +use ariadne::ReportKind; +use regex::Captures; +use regex::Match; +use regex::Regex; +use regex::RegexBuilder; + +use crate::compiler::compiler::Compiler; +use crate::compiler::compiler::Target; +use crate::document::document::Document; +use crate::document::document::DocumentAccessors; +use crate::document::element::ElemKind; +use crate::document::element::Element; +use crate::document::references::validate_refname; +use crate::parser::parser::ReportColors; +use crate::parser::rule::RegexRule; +use crate::parser::source::Source; +use crate::parser::source::Token; +use crate::parser::source::VirtualSource; +use crate::parser::util; +use crate::parser::util::parse_paragraph; +use crate::parser::util::Property; +use crate::parser::util::PropertyMap; +use crate::parser::util::PropertyMapError; +use crate::parser::util::PropertyParser; + +use 
super::paragraph::Paragraph; + +#[derive(Debug)] +pub enum MediaType { + IMAGE, + VIDEO, + AUDIO, +} + +impl FromStr for MediaType { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "image" => Ok(MediaType::IMAGE), + "video" => Ok(MediaType::VIDEO), + "audio" => Ok(MediaType::AUDIO), + _ => Err(format!("Unknown media type: {s}")), + } + } +} + +#[derive(Debug)] +struct MediaGroup { + pub(self) location: Token, + pub(self) media: Vec, +} + +impl MediaGroup { + fn push(&mut self, media: Media) -> Result<(), String> { + if self.location.source() != media.location.source() { + return Err(format!( + "Attempted to insert media from {} into MediaGroup from {}", + self.location.source(), + media.location.source() + )); + } + + self.location.range = self.location.start()..media.location.end(); + self.media.push(media); + Ok(()) + } +} + +impl Element for MediaGroup { + fn location(&self) -> &Token { &self.location } + + fn kind(&self) -> ElemKind { ElemKind::Block } + + fn element_name(&self) -> &'static str { "Media Group" } + + fn to_string(&self) -> String { format!("{self:#?}") } + + fn compile(&self, compiler: &Compiler, document: &dyn Document) -> Result { + match compiler.target() { + Target::HTML => { + let mut result = String::new(); + + result.push_str("
"); + for medium in &self.media { + match medium.compile(compiler, document) { + Ok(r) => result.push_str(r.as_str()), + Err(e) => return Err(e), + } + } + result.push_str("
"); + + Ok(result) + } + _ => todo!(""), + } + } +} + +#[derive(Debug)] +struct Media { + pub(self) location: Token, + pub(self) reference: String, + pub(self) uri: String, + pub(self) media_type: MediaType, + pub(self) width: Option, + pub(self) caption: Option, + pub(self) description: Option, +} + +impl Element for Media { + fn location(&self) -> &Token { &self.location } + + fn kind(&self) -> ElemKind { ElemKind::Block } + + fn element_name(&self) -> &'static str { "Media" } + + fn to_string(&self) -> String { format!("{self:#?}") } + + fn compile(&self, compiler: &Compiler, document: &dyn Document) -> Result { + match compiler.target() { + Target::HTML => { + let mut result = String::new(); + + result.push_str("
"); + + Ok(result) + } + _ => todo!(""), + } + } +} + +pub struct MediaRule { + re: [Regex; 1], + properties: PropertyParser, +} + +impl MediaRule { + pub fn new() -> Self { + let mut props = HashMap::new(); + props.insert( + "type".to_string(), + Property::new( + false, + "Override for the media type detection".to_string(), + None, + ), + ); + props.insert( + "width".to_string(), + Property::new(false, "Override for the media width".to_string(), None), + ); + Self { + re: [RegexBuilder::new( + r"^!\[(.*)\]\(((?:\\.|[^\\\\])*?)\)(?:\[((?:\\.|[^\\\\])*?)\])?((?:\\(?:.|\n)|[^\\\\])*?$)?", + ) + .multi_line(true) + .build() + .unwrap()], + properties: PropertyParser::new(props), + } + } + + fn validate_uri(uri: &str) -> Result<&str, String> { + let trimmed = uri.trim_start().trim_end(); + + if trimmed.is_empty() { + return Err("URIs is empty".to_string()); + } + + Ok(trimmed) + } + + fn parse_properties( + &self, + colors: &ReportColors, + token: &Token, + m: &Option, + ) -> Result, Range)>> { + match m { + None => match self.properties.default() { + Ok(properties) => Ok(properties), + Err(e) => Err( + Report::build(ReportKind::Error, token.source(), token.start()) + .with_message("Invalid Media Properties") + .with_label( + Label::new((token.source().clone(), token.range.clone())) + .with_message(format!("Media is missing required property: {e}")) + .with_color(colors.error), + ) + .finish(), + ), + }, + Some(props) => { + let processed = + util::process_escaped('\\', "]", props.as_str().trim_start().trim_end()); + match self.properties.parse(processed.as_str()) { + Err(e) => Err( + Report::build(ReportKind::Error, token.source(), props.start()) + .with_message("Invalid Media Properties") + .with_label( + Label::new((token.source().clone(), props.range())) + .with_message(e) + .with_color(colors.error), + ) + .finish(), + ), + Ok(properties) => Ok(properties), + } + } + } + } + + fn detect_filetype(filename: &str) -> Option { + let sep = match filename.rfind('.') { + 
Some(pos) => pos, + None => return None, + }; + + // TODO: https://developer.mozilla.org/en-US/docs/Web/Media/Formats/Containers + match filename.split_at(sep + 1).1.to_ascii_lowercase().as_str() { + "png" | "apng" | "avif" | "gif" | "webp" | "svg" | "bmp" | "jpg" | "jpeg" | "jfif" + | "pjpeg" | "pjp" => Some(MediaType::IMAGE), + _ => None, + } + } +} + +impl RegexRule for MediaRule { + fn name(&self) -> &'static str { "Media" } + + fn regexes(&self) -> &[regex::Regex] { &self.re } + + fn on_regex_match<'a>( + &self, + _: usize, + parser: &dyn crate::parser::parser::Parser, + document: &'a (dyn Document<'a> + 'a), + token: Token, + matches: Captures, + ) -> Vec, Range)>> { + let mut reports = vec![]; + + let refname = match ( + matches.get(1).unwrap(), + validate_refname(matches.get(1).unwrap().as_str()), + ) { + (_, Ok(refname)) => refname.to_string(), + (m, Err(err)) => { + reports.push( + Report::build(ReportKind::Error, token.source(), m.start()) + .with_message("Invalid Media Refname") + .with_label( + Label::new((token.source().clone(), m.range())).with_message(err), + ) + .finish(), + ); + return reports; + } + }; + + let uri = match ( + matches.get(2).unwrap(), + MediaRule::validate_uri(matches.get(2).unwrap().as_str()), + ) { + (_, Ok(uri)) => uri.to_string(), + (m, Err(err)) => { + reports.push( + Report::build(ReportKind::Error, token.source(), m.start()) + .with_message("Invalid Media URI") + .with_label( + Label::new((token.source().clone(), m.range())).with_message(err), + ) + .finish(), + ); + return reports; + } + }; + + // Properties + let properties = match self.parse_properties(parser.colors(), &token, &matches.get(3)) { + Ok(pm) => pm, + Err(report) => { + reports.push(report); + return reports; + } + }; + + let media_type = + match Self::detect_filetype(uri.as_str()) { + Some(media_type) => media_type, + None => match properties.get("type", |prop, value| { + MediaType::from_str(value.as_str()).map_err(|e| (prop, e)) + }) { + Ok((_prop, kind)) 
=> kind, + Err(e) => match e { + PropertyMapError::ParseError((prop, err)) => { + reports.push( + Report::build(ReportKind::Error, token.source(), token.start()) + .with_message("Invalid Media Property") + .with_label( + Label::new((token.source().clone(), token.range.clone())) + .with_message(format!( + "Property `type: {}` cannot be converted: {}", + prop.fg(parser.colors().info), + err.fg(parser.colors().error) + )) + .with_color(parser.colors().warning), + ) + .finish(), + ); + return reports; + } + PropertyMapError::NotFoundError(err) => { + reports.push( + Report::build(ReportKind::Error, token.source(), token.start()) + .with_message("Invalid Media Property") + .with_label( + Label::new(( + token.source().clone(), + token.start() + 1..token.end(), + )) + .with_message(format!("{err}. Required because mediatype could not be detected")) + .with_color(parser.colors().error), + ) + .finish(), + ); + return reports; + } + }, + }, + }; + + let width = properties + .get("width", |_, value| -> Result { + Ok(value.clone()) + }) + .ok() + .and_then(|(_, s)| Some(s)); + + let description = match matches.get(4) { + Some(content) => { + let source = Rc::new(VirtualSource::new( + Token::new(content.range(), token.source()), + format!("Media[{refname}] description"), + content.as_str().trim_start().trim_end().to_string(), + )); + if source.content().is_empty() { + None + } else { + match parse_paragraph(parser, source, document) { + Ok(paragraph) => Some(*paragraph), + Err(err) => { + reports.push( + Report::build(ReportKind::Error, token.source(), content.start()) + .with_message("Invalid Media Description") + .with_label( + Label::new((token.source().clone(), content.range())) + .with_message(format!( + "Could not parse description: {err}" + )) + .with_color(parser.colors().error), + ) + .finish(), + ); + return reports; + } + } + } + } + None => panic!("Unknown error"), + }; + + // TODO: caption + let mut group = match document.last_element_mut::() { + Some(group) => 
group, + None => { + parser.push( + document, + Box::new(MediaGroup { + location: token.clone(), + media: vec![], + }), + ); + + document.last_element_mut::().unwrap() + } + }; + + if let Err(err) = group.push(Media { + location: token.clone(), + reference: refname, + uri, + media_type, + width, + caption: None, + description, + }) { + reports.push( + Report::build(ReportKind::Error, token.source(), token.start()) + .with_message("Invalid Media") + .with_label( + Label::new((token.source().clone(), token.range.clone())) + .with_message(err) + .with_color(parser.colors().error), + ) + .finish(), + ); + } + + reports + } + + fn lua_bindings<'lua>(&self, _lua: &'lua mlua::Lua) -> Vec<(String, mlua::Function<'lua>)> { + vec![] + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn regex() { + let rule = MediaRule::new(); + let re = &rule.regexes()[0]; + + assert!(re.is_match("![refname](some path...)[some properties] some description")); + assert!(re.is_match( + r"![refname](some p\)ath...\\)[some propert\]ies\\\\] some description\\nanother line" + )); + assert!(re.is_match_at("![r1](uri1)[props1] desc1\n![r2](uri2)[props2] desc2", 26)); + } +} diff --git a/src/elements/mod.rs b/src/elements/mod.rs index 5f99511..8389ac4 100644 --- a/src/elements/mod.rs +++ b/src/elements/mod.rs @@ -13,3 +13,4 @@ pub mod code; pub mod tex; pub mod graphviz; pub mod raw; +pub mod media; diff --git a/src/elements/paragraph.rs b/src/elements/paragraph.rs index 65861ce..bb37c84 100644 --- a/src/elements/paragraph.rs +++ b/src/elements/paragraph.rs @@ -1,10 +1,22 @@ -use std::{any::Any, ops::Range, rc::Rc}; +use std::any::Any; +use std::ops::Range; +use std::rc::Rc; use ariadne::Report; -use mlua::{Function, Lua}; +use mlua::Function; +use mlua::Lua; use regex::Regex; -use crate::{compiler::compiler::{Compiler, Target}, document::{document::Document, element::{ElemKind, Element}}, parser::{parser::Parser, rule::Rule, source::{Cursor, Source, Token}}}; +use 
crate::compiler::compiler::Compiler; +use crate::compiler::compiler::Target; +use crate::document::document::Document; +use crate::document::element::ElemKind; +use crate::document::element::Element; +use crate::parser::parser::Parser; +use crate::parser::rule::Rule; +use crate::parser::source::Cursor; +use crate::parser::source::Source; +use crate::parser::source::Token; // TODO: Full refactor // Problem is that document parsed from other sources i.e by variables @@ -14,117 +26,127 @@ use crate::{compiler::compiler::{Compiler, Target}, document::{document::Documen // The issue is that this would break the current `Token` implementation // Which would need to be reworked #[derive(Debug)] -pub struct Paragraph -{ - location: Token, - pub content: Vec> +pub struct Paragraph { + location: Token, + pub content: Vec>, } -impl Paragraph -{ - pub fn new(location: Token) -> Self { - Self { location, content: Vec::new() } - } +impl Paragraph { + pub fn new(location: Token) -> Self { + Self { + location, + content: Vec::new(), + } + } pub fn is_empty(&self) -> bool { self.content.is_empty() } - pub fn push(&mut self, elem: Box) - { - if elem.location().source() == self.location().source() - { - self.location.range = self.location.start() .. 
elem.location().end(); + pub fn push(&mut self, elem: Box) { + if elem.location().source() == self.location().source() { + self.location.range = self.location.start()..elem.location().end(); } self.content.push(elem); } - pub fn find_back) -> bool>(&self, mut predicate: P) - -> Option<&Box> { - self.content.iter().rev() - .find(predicate) + pub fn find_back) -> bool>( + &self, + predicate: P, + ) -> Option<&Box> { + self.content.iter().rev().find(predicate) } } -impl Element for Paragraph -{ - fn location(&self) -> &Token { &self.location } +impl Element for Paragraph { + fn location(&self) -> &Token { &self.location } - fn kind(&self) -> ElemKind { ElemKind::Special } + fn kind(&self) -> ElemKind { ElemKind::Special } - fn element_name(&self) -> &'static str { "Paragraph" } + fn element_name(&self) -> &'static str { "Paragraph" } - fn to_string(&self) -> String { format!("{:#?}", self) } + fn to_string(&self) -> String { format!("{:#?}", self) } - fn compile(&self, compiler: &Compiler, document: &dyn Document) -> Result { - if self.content.is_empty() { return Ok(String::new()) } + fn compile(&self, compiler: &Compiler, document: &dyn Document) -> Result { + if self.content.is_empty() { + return Ok(String::new()); + } - match compiler.target() - { - Target::HTML => { + match compiler.target() { + Target::HTML => { let mut result = String::new(); //if prev.is_none() || prev.unwrap().downcast_ref::().is_none() - { result.push_str("

"); } + { + result.push_str("

"); + } //else //{ result.push_str(" "); } let err = self.content.iter().try_for_each(|elem| { - match elem.compile(compiler, document) - { + match elem.compile(compiler, document) { Err(e) => return Err(e), - Ok(content) => { result.push_str(content.as_str()); Ok(()) }, + Ok(content) => { + result.push_str(content.as_str()); + Ok(()) + } } }); //if next.is_none() || next.unwrap().downcast_ref::().is_none() - { result.push_str("

"); } - - match err { + result.push_str("

"); + } + + match err { Err(e) => Err(e), Ok(()) => Ok(result), } - } - Target::LATEX => todo!("Unimplemented compiler") - } - } + } + Target::LATEX => todo!("Unimplemented compiler"), + } + } } -pub struct ParagraphRule -{ +pub struct ParagraphRule { re: Regex, } impl ParagraphRule { - pub fn new() -> Self { - Self { - re: Regex::new(r"\n{2,}").unwrap() + pub fn new() -> Self { + Self { + re: Regex::new(r"\n{2,}").unwrap(), } - } + } } -impl Rule for ParagraphRule -{ - fn name(&self) -> &'static str { "Paragraphing" } +impl Rule for ParagraphRule { + fn name(&self) -> &'static str { "Paragraphing" } - fn next_match(&self, cursor: &Cursor) -> Option<(usize, Box)> { - self.re.find_at(cursor.source.content(), cursor.pos) - .and_then(|m| Some((m.start(), Box::new([false;0]) as Box)) ) - } + fn next_match(&self, cursor: &Cursor) -> Option<(usize, Box)> { + self.re + .find_at(cursor.source.content(), cursor.pos) + .and_then(|m| Some((m.start(), Box::new([false; 0]) as Box))) + } - fn on_match(&self, parser: &dyn Parser, document: &dyn Document, cursor: Cursor, _match_data: Option>) - -> (Cursor, Vec, Range)>>) { - - let end_cursor = match self.re.captures_at(cursor.source.content(), cursor.pos) - { + fn on_match( + &self, + parser: &dyn Parser, + document: &dyn Document, + cursor: Cursor, + _match_data: Option>, + ) -> (Cursor, Vec, Range)>>) { + let end_cursor = match self.re.captures_at(cursor.source.content(), cursor.pos) { None => panic!("Unknown error"), - Some(capture) => - cursor.at(capture.get(0).unwrap().end()-1) + Some(capture) => cursor.at(capture.get(0).unwrap().end() - 1), }; - parser.push(document, Box::new(Paragraph::new( - Token::new(cursor.pos..end_cursor.pos, cursor.source.clone()) - ))); + parser.push( + document, + Box::new(Paragraph::new(Token::new( + cursor.pos..end_cursor.pos, + cursor.source.clone(), + ))), + ); (end_cursor, Vec::new()) - } + } // TODO fn lua_bindings<'lua>(&self, _lua: &'lua Lua) -> Vec<(String, Function<'lua>)> { vec![] } diff 
--git a/src/elements/registrar.rs b/src/elements/registrar.rs index 379bbf9..9e14258 100644 --- a/src/elements/registrar.rs +++ b/src/elements/registrar.rs @@ -6,6 +6,7 @@ use super::graphviz::GraphRule; use super::import::ImportRule; use super::link::LinkRule; use super::list::ListRule; +use super::media::MediaRule; use super::paragraph::ParagraphRule; use super::raw::RawRule; use super::script::ScriptRule; @@ -28,6 +29,7 @@ pub fn register(parser: &mut P) { parser.add_rule(Box::new(CodeRule::new()), None).unwrap(); parser.add_rule(Box::new(TexRule::new()), None).unwrap(); parser.add_rule(Box::new(GraphRule::new()), None).unwrap(); + parser.add_rule(Box::new(MediaRule::new()), None).unwrap(); parser.add_rule(Box::new(StyleRule::new()), None).unwrap(); parser.add_rule(Box::new(SectionRule::new()), None).unwrap(); diff --git a/src/parser/util.rs b/src/parser/util.rs index 8027c2a..dcfa20d 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -1,4 +1,5 @@ use std::collections::HashMap; +use std::rc::Rc; use unicode_segmentation::UnicodeSegmentation; @@ -7,6 +8,9 @@ use crate::document::document::DocumentAccessors; use crate::document::element::ElemKind; use crate::elements::paragraph::Paragraph; +use super::parser::Parser; +use super::source::Source; + /// Processes text for escape characters and paragraphing pub fn process_text(document: &dyn Document, content: &str) -> String { let mut escaped = false; @@ -129,6 +133,26 @@ pub fn process_escaped>(escape: char, token: &'static str, content processed } +/// Parses source into a single paragraph +/// If source contains anything but a single paragraph, an error is returned +pub fn parse_paragraph<'a>( + parser: &dyn Parser, + source: Rc, + document: &'a dyn Document<'a>, +) -> Result, &'static str> { + let parsed = parser.parse(source.clone(), Some(document)); + if parsed.content().borrow().len() > 1 { + return Err("Parsed document contains more than a single paragraph"); + } else if 
parsed.content().borrow().len() == 0 { + return Err("Parser document is empty"); + } else if parsed.last_element::().is_none() { + return Err("Parsed element is not a paragraph"); + } + + let paragraph = parsed.content().borrow_mut().pop().unwrap(); + Ok(paragraph.downcast::().unwrap()) +} + #[derive(Debug)] pub struct Property { required: bool, @@ -210,9 +234,7 @@ pub struct PropertyParser { } impl PropertyParser { - pub fn new(properties: HashMap) -> Self { - Self { properties } - } + pub fn new(properties: HashMap) -> Self { Self { properties } } /// Attempts to build a default propertymap ///
"); + let width = self + .width + .as_ref() + .map_or(String::new(), |w| format!(r#" width="{w}""#)); + match self.media_type { + MediaType::IMAGE => result.push_str( + format!(r#""#, self.uri).as_str(), + ), + MediaType::VIDEO => todo!(), + MediaType::AUDIO => todo!(), + } + result.push_str(format!(r#"

{}

"#, "TODO").as_str()); + if let Some(paragraph) = self.description.as_ref() { + match paragraph.compile(compiler, document) { + Ok(res) => result.push_str(res.as_str()), + Err(err) => return Err(err), + } + } + result.push_str("