This commit is contained in:
ef3d0c3e 2024-07-25 13:13:12 +02:00
parent 933ec6f604
commit 6bd7bf40da
9 changed files with 942 additions and 295 deletions

View file

@ -1,4 +1,5 @@
pub mod document; pub mod document;
pub mod references;
pub mod langdocument; pub mod langdocument;
pub mod element; pub mod element;
pub mod variable; pub mod variable;

View file

@ -0,0 +1,28 @@
/// Checks that `name` is usable as a reference name.
///
/// Surrounding whitespace is ignored; the returned slice is the trimmed name.
///
/// # Errors
///
/// Returns a human-readable message when the trimmed name is empty, or when it
/// contains an ASCII punctuation character, a whitespace character or a control
/// character. Only the first offending character is reported, and when a
/// character falls into several categories the message is chosen in that order.
pub fn validate_refname(name: &str) -> Result<&str, String> {
    let trimmed = name.trim();
    if trimmed.is_empty() {
        return Err("Refname cannot be empty".to_string());
    }

    let offending = trimmed
        .chars()
        .find(|c| c.is_ascii_punctuation() || c.is_whitespace() || c.is_control());

    match offending {
        None => Ok(trimmed),
        Some(c) if c.is_ascii_punctuation() => Err(format!(
            "Refname `{trimmed}` cannot contain punctuation codepoint: `{c}`"
        )),
        Some(c) if c.is_whitespace() => Err(format!(
            "Refname `{trimmed}` cannot contain whitespaces: `{c}`"
        )),
        Some(c) => Err(format!(
            "Refname `{trimmed}` cannot contain control codepoint: `{c}`"
        )),
    }
}

View file

@ -1,25 +1,48 @@
use std::{collections::HashMap, ops::Range, rc::Rc, sync::Once}; use std::collections::HashMap;
use std::ops::Range;
use std::rc::Rc;
use std::sync::Once;
use ariadne::{Fmt, Label, Report, ReportKind}; use ariadne::Fmt;
use crypto::{digest::Digest, sha2::Sha512}; use ariadne::Label;
use mlua::{Function, Lua}; use ariadne::Report;
use regex::{Captures, Regex}; use ariadne::ReportKind;
use syntect::{easy::HighlightLines, highlighting::ThemeSet, parsing::SyntaxSet}; use crypto::digest::Digest;
use crypto::sha2::Sha512;
use mlua::Function;
use mlua::Lua;
use regex::Captures;
use regex::Regex;
use syntect::easy::HighlightLines;
use syntect::highlighting::ThemeSet;
use syntect::parsing::SyntaxSet;
use crate::{cache::cache::{Cached, CachedError}, compiler::compiler::{Compiler, Target}, document::{document::Document, element::{ElemKind, Element}}, parser::{parser::Parser, rule::RegexRule, source::{Source, Token}, util::{self, Property, PropertyMapError, PropertyParser}}}; use crate::cache::cache::Cached;
use crate::cache::cache::CachedError;
use crate::compiler::compiler::Compiler;
use crate::compiler::compiler::Target;
use crate::document::document::Document;
use crate::document::element::ElemKind;
use crate::document::element::Element;
use crate::parser::parser::Parser;
use crate::parser::rule::RegexRule;
use crate::parser::source::Source;
use crate::parser::source::Token;
use crate::parser::util::Property;
use crate::parser::util::PropertyMapError;
use crate::parser::util::PropertyParser;
use crate::parser::util::{self};
use lazy_static::lazy_static; use lazy_static::lazy_static;
#[derive(Clone, Copy, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum CodeKind enum CodeKind {
{
FullBlock, FullBlock,
MiniBlock, MiniBlock,
Inline, Inline,
} }
#[derive(Debug)] #[derive(Debug)]
struct Code struct Code {
{
location: Token, location: Token,
block: CodeKind, block: CodeKind,
language: String, language: String,
@ -30,57 +53,91 @@ struct Code
} }
impl Code { impl Code {
fn new(location: Token, block: CodeKind, language: String, name: Option<String>, code: String, theme: Option<String>, line_offset: usize) -> Self { fn new(
Self { location, block, language, name, code, theme, line_offset } location: Token,
} block: CodeKind,
language: String,
fn highlight_html(&self, compiler: &Compiler) -> Result<String, String> name: Option<String>,
{ code: String,
lazy_static! { theme: Option<String>,
static ref syntax_set : SyntaxSet = SyntaxSet::load_defaults_newlines(); line_offset: usize,
static ref theme_set : ThemeSet = ThemeSet::load_defaults(); ) -> Self {
Self {
location,
block,
language,
name,
code,
theme,
line_offset,
} }
let syntax = match syntax_set.find_syntax_by_name(self.language.as_str()) }
{
fn highlight_html(&self, compiler: &Compiler) -> Result<String, String> {
lazy_static! {
static ref syntax_set: SyntaxSet = SyntaxSet::load_defaults_newlines();
static ref theme_set: ThemeSet = ThemeSet::load_defaults();
}
let syntax = match syntax_set.find_syntax_by_name(self.language.as_str()) {
Some(syntax) => syntax, Some(syntax) => syntax,
None => return Err(format!("Unable to find syntax for language: {}", self.language)) None => {
return Err(format!(
"Unable to find syntax for language: {}",
self.language
))
}
}; };
let theme_string = match self.theme.as_ref() let theme_string = match self.theme.as_ref() {
{
Some(theme) => theme.as_str(), Some(theme) => theme.as_str(),
None => "base16-ocean.dark", None => "base16-ocean.dark",
}; };
let mut h = HighlightLines::new(syntax, &theme_set.themes[theme_string]); let mut h = HighlightLines::new(syntax, &theme_set.themes[theme_string]);
let mut result = String::new(); let mut result = String::new();
if self.block == CodeKind::FullBlock if self.block == CodeKind::FullBlock {
{
result += "<div class=\"code-block\">"; result += "<div class=\"code-block\">";
if let Some(name) = &self.name if let Some(name) = &self.name {
{ result += format!(
result += format!("<div class=\"code-block-title\">{}</div>", "<div class=\"code-block-title\">{}</div>",
compiler.sanitize(name.as_str())).as_str(); compiler.sanitize(name.as_str())
)
.as_str();
} }
result += format!("<div class=\"code-block-content\"><table cellspacing=\"0\">").as_str(); result +=
for (line_id, line) in self.code.split(|c| c == '\n').enumerate() format!("<div class=\"code-block-content\"><table cellspacing=\"0\">").as_str();
{ for (line_id, line) in self.code.split(|c| c == '\n').enumerate() {
result += "<tr><td class=\"code-block-gutter\">"; result += "<tr><td class=\"code-block-gutter\">";
// Line number // Line number
result += format!("<pre><span>{}</span></pre>", line_id+self.line_offset).as_str(); result +=
format!("<pre><span>{}</span></pre>", line_id + self.line_offset).as_str();
// Code // Code
result += "</td><td class=\"code-block-line\"><pre>"; result += "</td><td class=\"code-block-line\"><pre>";
match h.highlight_line(line, &syntax_set) match h.highlight_line(line, &syntax_set) {
{ Err(e) => {
Err(e) => return Err(format!("Error highlighting line `{line}`: {}", e.to_string())), return Err(format!(
"Error highlighting line `{line}`: {}",
e.to_string()
))
}
Ok(regions) => { Ok(regions) => {
match syntect::html::styled_line_to_highlighted_html(&regions[..], syntect::html::IncludeBackground::No) match syntect::html::styled_line_to_highlighted_html(
{ &regions[..],
Err(e) => return Err(format!("Error highlighting code: {}", e.to_string())), syntect::html::IncludeBackground::No,
Ok(highlighted) => result += if highlighted.is_empty() { "<br>" } else { highlighted.as_str() } ) {
Err(e) => {
return Err(format!("Error highlighting code: {}", e.to_string()))
}
Ok(highlighted) => {
result += if highlighted.is_empty() {
"<br>"
} else {
highlighted.as_str()
}
}
} }
} }
} }
@ -88,41 +145,59 @@ impl Code {
} }
result += "</table></div></div>"; result += "</table></div></div>";
} } else if self.block == CodeKind::MiniBlock {
else if self.block == CodeKind::MiniBlock
{
result += "<div class=\"code-block\"><div class=\"code-block-content\"><table cellspacing=\"0\">"; result += "<div class=\"code-block\"><div class=\"code-block-content\"><table cellspacing=\"0\">";
for line in self.code.split(|c| c == '\n') for line in self.code.split(|c| c == '\n') {
{
result += "<tr><td class=\"code-block-line\"><pre>"; result += "<tr><td class=\"code-block-line\"><pre>";
// Code // Code
match h.highlight_line(line, &syntax_set) match h.highlight_line(line, &syntax_set) {
{ Err(e) => {
Err(e) => return Err(format!("Error highlighting line `{line}`: {}", e.to_string())), return Err(format!(
"Error highlighting line `{line}`: {}",
e.to_string()
))
}
Ok(regions) => { Ok(regions) => {
match syntect::html::styled_line_to_highlighted_html(&regions[..], syntect::html::IncludeBackground::No) match syntect::html::styled_line_to_highlighted_html(
{ &regions[..],
Err(e) => return Err(format!("Error highlighting code: {}", e.to_string())), syntect::html::IncludeBackground::No,
Ok(highlighted) => result += if highlighted.is_empty() { "<br>" } else { highlighted.as_str() } ) {
Err(e) => {
return Err(format!("Error highlighting code: {}", e.to_string()))
}
Ok(highlighted) => {
result += if highlighted.is_empty() {
"<br>"
} else {
highlighted.as_str()
}
}
} }
} }
} }
result += "</pre></td></tr>"; result += "</pre></td></tr>";
} }
result += "</table></div></div>"; result += "</table></div></div>";
} } else if self.block == CodeKind::Inline {
else if self.block == CodeKind::Inline
{
result += "<a class=\"inline-code\"><code>"; result += "<a class=\"inline-code\"><code>";
match h.highlight_line(self.code.as_str(), &syntax_set) match h.highlight_line(self.code.as_str(), &syntax_set) {
{ Err(e) => {
Err(e) => return Err(format!("Error highlighting line `{}`: {}", self.code, e.to_string())), return Err(format!(
"Error highlighting line `{}`: {}",
self.code,
e.to_string()
))
}
Ok(regions) => { Ok(regions) => {
match syntect::html::styled_line_to_highlighted_html(&regions[..], syntect::html::IncludeBackground::No) match syntect::html::styled_line_to_highlighted_html(
{ &regions[..],
Err(e) => return Err(format!("Error highlighting code: {}", e.to_string())), syntect::html::IncludeBackground::No,
Ok(highlighted) => result += highlighted.as_str() ) {
Err(e) => {
return Err(format!("Error highlighting code: {}", e.to_string()))
}
Ok(highlighted) => result += highlighted.as_str(),
} }
} }
} }
@ -133,81 +208,83 @@ impl Code {
} }
} }
impl Cached for Code impl Cached for Code {
{ type Key = String;
type Key = String; type Value = String;
type Value = String;
fn sql_table() -> &'static str { fn sql_table() -> &'static str {
"CREATE TABLE IF NOT EXISTS cached_code ( "CREATE TABLE IF NOT EXISTS cached_code (
digest TEXT PRIMARY KEY, digest TEXT PRIMARY KEY,
highlighted BLOB NOT NULL);" highlighted BLOB NOT NULL);"
} }
fn sql_get_query() -> &'static str { fn sql_get_query() -> &'static str { "SELECT highlighted FROM cached_code WHERE digest = (?1)" }
"SELECT highlighted FROM cached_code WHERE digest = (?1)"
}
fn sql_insert_query() -> &'static str { fn sql_insert_query() -> &'static str {
"INSERT INTO cached_code (digest, highlighted) VALUES (?1, ?2)" "INSERT INTO cached_code (digest, highlighted) VALUES (?1, ?2)"
} }
fn key(&self) -> <Self as Cached>::Key { fn key(&self) -> <Self as Cached>::Key {
let mut hasher = Sha512::new(); let mut hasher = Sha512::new();
hasher.input((self.block as usize).to_be_bytes().as_slice()); hasher.input((self.block as usize).to_be_bytes().as_slice());
hasher.input((self.line_offset as usize).to_be_bytes().as_slice()); hasher.input((self.line_offset as usize).to_be_bytes().as_slice());
self.theme.as_ref().map(|theme| hasher.input(theme.as_bytes())); self.theme
.as_ref()
.map(|theme| hasher.input(theme.as_bytes()));
self.name.as_ref().map(|name| hasher.input(name.as_bytes())); self.name.as_ref().map(|name| hasher.input(name.as_bytes()));
hasher.input(self.language.as_bytes()); hasher.input(self.language.as_bytes());
hasher.input(self.code.as_bytes()); hasher.input(self.code.as_bytes());
hasher.result_str() hasher.result_str()
} }
} }
impl Element for Code { impl Element for Code {
fn location(&self) -> &Token { &self.location } fn location(&self) -> &Token { &self.location }
fn kind(&self) -> ElemKind { if self.block == CodeKind::Inline { ElemKind::Inline } else { ElemKind::Block } } fn kind(&self) -> ElemKind {
if self.block == CodeKind::Inline {
ElemKind::Inline
} else {
ElemKind::Block
}
}
fn element_name(&self) -> &'static str { "Code Block" } fn element_name(&self) -> &'static str { "Code Block" }
fn to_string(&self) -> String { format!("{self:#?}") } fn to_string(&self) -> String { format!("{self:#?}") }
fn compile(&self, compiler: &Compiler, _document: &dyn Document) fn compile(&self, compiler: &Compiler, _document: &dyn Document) -> Result<String, String> {
-> Result<String, String> { match compiler.target() {
match compiler.target()
{
Target::HTML => { Target::HTML => {
static CACHE_INIT : Once = Once::new(); static CACHE_INIT: Once = Once::new();
CACHE_INIT.call_once(|| if let Some(mut con) = compiler.cache() { CACHE_INIT.call_once(|| {
if let Err(e) = Code::init(&mut con) if let Some(mut con) = compiler.cache() {
{ if let Err(e) = Code::init(&mut con) {
eprintln!("Unable to create cache table: {e}"); eprintln!("Unable to create cache table: {e}");
}
} }
}); });
if let Some(mut con) = compiler.cache() if let Some(mut con) = compiler.cache() {
{ match self.cached(&mut con, |s| s.highlight_html(compiler)) {
match self.cached(&mut con, |s| s.highlight_html(compiler))
{
Ok(s) => Ok(s), Ok(s) => Ok(s),
Err(e) => match e Err(e) => match e {
{ CachedError::SqlErr(e) => {
CachedError::SqlErr(e) => Err(format!("Querying the cache failed: {e}")), Err(format!("Querying the cache failed: {e}"))
CachedError::GenErr(e) => Err(e) }
} CachedError::GenErr(e) => Err(e),
},
} }
} } else {
else
{
self.highlight_html(compiler) self.highlight_html(compiler)
} }
} }
Target::LATEX => { todo!("") } Target::LATEX => {
todo!("")
}
} }
} }
} }
pub struct CodeRule { pub struct CodeRule {
@ -218,83 +295,99 @@ pub struct CodeRule {
impl CodeRule { impl CodeRule {
pub fn new() -> Self { pub fn new() -> Self {
let mut props = HashMap::new(); let mut props = HashMap::new();
props.insert("line_offset".to_string(), props.insert(
"line_offset".to_string(),
Property::new( Property::new(
true, true,
"Line number offset".to_string(), "Line number offset".to_string(),
Some("1".to_string()))); Some("1".to_string()),
),
);
Self { Self {
re: [ re: [
Regex::new(r"(?:^|\n)```(?:\[((?:\\.|[^\\\\])*?)\])?(.*?)(?:,(.*))?\n((?:\\(?:.|\n)|[^\\\\])*?)```").unwrap(), Regex::new(
Regex::new(r"``(?:\[((?:\\.|[^\[\]\\])*?)\])?(?:(.*?)(?:\n|,))?((?:\\(?:.|\n)|[^\\\\])*?)``").unwrap(), r"(?:^|\n)```(?:\[((?:\\.|[^\\\\])*?)\])?(.*?)(?:,(.*))?\n((?:\\(?:.|\n)|[^\\\\])*?)```",
)
.unwrap(),
Regex::new(
r"``(?:\[((?:\\.|[^\[\]\\])*?)\])?(?:(.*?)(?:\n|,))?((?:\\(?:.|\n)|[^\\\\])*?)``",
)
.unwrap(),
], ],
properties: PropertyParser::new(props) properties: PropertyParser::new(props),
} }
} }
} }
impl RegexRule for CodeRule impl RegexRule for CodeRule {
{ fn name(&self) -> &'static str { "Code" }
fn name(&self) -> &'static str { "Code" }
fn regexes(&self) -> &[regex::Regex] { &self.re } fn regexes(&self) -> &[regex::Regex] { &self.re }
fn on_regex_match<'a>(&self, index: usize, parser: &dyn Parser, document: &'a dyn Document, token: Token, matches: Captures) fn on_regex_match<'a>(
-> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>> { &self,
index: usize,
parser: &dyn Parser,
document: &'a dyn Document,
token: Token,
matches: Captures,
) -> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>> {
let mut reports = vec![]; let mut reports = vec![];
let properties = match matches.get(1) let properties = match matches.get(1) {
{
None => match self.properties.default() { None => match self.properties.default() {
Ok(properties) => properties, Ok(properties) => properties,
Err(e) => { Err(e) => {
reports.push( reports.push(
Report::build(ReportKind::Error, token.source(), token.start()) Report::build(ReportKind::Error, token.source(), token.start())
.with_message("Invalid code") .with_message("Invalid code")
.with_label( .with_label(
Label::new((token.source().clone(), token.range.clone())) Label::new((token.source().clone(), token.range.clone()))
.with_message(format!("Code is missing properties: {e}")) .with_message(format!("Code is missing properties: {e}"))
.with_color(parser.colors().error)) .with_color(parser.colors().error),
.finish()); )
return reports; .finish(),
}, );
} return reports;
}
},
Some(props) => { Some(props) => {
let processed = util::process_escaped('\\', "]", let processed =
props.as_str().trim_start().trim_end()); util::process_escaped('\\', "]", props.as_str().trim_start().trim_end());
match self.properties.parse(processed.as_str()) match self.properties.parse(processed.as_str()) {
{
Err(e) => { Err(e) => {
reports.push( reports.push(
Report::build(ReportKind::Error, token.source(), props.start()) Report::build(ReportKind::Error, token.source(), props.start())
.with_message("Invalid Code Properties") .with_message("Invalid Code Properties")
.with_label( .with_label(
Label::new((token.source().clone(), props.range())) Label::new((token.source().clone(), props.range()))
.with_message(e) .with_message(e)
.with_color(parser.colors().error)) .with_color(parser.colors().error),
.finish()); )
.finish(),
);
return reports; return reports;
} }
Ok(properties) => properties Ok(properties) => properties,
} }
} }
}; };
let code_lang = match matches.get(2) let code_lang = match matches.get(2) {
{
None => "Plain Text".to_string(), None => "Plain Text".to_string(),
Some(lang) => { Some(lang) => {
let code_lang = lang.as_str().trim_end().trim_start().to_string(); let code_lang = lang.as_str().trim_end().trim_start().to_string();
if code_lang.is_empty() if code_lang.is_empty() {
{
reports.push( reports.push(
Report::build(ReportKind::Error, token.source(), lang.start()) Report::build(ReportKind::Error, token.source(), lang.start())
.with_message("Missing code language") .with_message("Missing code language")
.with_label( .with_label(
Label::new((token.source().clone(), lang.range())) Label::new((token.source().clone(), lang.range()))
.with_message("No language specified") .with_message("No language specified")
.with_color(parser.colors().error)) .with_color(parser.colors().error),
.finish()); )
.finish(),
);
return reports; return reports;
} }
@ -305,45 +398,51 @@ impl RegexRule for CodeRule
} }
}; };
let mut code_content = if index == 0 let mut code_content = if index == 0 {
{ util::process_escaped('\\',"```", matches.get(4).unwrap().as_str()) } util::process_escaped('\\', "```", matches.get(4).unwrap().as_str())
else } else {
{ util::process_escaped('\\',"``", matches.get(3).unwrap().as_str()) }; util::process_escaped('\\', "``", matches.get(3).unwrap().as_str())
if code_content.bytes().last() == Some('\n' as u8) // Remove newline };
if code_content.bytes().last() == Some('\n' as u8)
// Remove newline
{ {
code_content.pop(); code_content.pop();
} }
if code_content.is_empty() if code_content.is_empty() {
{
reports.push( reports.push(
Report::build(ReportKind::Error, token.source(), token.start()) Report::build(ReportKind::Error, token.source(), token.start())
.with_message("Missing code content") .with_message("Missing code content")
.with_label( .with_label(
Label::new((token.source().clone(), token.range.clone())) Label::new((token.source().clone(), token.range.clone()))
.with_message("Code content cannot be empty") .with_message("Code content cannot be empty")
.with_color(parser.colors().error)) .with_color(parser.colors().error),
.finish()); )
.finish(),
);
return reports; return reports;
} }
let theme = document.get_variable("code.theme") let theme = document
.get_variable("code.theme")
.and_then(|var| Some(var.to_string())); .and_then(|var| Some(var.to_string()));
if index == 0 // Block if index == 0
// Block
{ {
let code_name = matches.get(3) let code_name = matches.get(3).and_then(|name| {
.and_then(|name| { let code_name = name.as_str().trim_end().trim_start().to_string();
let code_name = name.as_str().trim_end().trim_start().to_string(); (!code_name.is_empty()).then_some(code_name)
(!code_name.is_empty()).then_some(code_name) });
}); let line_offset =
let line_offset = match properties.get("line_offset", match properties.get("line_offset", |prop, value| {
|prop, value| value.parse::<usize>().map_err(|e| (prop, e))) value.parse::<usize>().map_err(|e| (prop, e))
{ }) {
Ok((_prop, offset)) => offset, Ok((_prop, offset)) => offset,
Err(e) => match e { Err(e) => {
PropertyMapError::ParseError((prop, err)) => { match e {
reports.push( PropertyMapError::ParseError((prop, err)) => {
reports.push(
Report::build(ReportKind::Error, token.source(), token.start()) Report::build(ReportKind::Error, token.source(), token.start())
.with_message("Invalid Code Property") .with_message("Invalid Code Property")
.with_label( .with_label(
@ -353,55 +452,68 @@ impl RegexRule for CodeRule
err.fg(parser.colors().error))) err.fg(parser.colors().error)))
.with_color(parser.colors().warning)) .with_color(parser.colors().warning))
.finish()); .finish());
return reports; return reports;
}, }
PropertyMapError::NotFoundError(err) => { PropertyMapError::NotFoundError(err) => {
reports.push( reports.push(
Report::build(ReportKind::Error, token.source(), token.start()) Report::build(ReportKind::Error, token.source(), token.start())
.with_message("Invalid Code Property") .with_message("Invalid Code Property")
.with_label( .with_label(
Label::new((token.source().clone(), token.start()+1..token.end())) Label::new((
.with_message(format!("Property `{}` doesn't exist", token.source().clone(),
err.fg(parser.colors().info))) token.start() + 1..token.end(),
.with_color(parser.colors().warning)) ))
.finish()); .with_message(format!(
return reports; "Property `{}` doesn't exist",
err.fg(parser.colors().info)
))
.with_color(parser.colors().warning),
)
.finish(),
);
return reports;
}
}
} }
} };
parser.push(
document,
Box::new(Code::new(
token.clone(),
CodeKind::FullBlock,
code_lang,
code_name,
code_content,
theme,
line_offset,
)),
);
} else
// Maybe inline
{
let block = if code_content.contains('\n') {
CodeKind::MiniBlock
} else {
CodeKind::Inline
}; };
parser.push(document, Box::new( parser.push(
Code::new( document,
token.clone(), Box::new(Code::new(
CodeKind::FullBlock, token.clone(),
code_lang, block,
code_name, code_lang,
code_content, None,
theme, code_content,
line_offset theme,
) 1,
)); )),
} );
else // Maybe inline
{
let block = if code_content.contains('\n') { CodeKind::MiniBlock }
else { CodeKind::Inline };
parser.push(document, Box::new(
Code::new(
token.clone(),
block,
code_lang,
None,
code_content,
theme,
1,
)
));
} }
reports reports
} }
// TODO // TODO
fn lua_bindings<'lua>(&self, _lua: &'lua Lua) -> Vec<(String, Function<'lua>)> { vec![] } fn lua_bindings<'lua>(&self, _lua: &'lua Lua) -> Vec<(String, Function<'lua>)> { vec![] }

View file

@ -314,10 +314,7 @@ impl RegexRule for GraphRule {
token.source().clone(), token.source().clone(),
token.start() + 1..token.end(), token.start() + 1..token.end(),
)) ))
.with_message(format!( .with_message(err)
"Property `{}` is missing",
err.fg(parser.colors().info)
))
.with_color(parser.colors().warning), .with_color(parser.colors().warning),
) )
.finish(), .finish(),

462
src/elements/media.rs Normal file
View file

@ -0,0 +1,462 @@
use std::collections::HashMap;
use std::ops::Range;
use std::rc::Rc;
use std::str::FromStr;
use ariadne::Fmt;
use ariadne::Label;
use ariadne::Report;
use ariadne::ReportKind;
use regex::Captures;
use regex::Match;
use regex::Regex;
use regex::RegexBuilder;
use crate::compiler::compiler::Compiler;
use crate::compiler::compiler::Target;
use crate::document::document::Document;
use crate::document::document::DocumentAccessors;
use crate::document::element::ElemKind;
use crate::document::element::Element;
use crate::document::references::validate_refname;
use crate::parser::parser::ReportColors;
use crate::parser::rule::RegexRule;
use crate::parser::source::Source;
use crate::parser::source::Token;
use crate::parser::source::VirtualSource;
use crate::parser::util;
use crate::parser::util::parse_paragraph;
use crate::parser::util::Property;
use crate::parser::util::PropertyMap;
use crate::parser::util::PropertyMapError;
use crate::parser::util::PropertyParser;
use super::paragraph::Paragraph;
/// Kind of medium a media element refers to.
#[derive(Debug)]
pub enum MediaType {
    /// Parsed from the string `"image"`.
    IMAGE,
    /// Parsed from the string `"video"`.
    VIDEO,
    /// Parsed from the string `"audio"`.
    AUDIO,
}

impl FromStr for MediaType {
    type Err = String;

    /// Parses the exact, lowercase names `image`, `video` and `audio`.
    ///
    /// Any other input yields an `Unknown media type` error message.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let media_type = match s {
            "image" => MediaType::IMAGE,
            "video" => MediaType::VIDEO,
            "audio" => MediaType::AUDIO,
            _ => return Err(format!("Unknown media type: {s}")),
        };
        Ok(media_type)
    }
}
/// Block element holding a list of [`Media`] items.
///
/// The media rule's `on_regex_match` appends each parsed medium to the
/// document's last `MediaGroup` when `last_element_mut` yields one, and
/// otherwise pushes a fresh, empty group first.
#[derive(Debug)]
struct MediaGroup {
// Location of the group; `push` extends its range to the end of each added medium.
pub(self) location: Token,
// Media contained in the group, in insertion order.
pub(self) media: Vec<Media>,
}
impl MediaGroup {
    /// Appends `media` to this group and stretches the group's location so
    /// that it ends where `media` ends.
    ///
    /// # Errors
    ///
    /// Fails, leaving the group untouched, when `media` does not come from the
    /// same source as the group.
    fn push(&mut self, media: Media) -> Result<(), String> {
        if self.location.source() != media.location.source() {
            let message = format!(
                "Attempted to insert media from {} into MediaGroup from {}",
                self.location.source(),
                media.location.source()
            );
            return Err(message);
        }

        let extended = self.location.start()..media.location.end();
        self.location.range = extended;
        self.media.push(media);
        Ok(())
    }
}
impl Element for MediaGroup {
    fn location(&self) -> &Token { &self.location }

    fn kind(&self) -> ElemKind { ElemKind::Block }

    fn element_name(&self) -> &'static str { "Media Group" }

    fn to_string(&self) -> String { format!("{self:#?}") }

    /// Compiles every medium of the group, in order, inside a single
    /// `<div class="media">` wrapper.
    ///
    /// The first medium that fails to compile aborts the whole group with its
    /// error. Only the HTML target is implemented.
    fn compile(&self, compiler: &Compiler, document: &dyn Document) -> Result<String, String> {
        match compiler.target() {
            Target::HTML => {
                let mut html = String::from("<div class=\"media\">");
                for medium in &self.media {
                    html += medium.compile(compiler, document)?.as_str();
                }
                html += "</div>";
                Ok(html)
            }
            _ => todo!(""),
        }
    }
}
/// A single medium (image, video or audio) stored inside a `MediaGroup`.
#[derive(Debug)]
struct Media {
// Location of the medium in its source.
pub(self) location: Token,
// Reference name; the media rule fills it with the output of `validate_refname`.
pub(self) reference: String,
// URI of the medium, used for both `href` and `src` when compiling an image to HTML.
pub(self) uri: String,
// Kind of medium; selects the markup emitted by `compile`.
pub(self) media_type: MediaType,
// Optional value for the HTML `width` attribute, taken from the `width` property.
pub(self) width: Option<String>,
// NOTE(review): the media rule always sets this to `None` (captions are still a TODO there).
pub(self) caption: Option<String>,
// Optional description paragraph, compiled after the medium itself.
pub(self) description: Option<Paragraph>,
}
impl Element for Media {
    fn location(&self) -> &Token { &self.location }

    fn kind(&self) -> ElemKind { ElemKind::Block }

    fn element_name(&self) -> &'static str { "Media" }

    fn to_string(&self) -> String { format!("{self:#?}") }

    /// Compiles the medium into a `<div class="medium">` block.
    ///
    /// For the HTML target the output contains, in order: the medium itself
    /// (only images are implemented, video and audio are still `todo!`), a
    /// placeholder reference-name paragraph, and the compiled description if
    /// there is one.
    ///
    /// # Errors
    ///
    /// Propagates the error of the description paragraph when it fails to
    /// compile.
    fn compile(&self, compiler: &Compiler, document: &dyn Document) -> Result<String, String> {
        match compiler.target() {
            Target::HTML => {
                let mut result = String::new();

                result.push_str("<div class=\"medium\">");

                // `width` and `uri` are taken verbatim from the document, so they
                // go through the compiler's sanitizer before being placed inside
                // attribute values; otherwise a `"` in either would break out of
                // the attribute.
                // NOTE(review): assumes `Compiler::sanitize` escapes quotes for the
                // HTML target -- confirm against its implementation.
                let width = self.width.as_ref().map_or(String::new(), |w| {
                    format!(r#" width="{}""#, compiler.sanitize(w.as_str()))
                });
                match self.media_type {
                    MediaType::IMAGE => {
                        let uri = compiler.sanitize(self.uri.as_str());
                        result.push_str(
                            format!(r#"<a href="{0}"><img src="{0}"{width}></a>"#, uri).as_str(),
                        );
                    }
                    MediaType::VIDEO => todo!(),
                    MediaType::AUDIO => todo!(),
                }

                // Placeholder: `self.reference` is not rendered yet.
                result.push_str(format!(r#"<p class="medium-refname">{}</p>"#, "TODO").as_str());

                if let Some(paragraph) = self.description.as_ref() {
                    result.push_str(paragraph.compile(compiler, document)?.as_str());
                }
                result.push_str("</div>");

                Ok(result)
            }
            _ => todo!(""),
        }
    }
}
/// Parser rule recognising media of the form `![refname](uri)[properties] description`.
pub struct MediaRule {
// The single regular expression matching a media declaration (built in `new`).
re: [Regex; 1],
// Parser for the optional `[properties]` part; knows the `type` and `width` properties.
properties: PropertyParser,
}
impl MediaRule {
    /// Builds the rule: registers the `type` and `width` properties (both
    /// without a default value) and compiles the media regular expression in
    /// multi-line mode.
    pub fn new() -> Self {
        let mut props = HashMap::new();
        props.insert(
            "type".to_string(),
            Property::new(
                false,
                "Override for the media type detection".to_string(),
                None,
            ),
        );
        props.insert(
            "width".to_string(),
            Property::new(false, "Override for the media width".to_string(), None),
        );

        // Capture groups: 1 = refname, 2 = uri, 3 = properties (optional),
        // 4 = description running to the end of the line (optional).
        let media_re = RegexBuilder::new(
            r"^!\[(.*)\]\(((?:\\.|[^\\\\])*?)\)(?:\[((?:\\.|[^\\\\])*?)\])?((?:\\(?:.|\n)|[^\\\\])*?$)?",
        )
        .multi_line(true)
        .build()
        .unwrap();

        Self {
            re: [media_re],
            properties: PropertyParser::new(props),
        }
    }

    /// Trims `uri` and returns it.
    ///
    /// # Errors
    ///
    /// Fails when nothing is left after trimming.
    fn validate_uri(uri: &str) -> Result<&str, String> {
        let trimmed = uri.trim();
        if trimmed.is_empty() {
            return Err("URI is empty".to_string());
        }

        Ok(trimmed)
    }

    /// Parses the optional `[properties]` capture of a media declaration.
    ///
    /// When the capture is absent the parser's defaults are used. When present,
    /// the text is trimmed, `\]` escapes are processed and the result is handed
    /// to the property parser.
    ///
    /// # Errors
    ///
    /// Returns a ready-to-emit error report, located on the whole token when
    /// the defaults cannot be built and on the properties capture when parsing
    /// fails.
    fn parse_properties(
        &self,
        colors: &ReportColors,
        token: &Token,
        m: &Option<Match>,
    ) -> Result<PropertyMap, Report<'_, (Rc<dyn Source>, Range<usize>)>> {
        match m {
            None => self.properties.default().map_err(|e| {
                Report::build(ReportKind::Error, token.source(), token.start())
                    .with_message("Invalid Media Properties")
                    .with_label(
                        Label::new((token.source().clone(), token.range.clone()))
                            .with_message(format!("Media is missing required property: {e}"))
                            .with_color(colors.error),
                    )
                    .finish()
            }),
            Some(props) => {
                let processed = util::process_escaped('\\', "]", props.as_str().trim());
                self.properties.parse(processed.as_str()).map_err(|e| {
                    Report::build(ReportKind::Error, token.source(), props.start())
                        .with_message("Invalid Media Properties")
                        .with_label(
                            Label::new((token.source().clone(), props.range()))
                                .with_message(e)
                                .with_color(colors.error),
                        )
                        .finish()
                })
            }
        }
    }

    /// Guesses the media type from the text after the last `.` of `filename`,
    /// compared case-insensitively (ASCII).
    ///
    /// Returns `None` when there is no `.` or the extension is not a known
    /// image extension; no video or audio extension is recognised yet.
    fn detect_filetype(filename: &str) -> Option<MediaType> {
        let (_, extension) = filename.rsplit_once('.')?;

        // TODO: https://developer.mozilla.org/en-US/docs/Web/Media/Formats/Containers
        match extension.to_ascii_lowercase().as_str() {
            "png" | "apng" | "avif" | "gif" | "webp" | "svg" | "bmp" | "jpg" | "jpeg" | "jfif"
            | "pjpeg" | "pjp" => Some(MediaType::IMAGE),
            _ => None,
        }
    }
}
impl RegexRule for MediaRule {
fn name(&self) -> &'static str { "Media" }
fn regexes(&self) -> &[regex::Regex] { &self.re }
fn on_regex_match<'a>(
&self,
_: usize,
parser: &dyn crate::parser::parser::Parser,
document: &'a (dyn Document<'a> + 'a),
token: Token,
matches: Captures,
) -> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>> {
let mut reports = vec![];
let refname = match (
matches.get(1).unwrap(),
validate_refname(matches.get(1).unwrap().as_str()),
) {
(_, Ok(refname)) => refname.to_string(),
(m, Err(err)) => {
reports.push(
Report::build(ReportKind::Error, token.source(), m.start())
.with_message("Invalid Media Refname")
.with_label(
Label::new((token.source().clone(), m.range())).with_message(err),
)
.finish(),
);
return reports;
}
};
let uri = match (
matches.get(2).unwrap(),
MediaRule::validate_uri(matches.get(2).unwrap().as_str()),
) {
(_, Ok(uri)) => uri.to_string(),
(m, Err(err)) => {
reports.push(
Report::build(ReportKind::Error, token.source(), m.start())
.with_message("Invalid Media URI")
.with_label(
Label::new((token.source().clone(), m.range())).with_message(err),
)
.finish(),
);
return reports;
}
};
// Properties
let properties = match self.parse_properties(parser.colors(), &token, &matches.get(3)) {
Ok(pm) => pm,
Err(report) => {
reports.push(report);
return reports;
}
};
let media_type =
match Self::detect_filetype(uri.as_str()) {
Some(media_type) => media_type,
None => match properties.get("type", |prop, value| {
MediaType::from_str(value.as_str()).map_err(|e| (prop, e))
}) {
Ok((_prop, kind)) => kind,
Err(e) => match e {
PropertyMapError::ParseError((prop, err)) => {
reports.push(
Report::build(ReportKind::Error, token.source(), token.start())
.with_message("Invalid Media Property")
.with_label(
Label::new((token.source().clone(), token.range.clone()))
.with_message(format!(
"Property `type: {}` cannot be converted: {}",
prop.fg(parser.colors().info),
err.fg(parser.colors().error)
))
.with_color(parser.colors().warning),
)
.finish(),
);
return reports;
}
PropertyMapError::NotFoundError(err) => {
reports.push(
Report::build(ReportKind::Error, token.source(), token.start())
.with_message("Invalid Media Property")
.with_label(
Label::new((
token.source().clone(),
token.start() + 1..token.end(),
))
.with_message(format!("{err}. Required because mediatype could not be detected"))
.with_color(parser.colors().error),
)
.finish(),
);
return reports;
}
},
},
};
let width = properties
.get("width", |_, value| -> Result<String, ()> {
Ok(value.clone())
})
.ok()
.and_then(|(_, s)| Some(s));
let description = match matches.get(4) {
Some(content) => {
let source = Rc::new(VirtualSource::new(
Token::new(content.range(), token.source()),
format!("Media[{refname}] description"),
content.as_str().trim_start().trim_end().to_string(),
));
if source.content().is_empty() {
None
} else {
match parse_paragraph(parser, source, document) {
Ok(paragraph) => Some(*paragraph),
Err(err) => {
reports.push(
Report::build(ReportKind::Error, token.source(), content.start())
.with_message("Invalid Media Description")
.with_label(
Label::new((token.source().clone(), content.range()))
.with_message(format!(
"Could not parse description: {err}"
))
.with_color(parser.colors().error),
)
.finish(),
);
return reports;
}
}
}
}
None => panic!("Unknown error"),
};
// TODO: caption
let mut group = match document.last_element_mut::<MediaGroup>() {
Some(group) => group,
None => {
parser.push(
document,
Box::new(MediaGroup {
location: token.clone(),
media: vec![],
}),
);
document.last_element_mut::<MediaGroup>().unwrap()
}
};
if let Err(err) = group.push(Media {
location: token.clone(),
reference: refname,
uri,
media_type,
width,
caption: None,
description,
}) {
reports.push(
Report::build(ReportKind::Error, token.source(), token.start())
.with_message("Invalid Media")
.with_label(
Label::new((token.source().clone(), token.range.clone()))
.with_message(err)
.with_color(parser.colors().error),
)
.finish(),
);
}
reports
}
/// Lua bindings exposed by this rule; none are registered, so the list is empty.
fn lua_bindings<'lua>(&self, _lua: &'lua mlua::Lua) -> Vec<(String, mlua::Function<'lua>)> {
    Vec::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that the first regex of the media rule accepts the plain syntax,
    /// a variant with escaped delimiters, and an entry located on a second line.
    #[test]
    fn regex() {
        let media_rule = MediaRule::new();
        let pattern = &media_rule.regexes()[0];

        // Plain form, then a form with escaped `)` / `]` and trailing backslashes.
        let accepted: [&str; 2] = [
            "![refname](some path...)[some properties] some description",
            r"![refname](some p\)ath...\\)[some propert\]ies\\\\] some description\\nanother line",
        ];
        for sample in accepted.iter().copied() {
            assert!(pattern.is_match(sample));
        }

        // Offset 26 is the first byte after the newline, i.e. the start of the second entry.
        let two_entries = "![r1](uri1)[props1] desc1\n![r2](uri2)[props2] desc2";
        assert!(pattern.is_match_at(two_entries, 26));
    }
}

View file

@ -13,3 +13,4 @@ pub mod code;
pub mod tex; pub mod tex;
pub mod graphviz; pub mod graphviz;
pub mod raw; pub mod raw;
pub mod media;

View file

@ -1,10 +1,22 @@
use std::{any::Any, ops::Range, rc::Rc}; use std::any::Any;
use std::ops::Range;
use std::rc::Rc;
use ariadne::Report; use ariadne::Report;
use mlua::{Function, Lua}; use mlua::Function;
use mlua::Lua;
use regex::Regex; use regex::Regex;
use crate::{compiler::compiler::{Compiler, Target}, document::{document::Document, element::{ElemKind, Element}}, parser::{parser::Parser, rule::Rule, source::{Cursor, Source, Token}}}; use crate::compiler::compiler::Compiler;
use crate::compiler::compiler::Target;
use crate::document::document::Document;
use crate::document::element::ElemKind;
use crate::document::element::Element;
use crate::parser::parser::Parser;
use crate::parser::rule::Rule;
use crate::parser::source::Cursor;
use crate::parser::source::Source;
use crate::parser::source::Token;
// TODO: Full refactor // TODO: Full refactor
// Problem is that document parsed from other sources i.e by variables // Problem is that document parsed from other sources i.e by variables
@ -14,117 +26,127 @@ use crate::{compiler::compiler::{Compiler, Target}, document::{document::Documen
// The issue is that this would break the current `Token` implementation // The issue is that this would break the current `Token` implementation
// Which would need to be reworked // Which would need to be reworked
#[derive(Debug)] #[derive(Debug)]
pub struct Paragraph pub struct Paragraph {
{ location: Token,
location: Token, pub content: Vec<Box<dyn Element>>,
pub content: Vec<Box<dyn Element>>
} }
impl Paragraph impl Paragraph {
{ pub fn new(location: Token) -> Self {
pub fn new(location: Token) -> Self { Self {
Self { location, content: Vec::new() } location,
} content: Vec::new(),
}
}
pub fn is_empty(&self) -> bool { self.content.is_empty() } pub fn is_empty(&self) -> bool { self.content.is_empty() }
pub fn push(&mut self, elem: Box<dyn Element>) pub fn push(&mut self, elem: Box<dyn Element>) {
{ if elem.location().source() == self.location().source() {
if elem.location().source() == self.location().source() self.location.range = self.location.start()..elem.location().end();
{
self.location.range = self.location.start() .. elem.location().end();
} }
self.content.push(elem); self.content.push(elem);
} }
pub fn find_back<P: FnMut(&&Box<dyn Element + 'static>) -> bool>(&self, mut predicate: P) pub fn find_back<P: FnMut(&&Box<dyn Element + 'static>) -> bool>(
-> Option<&Box<dyn Element>> { &self,
self.content.iter().rev() predicate: P,
.find(predicate) ) -> Option<&Box<dyn Element>> {
self.content.iter().rev().find(predicate)
} }
} }
impl Element for Paragraph impl Element for Paragraph {
{ fn location(&self) -> &Token { &self.location }
fn location(&self) -> &Token { &self.location }
fn kind(&self) -> ElemKind { ElemKind::Special } fn kind(&self) -> ElemKind { ElemKind::Special }
fn element_name(&self) -> &'static str { "Paragraph" } fn element_name(&self) -> &'static str { "Paragraph" }
fn to_string(&self) -> String { format!("{:#?}", self) } fn to_string(&self) -> String { format!("{:#?}", self) }
fn compile(&self, compiler: &Compiler, document: &dyn Document) -> Result<String, String> { fn compile(&self, compiler: &Compiler, document: &dyn Document) -> Result<String, String> {
if self.content.is_empty() { return Ok(String::new()) } if self.content.is_empty() {
return Ok(String::new());
}
match compiler.target() match compiler.target() {
{ Target::HTML => {
Target::HTML => {
let mut result = String::new(); let mut result = String::new();
//if prev.is_none() || prev.unwrap().downcast_ref::<Paragraph>().is_none() //if prev.is_none() || prev.unwrap().downcast_ref::<Paragraph>().is_none()
{ result.push_str("<p>"); } {
result.push_str("<p>");
}
//else //else
//{ result.push_str(" "); } //{ result.push_str(" "); }
let err = self.content.iter().try_for_each(|elem| { let err = self.content.iter().try_for_each(|elem| {
match elem.compile(compiler, document) match elem.compile(compiler, document) {
{
Err(e) => return Err(e), Err(e) => return Err(e),
Ok(content) => { result.push_str(content.as_str()); Ok(()) }, Ok(content) => {
result.push_str(content.as_str());
Ok(())
}
} }
}); });
//if next.is_none() || next.unwrap().downcast_ref::<Paragraph>().is_none() //if next.is_none() || next.unwrap().downcast_ref::<Paragraph>().is_none()
{ result.push_str("</p>"); }
match err
{ {
result.push_str("</p>");
}
match err {
Err(e) => Err(e), Err(e) => Err(e),
Ok(()) => Ok(result), Ok(()) => Ok(result),
} }
} }
Target::LATEX => todo!("Unimplemented compiler") Target::LATEX => todo!("Unimplemented compiler"),
} }
} }
} }
pub struct ParagraphRule pub struct ParagraphRule {
{
re: Regex, re: Regex,
} }
impl ParagraphRule { impl ParagraphRule {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
re: Regex::new(r"\n{2,}").unwrap() re: Regex::new(r"\n{2,}").unwrap(),
} }
} }
} }
impl Rule for ParagraphRule impl Rule for ParagraphRule {
{ fn name(&self) -> &'static str { "Paragraphing" }
fn name(&self) -> &'static str { "Paragraphing" }
fn next_match(&self, cursor: &Cursor) -> Option<(usize, Box<dyn Any>)> { fn next_match(&self, cursor: &Cursor) -> Option<(usize, Box<dyn Any>)> {
self.re.find_at(cursor.source.content(), cursor.pos) self.re
.and_then(|m| Some((m.start(), Box::new([false;0]) as Box<dyn Any>)) ) .find_at(cursor.source.content(), cursor.pos)
} .and_then(|m| Some((m.start(), Box::new([false; 0]) as Box<dyn Any>)))
}
fn on_match(&self, parser: &dyn Parser, document: &dyn Document, cursor: Cursor, _match_data: Option<Box<dyn Any>>) fn on_match(
-> (Cursor, Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>>) { &self,
parser: &dyn Parser,
let end_cursor = match self.re.captures_at(cursor.source.content(), cursor.pos) document: &dyn Document,
{ cursor: Cursor,
_match_data: Option<Box<dyn Any>>,
) -> (Cursor, Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>>) {
let end_cursor = match self.re.captures_at(cursor.source.content(), cursor.pos) {
None => panic!("Unknown error"), None => panic!("Unknown error"),
Some(capture) => Some(capture) => cursor.at(capture.get(0).unwrap().end() - 1),
cursor.at(capture.get(0).unwrap().end()-1)
}; };
parser.push(document, Box::new(Paragraph::new( parser.push(
Token::new(cursor.pos..end_cursor.pos, cursor.source.clone()) document,
))); Box::new(Paragraph::new(Token::new(
cursor.pos..end_cursor.pos,
cursor.source.clone(),
))),
);
(end_cursor, Vec::new()) (end_cursor, Vec::new())
} }
// TODO // TODO
fn lua_bindings<'lua>(&self, _lua: &'lua Lua) -> Vec<(String, Function<'lua>)> { vec![] } fn lua_bindings<'lua>(&self, _lua: &'lua Lua) -> Vec<(String, Function<'lua>)> { vec![] }

View file

@ -6,6 +6,7 @@ use super::graphviz::GraphRule;
use super::import::ImportRule; use super::import::ImportRule;
use super::link::LinkRule; use super::link::LinkRule;
use super::list::ListRule; use super::list::ListRule;
use super::media::MediaRule;
use super::paragraph::ParagraphRule; use super::paragraph::ParagraphRule;
use super::raw::RawRule; use super::raw::RawRule;
use super::script::ScriptRule; use super::script::ScriptRule;
@ -28,6 +29,7 @@ pub fn register<P: Parser>(parser: &mut P) {
parser.add_rule(Box::new(CodeRule::new()), None).unwrap(); parser.add_rule(Box::new(CodeRule::new()), None).unwrap();
parser.add_rule(Box::new(TexRule::new()), None).unwrap(); parser.add_rule(Box::new(TexRule::new()), None).unwrap();
parser.add_rule(Box::new(GraphRule::new()), None).unwrap(); parser.add_rule(Box::new(GraphRule::new()), None).unwrap();
parser.add_rule(Box::new(MediaRule::new()), None).unwrap();
parser.add_rule(Box::new(StyleRule::new()), None).unwrap(); parser.add_rule(Box::new(StyleRule::new()), None).unwrap();
parser.add_rule(Box::new(SectionRule::new()), None).unwrap(); parser.add_rule(Box::new(SectionRule::new()), None).unwrap();

View file

@ -1,4 +1,5 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::rc::Rc;
use unicode_segmentation::UnicodeSegmentation; use unicode_segmentation::UnicodeSegmentation;
@ -7,6 +8,9 @@ use crate::document::document::DocumentAccessors;
use crate::document::element::ElemKind; use crate::document::element::ElemKind;
use crate::elements::paragraph::Paragraph; use crate::elements::paragraph::Paragraph;
use super::parser::Parser;
use super::source::Source;
/// Processes text for escape characters and paragraphing /// Processes text for escape characters and paragraphing
pub fn process_text(document: &dyn Document, content: &str) -> String { pub fn process_text(document: &dyn Document, content: &str) -> String {
let mut escaped = false; let mut escaped = false;
@ -129,6 +133,26 @@ pub fn process_escaped<S: AsRef<str>>(escape: char, token: &'static str, content
processed processed
} }
/// Parses `source` into a single paragraph.
///
/// The source is handed to `parser`, with `document` passed as the parent
/// document of the parse.
///
/// # Errors
///
/// Returns a static message when the parsed content does not hold exactly one
/// element, or when that element is not a [`Paragraph`].
///
/// # Panics
///
/// Panics if the element cannot be popped or downcast after the checks above
/// have passed, which would indicate a broken invariant rather than bad input.
pub fn parse_paragraph<'a>(
    parser: &dyn Parser,
    source: Rc<dyn Source>,
    document: &'a dyn Document<'a>,
) -> Result<Box<Paragraph>, &'static str> {
    // `source` is not needed afterwards, so it is moved instead of cloned.
    let parsed = parser.parse(source, Some(document));

    // Read the length once; the shared borrow ends with this statement, so the
    // mutable borrow taken further down cannot conflict with it.
    let element_count = parsed.content().borrow().len();
    match element_count {
        0 => return Err("Parsed document is empty"),
        1 => {}
        _ => return Err("Parsed document contains more than a single paragraph"),
    }
    if parsed.last_element::<Paragraph>().is_none() {
        return Err("Parsed element is not a paragraph");
    }

    let paragraph = parsed
        .content()
        .borrow_mut()
        .pop()
        .expect("content was checked to hold exactly one element");
    Ok(paragraph
        .downcast::<Paragraph>()
        .expect("last element was checked to be a Paragraph"))
}
#[derive(Debug)] #[derive(Debug)]
pub struct Property { pub struct Property {
required: bool, required: bool,
@ -210,9 +234,7 @@ pub struct PropertyParser {
} }
impl PropertyParser { impl PropertyParser {
pub fn new(properties: HashMap<String, Property>) -> Self { pub fn new(properties: HashMap<String, Property>) -> Self { Self { properties } }
Self { properties }
}
/// Attempts to build a default propertymap /// Attempts to build a default propertymap
/// ///