Add graph

ef3d0c3e 2024-07-24 11:54:04 +02:00
parent 12a4e956a9
commit b8f4671657
14 changed files with 1000 additions and 754 deletions

View file

@ -1,266 +1,340 @@
use std::{io::{Read, Write}, ops::Range, process::{Command, Stdio}, rc::Rc, sync::Once};
use std::collections::HashMap;
use std::io::Read;
use std::io::Write;
use std::ops::Range;
use std::process::Command;
use std::process::Stdio;
use std::rc::Rc;
use std::str::FromStr;
use std::sync::Once;
use ariadne::{Fmt, Label, Report, ReportKind};
use crypto::{digest::Digest, sha2::Sha512};
use mlua::{Function, Lua};
use regex::{Captures, Regex};
use crate::parser::util::Property;
use crate::parser::util::PropertyMapError;
use crate::parser::util::PropertyParser;
use ariadne::Fmt;
use ariadne::Label;
use ariadne::Report;
use ariadne::ReportKind;
use crypto::digest::Digest;
use crypto::sha2::Sha512;
use graphviz_rust::cmd::Format;
use graphviz_rust::cmd::Layout;
use graphviz_rust::exec;
use graphviz_rust::exec_dot;
use graphviz_rust::parse;
use graphviz_rust::printer::PrinterContext;
use mlua::Function;
use mlua::Lua;
use regex::Captures;
use regex::Regex;
use crate::{cache::cache::{Cached, CachedError}, compiler::compiler::{Compiler, Target}, document::{document::Document, element::{ElemKind, Element}}, parser::{parser::Parser, rule::RegexRule, source::{Source, Token}, util}};
#[derive(Debug, PartialEq, Eq)]
enum TexKind
{
Block,
Inline,
}
impl From<&TexKind> for ElemKind
{
fn from(value: &TexKind) -> Self {
match value {
TexKind::Inline => ElemKind::Inline,
_ => ElemKind::Block
}
}
}
use crate::cache::cache::Cached;
use crate::cache::cache::CachedError;
use crate::compiler::compiler::Compiler;
use crate::compiler::compiler::Target;
use crate::document::document::Document;
use crate::document::element::ElemKind;
use crate::document::element::Element;
use crate::parser::parser::Parser;
use crate::parser::rule::RegexRule;
use crate::parser::source::Source;
use crate::parser::source::Token;
use crate::parser::util;
#[derive(Debug)]
struct Tex
{
location: Token,
block: TexKind,
env: String,
tex: String,
caption: Option<String>,
struct Graphviz {
pub location: Token,
pub dot: String,
pub layout: Layout,
pub caption: Option<String>,
}
impl Tex {
fn new(location: Token, block: TexKind, env: String, tex: String, caption: Option<String>) -> Self {
Self { location, block, env, tex, caption }
}
fn format_latex(fontsize: &String, preamble: &String, tex: &String) -> FormattedTex
{
FormattedTex(format!(r"\documentclass[{}pt,preview]{{standalone}}
{}
\begin{{document}}
\begin{{preview}}
{}
\end{{preview}}
\end{{document}}",
fontsize, preamble, tex))
fn layout_from_str(value: &str) -> Result<Layout, String> {
match value {
"dot" => Ok(Layout::Dot),
"neato" => Ok(Layout::Neato),
"fdp" => Ok(Layout::Fdp),
"sfdp" => Ok(Layout::Sfdp),
"circo" => Ok(Layout::Circo),
"twopi" => Ok(Layout::Twopi),
"osage" => Ok(Layout::Asage), // typo in graphviz_rust ?
"patchwork" => Ok(Layout::Patchwork),
_ => Err(format!("Unknown layout: {value}")),
}
}
struct FormattedTex(String);
impl Graphviz {
/// Renders dot to svg
fn dot_to_svg(&self) -> Result<String, String> {
print!("Rendering Graphviz `{}`... ", self.dot);
impl FormattedTex
{
/// Renders latex to svg
fn latex_to_svg(&self, exec: &String, fontsize: &String) -> Result<String, String>
{
print!("Rendering LaTex `{}`... ", self.0);
let process = match Command::new(exec)
.arg("--fontsize").arg(fontsize)
.stdout(Stdio::piped())
.stdin(Stdio::piped())
.spawn()
{
Err(e) => return Err(format!("Could not spawn `{exec}`: {}", e)),
Ok(process) => process
};
let svg = match exec_dot(
self.dot.clone(),
vec![self.layout.into(), Format::Svg.into()],
) {
Ok(svg) => {
let out = String::from_utf8_lossy(svg.as_slice());
let split_at = out.find("<!-- Generated").unwrap(); // Remove svg header
if let Err(e) = process.stdin.unwrap().write_all(self.0.as_bytes())
{
panic!("Unable to write to `latex2svg`'s stdin: {}", e);
}
let mut result = String::new();
match process.stdout.unwrap().read_to_string(&mut result)
{
Err(e) => panic!("Unable to read `latex2svg` stdout: {}", e),
Ok(_) => {}
}
out.split_at(split_at).1.to_string()
}
Err(e) => return Err(format!("Unable to execute dot: {e}")),
};
println!("Done!");
Ok(result)
Ok(svg)
}
}
impl Cached for FormattedTex
{
type Key = String;
type Value = String;
impl Cached for Graphviz {
type Key = String;
type Value = String;
fn sql_table() -> &'static str {
"CREATE TABLE IF NOT EXISTS cached_tex (
fn sql_table() -> &'static str {
"CREATE TABLE IF NOT EXISTS cached_dot (
digest TEXT PRIMARY KEY,
svg BLOB NOT NULL);"
}
}
fn sql_get_query() -> &'static str {
"SELECT svg FROM cached_tex WHERE digest = (?1)"
}
fn sql_get_query() -> &'static str { "SELECT svg FROM cached_dot WHERE digest = (?1)" }
fn sql_insert_query() -> &'static str {
"INSERT INTO cached_tex (digest, svg) VALUES (?1, ?2)"
}
fn sql_insert_query() -> &'static str { "INSERT INTO cached_dot (digest, svg) VALUES (?1, ?2)" }
fn key(&self) -> <Self as Cached>::Key {
fn key(&self) -> <Self as Cached>::Key {
let mut hasher = Sha512::new();
hasher.input(self.0.as_bytes());
hasher.input((self.layout as usize).to_be_bytes().as_slice());
hasher.input(self.dot.as_bytes());
hasher.result_str()
}
}
}
impl Element for Tex {
fn location(&self) -> &Token { &self.location }
impl Element for Graphviz {
fn location(&self) -> &Token { &self.location }
fn kind(&self) -> ElemKind { (&self.block).into() }
fn kind(&self) -> ElemKind { ElemKind::Block }
fn element_name(&self) -> &'static str { "LaTeX" }
fn element_name(&self) -> &'static str { "Graphviz" }
fn to_string(&self) -> String { format!("{self:#?}") }
fn compile(&self, compiler: &Compiler, document: &dyn Document)
-> Result<String, String> {
fn to_string(&self) -> String { format!("{self:#?}") }
fn compile(&self, compiler: &Compiler, _document: &dyn Document) -> Result<String, String> {
match compiler.target() {
Target::HTML => {
static CACHE_INIT : Once = Once::new();
CACHE_INIT.call_once(|| if let Some(mut con) = compiler.cache() {
if let Err(e) = FormattedTex::init(&mut con)
{
eprintln!("Unable to create cache table: {e}");
static CACHE_INIT: Once = Once::new();
CACHE_INIT.call_once(|| {
if let Some(mut con) = compiler.cache() {
if let Err(e) = Graphviz::init(&mut con) {
eprintln!("Unable to create cache table: {e}");
}
}
});
let exec = document.get_variable(format!("tex.{}.exec", self.env).as_str())
.map_or("latex2svg".to_string(), |var| var.to_string());
// FIXME: Because fontsize is passed as an arg, verify that it cannot be used to execute python/shell code
let fontsize = document.get_variable(format!("tex.{}.fontsize", self.env).as_str())
.map_or("12".to_string(), |var| var.to_string());
let preamble = document.get_variable(format!("tex.{}.preamble", self.env).as_str())
.map_or("".to_string(), |var| var.to_string());
let prepend = if self.block == TexKind::Inline { "".to_string() }
else
{
document.get_variable(format!("tex.{}.block_prepend", self.env).as_str())
.map_or("".to_string(), |var| var.to_string()+"\n")
};
let latex = match self.block
{
TexKind::Inline => Tex::format_latex(
&fontsize,
&preamble,
&format!("${{{}}}$", self.tex)),
_ => Tex::format_latex(
&fontsize,
&preamble,
&format!("{prepend}{}", self.tex))
};
if let Some(mut con) = compiler.cache()
{
match latex.cached(&mut con, |s| s.latex_to_svg(&exec, &fontsize))
{
if let Some(mut con) = compiler.cache() {
match self.cached(&mut con, |s| s.dot_to_svg()) {
Ok(s) => Ok(s),
Err(e) => match e
{
CachedError::SqlErr(e) => Err(format!("Querying the cache failed: {e}")),
CachedError::GenErr(e) => Err(e)
}
Err(e) => match e {
CachedError::SqlErr(e) => {
Err(format!("Querying the cache failed: {e}"))
}
CachedError::GenErr(e) => Err(e),
},
}
} else {
match self.dot_to_svg() {
Ok(svg) => Ok(svg),
Err(e) => Err(e),
}
}
else
{
latex.latex_to_svg(&exec, &fontsize)
}
}
_ => todo!("Unimplemented")
}
}
}
pub struct TexRule {
re: [Regex; 2],
}
impl TexRule {
pub fn new() -> Self {
Self {
re: [
Regex::new(r"\$\|(?:\[(.*)\])?(?:((?:\\.|[^\\\\])*?)\|\$)?").unwrap(),
Regex::new(r"\$(?:\[(.*)\])?(?:((?:\\.|[^\\\\])*?)\$)?").unwrap(),
],
_ => todo!("Unimplemented"),
}
}
}
impl RegexRule for TexRule
{
fn name(&self) -> &'static str { "Tex" }
pub struct GraphRule {
re: [Regex; 1],
properties: PropertyParser,
}
fn regexes(&self) -> &[regex::Regex] { &self.re }
impl GraphRule {
pub fn new() -> Self {
let mut props = HashMap::new();
props.insert(
"layout".to_string(),
Property::new(
true,
"Graphviz layout engine see <https://graphviz.org/docs/layouts/>".to_string(),
Some("dot".to_string()),
),
);
Self {
re: [Regex::new(
r"\[graph\](?:\[((?:\\.|[^\[\]\\])*?)\])?(?:((?:\\.|[^\\\\])*?)\[/graph\])?",
)
.unwrap()],
properties: PropertyParser::new(props),
}
}
}
fn on_regex_match(&self, index: usize, parser: &dyn Parser, document: &dyn Document, token: Token, matches: Captures)
-> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>> {
impl RegexRule for GraphRule {
fn name(&self) -> &'static str { "Graph" }
fn regexes(&self) -> &[regex::Regex] { &self.re }
fn on_regex_match(
&self,
_: usize,
parser: &dyn Parser,
document: &dyn Document,
token: Token,
matches: Captures,
) -> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>> {
let mut reports = vec![];
let tex_env = matches.get(1)
.and_then(|env| Some(env.as_str().trim_start().trim_end()))
.and_then(|env| (!env.is_empty()).then_some(env))
.unwrap_or("main");
let tex_content = match matches.get(2)
{
// Unterminated `$`
let graph_content = match matches.get(2) {
// Unterminated `[graph]`
None => {
reports.push(
Report::build(ReportKind::Error, token.source(), token.start())
.with_message("Unterminated Tex Code")
.with_label(
Label::new((token.source().clone(), token.range.clone()))
.with_message(format!("Missing terminating `{}` after first `{}`",
["|$", "$"][index].fg(parser.colors().info),
["$|", "$"][index].fg(parser.colors().info)))
.with_color(parser.colors().error))
.finish());
.with_message("Unterminated Graph Code")
.with_label(
Label::new((token.source().clone(), token.range.clone()))
.with_message(format!(
"Missing terminating `{}` after first `{}`",
"[/graph]".fg(parser.colors().info),
"[graph]".fg(parser.colors().info)
))
.with_color(parser.colors().error),
)
.finish(),
);
return reports;
}
Some(content) => {
let processed = util::process_escaped('\\', ["|$", "$"][index],
content.as_str().trim_start().trim_end());
let processed = util::process_escaped(
'\\',
"[/graph]",
content.as_str().trim_start().trim_end(),
);
if processed.is_empty()
{
if processed.is_empty() {
reports.push(
Report::build(ReportKind::Warning, token.source(), content.start())
.with_message("Empty Tex Code")
.with_label(
Label::new((token.source().clone(), content.range()))
.with_message("Tex code is empty")
.with_color(parser.colors().warning))
.finish());
Report::build(ReportKind::Error, token.source(), content.start())
.with_message("Empty Graph Code")
.with_label(
Label::new((token.source().clone(), content.range()))
.with_message("Graph code is empty")
.with_color(parser.colors().error),
)
.finish(),
);
return reports;
}
processed
}
};
// Properties
let properties = match matches.get(1) {
None => match self.properties.default() {
Ok(properties) => properties,
Err(e) => {
reports.push(
Report::build(ReportKind::Error, token.source(), token.start())
.with_message("Invalid Graph")
.with_label(
Label::new((token.source().clone(), token.range.clone()))
.with_message(format!("Graph is missing property: {e}"))
.with_color(parser.colors().error),
)
.finish(),
);
return reports;
}
},
Some(props) => {
let processed =
util::process_escaped('\\', "]", props.as_str().trim_start().trim_end());
match self.properties.parse(processed.as_str()) {
Err(e) => {
reports.push(
Report::build(ReportKind::Error, token.source(), props.start())
.with_message("Invalid Graph Properties")
.with_label(
Label::new((token.source().clone(), props.range()))
.with_message(e)
.with_color(parser.colors().error),
)
.finish(),
);
return reports;
}
Ok(properties) => properties,
}
}
};
// Property "layout"
let graph_layout = match properties.get("layout", |prop, value| {
layout_from_str(value.as_str()).map_err(|e| (prop, e))
}) {
Ok((_prop, kind)) => kind,
Err(e) => match e {
PropertyMapError::ParseError((prop, err)) => {
reports.push(
Report::build(ReportKind::Error, token.source(), token.start())
.with_message("Invalid Graph Property")
.with_label(
Label::new((token.source().clone(), token.range.clone()))
.with_message(format!(
"Property `layout: {}` cannot be converted: {}",
prop.fg(parser.colors().info),
err.fg(parser.colors().error)
))
.with_color(parser.colors().warning),
)
.finish(),
);
return reports;
}
PropertyMapError::NotFoundError(err) => {
reports.push(
Report::build(ReportKind::Error, token.source(), token.start())
.with_message("Invalid Graph Property")
.with_label(
Label::new((
token.source().clone(),
token.start() + 1..token.end(),
))
.with_message(format!(
"Property `{}` is missing",
err.fg(parser.colors().info)
))
.with_color(parser.colors().warning),
)
.finish(),
);
return reports;
}
},
};
// TODO: Caption
parser.push(document, Box::new(Tex::new(
token,
if index == 1 { TexKind::Inline } else { TexKind::Block },
tex_env.to_string(),
tex_content,
None,
)));
parser.push(
document,
Box::new(Graphviz {
location: token,
dot: graph_content,
layout: graph_layout,
caption: None,
}),
);
reports
}
}
// TODO
fn lua_bindings<'lua>(&self, _lua: &'lua Lua) -> Vec<(String, Function<'lua>)> { vec![] }
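For orientation, the new rule matches source blocks shaped like the following (a hypothetical input snippet inferred from the regex above; `layout` is the only property defined here and defaults to `dot`, and the exact property syntax is whatever `PropertyParser` accepts):

[graph][layout=neato]
digraph G {
    a -> b;
    b -> c;
}
[/graph]

The chosen layout and the dot source are hashed together in `key()`, so each layout/dot pair gets its own row in the `cached_dot` table.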

View file

@ -11,4 +11,5 @@ pub mod section;
pub mod link;
pub mod code;
pub mod tex;
pub mod graphviz;
pub mod raw;

View file

@ -158,7 +158,7 @@ impl RegexRule for RawRule
.with_message("Invalid Code Property")
.with_label(
Label::new((token.source().clone(), token.start()+1..token.end()))
.with_message(format!("Property `{}` doesn't exist",
.with_message(format!("Property `{}` is missing",
err.fg(parser.colors().info)))
.with_color(parser.colors().warning))
.finish());

View file

@ -1,23 +1,36 @@
use crate::parser::parser::Parser;
use super::{code::CodeRule, comment::CommentRule, import::ImportRule, link::LinkRule, list::ListRule, paragraph::ParagraphRule, raw::RawRule, script::ScriptRule, section::SectionRule, style::StyleRule, tex::TexRule, text::TextRule, variable::{VariableRule, VariableSubstitutionRule}};
use super::code::CodeRule;
use super::comment::CommentRule;
use super::graphviz::GraphRule;
use super::import::ImportRule;
use super::link::LinkRule;
use super::list::ListRule;
use super::paragraph::ParagraphRule;
use super::raw::RawRule;
use super::script::ScriptRule;
use super::section::SectionRule;
use super::style::StyleRule;
use super::tex::TexRule;
use super::text::TextRule;
use super::variable::VariableRule;
use super::variable::VariableSubstitutionRule;
pub fn register<P: Parser>(parser: &mut P)
{
pub fn register<P: Parser>(parser: &mut P) {
parser.add_rule(Box::new(CommentRule::new()), None);
parser.add_rule(Box::new(ParagraphRule::new()), None);
parser.add_rule(Box::new(ImportRule::new()), None);
parser.add_rule(Box::new(ScriptRule::new()), None);
parser.add_rule(Box::new(VariableRule::new()), None);
parser.add_rule(Box::new(VariableSubstitutionRule::new()), None);
parser.add_rule(Box::new(RawRule::new()), None);
parser.add_rule(Box::new(ListRule::new()), None);
parser.add_rule(Box::new(CodeRule::new()), None);
parser.add_rule(Box::new(TexRule::new()), None);
parser.add_rule(Box::new(ImportRule::new()), None);
parser.add_rule(Box::new(ScriptRule::new()), None);
parser.add_rule(Box::new(VariableRule::new()), None);
parser.add_rule(Box::new(VariableSubstitutionRule::new()), None);
parser.add_rule(Box::new(RawRule::new()), None);
parser.add_rule(Box::new(ListRule::new()), None);
parser.add_rule(Box::new(CodeRule::new()), None);
parser.add_rule(Box::new(TexRule::new()), None);
parser.add_rule(Box::new(GraphRule::new()), None);
parser.add_rule(Box::new(StyleRule::new()), None);
parser.add_rule(Box::new(SectionRule::new()), None);
parser.add_rule(Box::new(LinkRule::new()), None);
parser.add_rule(Box::new(TextRule::default()), None);
parser.add_rule(Box::new(StyleRule::new()), None);
parser.add_rule(Box::new(SectionRule::new()), None);
parser.add_rule(Box::new(LinkRule::new()), None);
parser.add_rule(Box::new(TextRule::default()), None);
}
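Since `LangParser::default()` (in the parser changes below) calls `register`, the new rule needs no extra wiring. A minimal usage sketch mirroring `main.rs`, assuming a file named `doc.nml` exists:

let parser = LangParser::default();
let source = SourceFile::new("doc.nml".to_string(), None).unwrap();
let doc = parser.parse(Rc::new(source), None);
// `doc` now contains a Graphviz element for each [graph] block in the file.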

View file

@ -1,32 +1,52 @@
use std::{io::{Read, Write}, ops::Range, process::{Command, Stdio}, rc::Rc, sync::Once};
use std::io::Read;
use std::io::Write;
use std::ops::Range;
use std::process::Command;
use std::process::Stdio;
use std::rc::Rc;
use std::sync::Once;
use ariadne::{Fmt, Label, Report, ReportKind};
use crypto::{digest::Digest, sha2::Sha512};
use mlua::{Function, Lua};
use regex::{Captures, Regex};
use ariadne::Fmt;
use ariadne::Label;
use ariadne::Report;
use ariadne::ReportKind;
use crypto::digest::Digest;
use crypto::sha2::Sha512;
use mlua::Function;
use mlua::Lua;
use regex::Captures;
use regex::Regex;
use crate::{cache::cache::{Cached, CachedError}, compiler::compiler::{Compiler, Target}, document::{document::Document, element::{ElemKind, Element}}, parser::{parser::Parser, rule::RegexRule, source::{Source, Token}, util}};
use crate::cache::cache::Cached;
use crate::cache::cache::CachedError;
use crate::compiler::compiler::Compiler;
use crate::compiler::compiler::Target;
use crate::document::document::Document;
use crate::document::element::ElemKind;
use crate::document::element::Element;
use crate::parser::parser::Parser;
use crate::parser::rule::RegexRule;
use crate::parser::source::Source;
use crate::parser::source::Token;
use crate::parser::util;
#[derive(Debug, PartialEq, Eq)]
enum TexKind
{
enum TexKind {
Block,
Inline,
}
impl From<&TexKind> for ElemKind
{
fn from(value: &TexKind) -> Self {
impl From<&TexKind> for ElemKind {
fn from(value: &TexKind) -> Self {
match value {
TexKind::Inline => ElemKind::Inline,
_ => ElemKind::Block
_ => ElemKind::Block,
}
}
}
}
#[derive(Debug)]
struct Tex
{
struct Tex {
location: Token,
block: TexKind,
env: String,
@ -35,49 +55,59 @@ struct Tex
}
impl Tex {
fn new(location: Token, block: TexKind, env: String, tex: String, caption: Option<String>) -> Self {
Self { location, block, env, tex, caption }
}
fn new(
location: Token,
block: TexKind,
env: String,
tex: String,
caption: Option<String>,
) -> Self {
Self {
location,
block,
env,
tex,
caption,
}
}
fn format_latex(fontsize: &String, preamble: &String, tex: &String) -> FormattedTex
{
FormattedTex(format!(r"\documentclass[{}pt,preview]{{standalone}}
fn format_latex(fontsize: &String, preamble: &String, tex: &String) -> FormattedTex {
FormattedTex(format!(
r"\documentclass[{}pt,preview]{{standalone}}
{}
\begin{{document}}
\begin{{preview}}
{}
\end{{preview}}
\end{{document}}",
fontsize, preamble, tex))
fontsize, preamble, tex
))
}
}
struct FormattedTex(String);
impl FormattedTex
{
impl FormattedTex {
/// Renders latex to svg
fn latex_to_svg(&self, exec: &String, fontsize: &String) -> Result<String, String>
{
fn latex_to_svg(&self, exec: &String, fontsize: &String) -> Result<String, String> {
print!("Rendering LaTex `{}`... ", self.0);
let process = match Command::new(exec)
.arg("--fontsize").arg(fontsize)
.arg("--fontsize")
.arg(fontsize)
.stdout(Stdio::piped())
.stdin(Stdio::piped())
.spawn()
{
Err(e) => return Err(format!("Could not spawn `{exec}`: {}", e)),
Ok(process) => process
};
if let Err(e) = process.stdin.unwrap().write_all(self.0.as_bytes())
{
Err(e) => return Err(format!("Could not spawn `{exec}`: {}", e)),
Ok(process) => process,
};
if let Err(e) = process.stdin.unwrap().write_all(self.0.as_bytes()) {
panic!("Unable to write to `latex2svg`'s stdin: {}", e);
}
let mut result = String::new();
match process.stdout.unwrap().read_to_string(&mut result)
{
match process.stdout.unwrap().read_to_string(&mut result) {
Err(e) => panic!("Unable to read `latex2svg` stdout: {}", e),
Ok(_) => {}
}
@ -87,101 +117,103 @@ impl FormattedTex
}
}
impl Cached for FormattedTex
{
type Key = String;
type Value = String;
impl Cached for FormattedTex {
type Key = String;
type Value = String;
fn sql_table() -> &'static str {
fn sql_table() -> &'static str {
"CREATE TABLE IF NOT EXISTS cached_tex (
digest TEXT PRIMARY KEY,
svg BLOB NOT NULL);"
}
}
fn sql_get_query() -> &'static str {
fn sql_get_query() -> &'static str {
"SELECT svg FROM cached_tex WHERE digest = (?1)"
}
}
fn sql_insert_query() -> &'static str {
fn sql_insert_query() -> &'static str {
"INSERT INTO cached_tex (digest, svg) VALUES (?1, ?2)"
}
}
fn key(&self) -> <Self as Cached>::Key {
fn key(&self) -> <Self as Cached>::Key {
let mut hasher = Sha512::new();
hasher.input(self.0.as_bytes());
hasher.result_str()
}
}
}
impl Element for Tex {
fn location(&self) -> &Token { &self.location }
fn location(&self) -> &Token {
&self.location
}
fn kind(&self) -> ElemKind { (&self.block).into() }
fn kind(&self) -> ElemKind {
(&self.block).into()
}
fn element_name(&self) -> &'static str { "LaTeX" }
fn element_name(&self) -> &'static str {
"LaTeX"
}
fn to_string(&self) -> String { format!("{self:#?}") }
fn compile(&self, compiler: &Compiler, document: &dyn Document)
-> Result<String, String> {
fn to_string(&self) -> String {
format!("{self:#?}")
}
fn compile(&self, compiler: &Compiler, document: &dyn Document) -> Result<String, String> {
match compiler.target() {
Target::HTML => {
static CACHE_INIT : Once = Once::new();
CACHE_INIT.call_once(|| if let Some(mut con) = compiler.cache() {
if let Err(e) = FormattedTex::init(&mut con)
{
eprintln!("Unable to create cache table: {e}");
static CACHE_INIT: Once = Once::new();
CACHE_INIT.call_once(|| {
if let Some(mut con) = compiler.cache() {
if let Err(e) = FormattedTex::init(&mut con) {
eprintln!("Unable to create cache table: {e}");
}
}
});
let exec = document.get_variable(format!("tex.{}.exec", self.env).as_str())
let exec = document
.get_variable(format!("tex.{}.exec", self.env).as_str())
.map_or("latex2svg".to_string(), |var| var.to_string());
// FIXME: Because fontsize is passed as an arg, verify that it cannot be used to execute python/shell code
let fontsize = document.get_variable(format!("tex.{}.fontsize", self.env).as_str())
let fontsize = document
.get_variable(format!("tex.{}.fontsize", self.env).as_str())
.map_or("12".to_string(), |var| var.to_string());
let preamble = document.get_variable(format!("tex.{}.preamble", self.env).as_str())
let preamble = document
.get_variable(format!("tex.{}.preamble", self.env).as_str())
.map_or("".to_string(), |var| var.to_string());
let prepend = if self.block == TexKind::Inline { "".to_string() }
else
{
document.get_variable(format!("tex.{}.block_prepend", self.env).as_str())
.map_or("".to_string(), |var| var.to_string()+"\n")
let prepend = if self.block == TexKind::Inline {
"".to_string()
} else {
document
.get_variable(format!("tex.{}.block_prepend", self.env).as_str())
.map_or("".to_string(), |var| var.to_string() + "\n")
};
let latex = match self.block
{
TexKind::Inline => Tex::format_latex(
&fontsize,
&preamble,
&format!("${{{}}}$", self.tex)),
_ => Tex::format_latex(
&fontsize,
&preamble,
&format!("{prepend}{}", self.tex))
};
if let Some(mut con) = compiler.cache()
{
match latex.cached(&mut con, |s| s.latex_to_svg(&exec, &fontsize))
{
Ok(s) => Ok(s),
Err(e) => match e
{
CachedError::SqlErr(e) => Err(format!("Querying the cache failed: {e}")),
CachedError::GenErr(e) => Err(e)
}
let latex = match self.block {
TexKind::Inline => {
Tex::format_latex(&fontsize, &preamble, &format!("${{{}}}$", self.tex))
}
}
else
{
_ => Tex::format_latex(&fontsize, &preamble, &format!("{prepend}{}", self.tex)),
};
if let Some(mut con) = compiler.cache() {
match latex.cached(&mut con, |s| s.latex_to_svg(&exec, &fontsize)) {
Ok(s) => Ok(s),
Err(e) => match e {
CachedError::SqlErr(e) => {
Err(format!("Querying the cache failed: {e}"))
}
CachedError::GenErr(e) => Err(e),
},
}
} else {
latex.latex_to_svg(&exec, &fontsize)
}
}
_ => todo!("Unimplemented")
_ => todo!("Unimplemented"),
}
}
}
}
pub struct TexRule {
@ -199,51 +231,68 @@ impl TexRule {
}
}
impl RegexRule for TexRule
{
fn name(&self) -> &'static str { "Tex" }
impl RegexRule for TexRule {
fn name(&self) -> &'static str {
"Tex"
}
fn regexes(&self) -> &[regex::Regex] { &self.re }
fn regexes(&self) -> &[regex::Regex] {
&self.re
}
fn on_regex_match(&self, index: usize, parser: &dyn Parser, document: &dyn Document, token: Token, matches: Captures)
-> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>> {
fn on_regex_match(
&self,
index: usize,
parser: &dyn Parser,
document: &dyn Document,
token: Token,
matches: Captures,
) -> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>> {
let mut reports = vec![];
let tex_env = matches.get(1)
let tex_env = matches
.get(1)
.and_then(|env| Some(env.as_str().trim_start().trim_end()))
.and_then(|env| (!env.is_empty()).then_some(env))
.unwrap_or("main");
let tex_content = match matches.get(2)
{
let tex_content = match matches.get(2) {
// Unterminated `$`
None => {
reports.push(
Report::build(ReportKind::Error, token.source(), token.start())
.with_message("Unterminated Tex Code")
.with_label(
Label::new((token.source().clone(), token.range.clone()))
.with_message(format!("Missing terminating `{}` after first `{}`",
["|$", "$"][index].fg(parser.colors().info),
["$|", "$"][index].fg(parser.colors().info)))
.with_color(parser.colors().error))
.finish());
.with_message("Unterminated Tex Code")
.with_label(
Label::new((token.source().clone(), token.range.clone()))
.with_message(format!(
"Missing terminating `{}` after first `{}`",
["|$", "$"][index].fg(parser.colors().info),
["$|", "$"][index].fg(parser.colors().info)
))
.with_color(parser.colors().error),
)
.finish(),
);
return reports;
}
Some(content) => {
let processed = util::process_escaped('\\', ["|$", "$"][index],
content.as_str().trim_start().trim_end());
let processed = util::process_escaped(
'\\',
["|$", "$"][index],
content.as_str().trim_start().trim_end(),
);
if processed.is_empty()
{
if processed.is_empty() {
reports.push(
Report::build(ReportKind::Warning, token.source(), content.start())
.with_message("Empty Tex Code")
.with_label(
Label::new((token.source().clone(), content.range()))
.with_message("Tex code is empty")
.with_color(parser.colors().warning))
.finish());
.with_message("Empty Tex Code")
.with_label(
Label::new((token.source().clone(), content.range()))
.with_message("Tex code is empty")
.with_color(parser.colors().warning),
)
.finish(),
);
}
processed
}
@ -251,17 +300,26 @@ impl RegexRule for TexRule
// TODO: Caption
parser.push(document, Box::new(Tex::new(
token,
if index == 1 { TexKind::Inline } else { TexKind::Block },
tex_env.to_string(),
tex_content,
None,
)));
parser.push(
document,
Box::new(Tex::new(
token,
if index == 1 {
TexKind::Inline
} else {
TexKind::Block
},
tex_env.to_string(),
tex_content,
None,
)),
);
reports
}
}
// TODO
fn lua_bindings<'lua>(&self, _lua: &'lua Lua) -> Vec<(String, Function<'lua>)> { vec![] }
fn lua_bindings<'lua>(&self, _lua: &'lua Lua) -> Vec<(String, Function<'lua>)> {
vec![]
}
}

View file

@ -114,6 +114,8 @@ impl Parser for LsParser
fn state(&self) -> std::cell::Ref<'_, StateHolder> { self.state.borrow() }
fn state_mut(&self) -> std::cell::RefMut<'_, StateHolder> { self.state.borrow_mut() }
fn has_error(&self) -> bool { true }
fn push<'a>(&self, doc: &dyn Document, elem: Box<dyn Element>) {
todo!()

View file

@ -1,28 +1,30 @@
#![feature(char_indices_offset)]
mod document;
mod cache;
mod compiler;
mod parser;
mod document;
mod elements;
mod lua;
mod cache;
mod parser;
use std::{env, rc::Rc};
use std::env;
use std::rc::Rc;
use compiler::compiler::Compiler;
use getopts::Options;
use parser::{langparser::LangParser, parser::Parser};
use parser::langparser::LangParser;
use parser::parser::Parser;
use crate::parser::source::SourceFile;
extern crate getopts;
fn print_usage(program: &str, opts: Options) {
let brief = format!("Usage: {} -i FILE [options]", program);
print!("{}", opts.usage(&brief));
let brief = format!("Usage: {} -i FILE [options]", program);
print!("{}", opts.usage(&brief));
}
fn print_version()
{
print!("NML -- Not a Markup Language
fn print_version() {
print!(
"NML -- Not a Markup Language
Copyright (c) 2024
NML is licensed under the GNU Affero General Public License version 3 (AGPLv3),
under the terms of the Free Software Foundation <https://www.gnu.org/licenses/agpl-3.0.en.html>.
@ -30,12 +32,13 @@ under the terms of the Free Software Foundation <https://www.gnu.org/licenses/ag
This program is free software; you may modify and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
NML version: 0.4\n");
NML version: 0.4\n"
);
}
fn main() {
let args: Vec<String> = env::args().collect();
let program = args[0].clone();
let args: Vec<String> = env::args().collect();
let program = args[0].clone();
let mut opts = Options::new();
opts.optopt("i", "", "Input file", "FILE");
@ -45,11 +48,12 @@ fn main() {
opts.optflag("v", "version", "Print program version and licenses");
let matches = match opts.parse(&args[1..]) {
Ok(m) => { m }
Err(f) => { panic!("{}", f.to_string()) }
Ok(m) => m,
Err(f) => {
panic!("{}", f.to_string())
}
};
if matches.opt_present("v")
{
if matches.opt_present("v") {
print_version();
return;
}
@ -72,16 +76,15 @@ fn main() {
let source = SourceFile::new(input.to_string(), None).unwrap();
let doc = parser.parse(Rc::new(source), None);
if debug_opts.contains(&"ast".to_string())
{
if debug_opts.contains(&"ast".to_string()) {
println!("-- BEGIN AST DEBUGGING --");
doc.content().borrow().iter().for_each(|elem| {
println!("{}", (elem).to_string())
});
doc.content()
.borrow()
.iter()
.for_each(|elem| println!("{}", (elem).to_string()));
println!("-- END AST DEBUGGING --");
}
// TODO
//if debug_opts.contains(&"ref".to_string())
//{
@ -92,8 +95,7 @@ fn main() {
// });
// println!("-- END REFERENCES DEBUGGING --");
//}
if debug_opts.contains(&"var".to_string())
{
if debug_opts.contains(&"var".to_string()) {
println!("-- BEGIN VARIABLES DEBUGGING --");
let sc = doc.scope().borrow();
sc.variables.iter().for_each(|(_name, var)| {
@ -102,10 +104,13 @@ fn main() {
println!("-- END VARIABLES DEBUGGING --");
}
if parser.has_error() {
println!("Compilation aborted due to errors while parsing");
return;
}
let compiler = Compiler::new(compiler::compiler::Target::HTML, db_path);
let out = compiler.compile(doc.as_ref());
std::fs::write("a.html", out).unwrap();
}
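For reference, invocation follows `print_usage` above: `program -i FILE [options]`, e.g. something like `nml -i index.nml` (the binary name is an assumption). On success the compiled HTML is written to `a.html`; if the parser reported errors, compilation is aborted before the Compiler runs.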

View file

@ -1,15 +1,38 @@
use std::{cell::{Ref, RefCell, RefMut}, collections::{HashMap, HashSet}, ops::Range, rc::Rc};
use std::cell::RefCell;
use std::cell::RefMut;
use std::collections::HashMap;
use std::collections::HashSet;
use std::ops::Range;
use std::rc::Rc;
use ariadne::{Label, Report};
use ariadne::Label;
use ariadne::Report;
use crate::{document::{document::{DocumentAccessors, Document}, element::{ElemKind, Element}, langdocument::LangDocument}, elements::{paragraph::Paragraph, registrar::register, text::Text}, lua::kernel::{Kernel, KernelHolder}, parser::source::{SourceFile, VirtualSource}};
use crate::document::document::Document;
use crate::document::document::DocumentAccessors;
use crate::document::element::ElemKind;
use crate::document::element::Element;
use crate::document::langdocument::LangDocument;
use crate::elements::paragraph::Paragraph;
use crate::elements::registrar::register;
use crate::elements::text::Text;
use crate::lua::kernel::Kernel;
use crate::lua::kernel::KernelHolder;
use crate::parser::source::SourceFile;
use crate::parser::source::VirtualSource;
use super::{parser::{Parser, ReportColors}, rule::Rule, source::{Cursor, Source, Token}, state::StateHolder, util};
use super::parser::Parser;
use super::parser::ReportColors;
use super::rule::Rule;
use super::source::Cursor;
use super::source::Source;
use super::source::Token;
use super::state::StateHolder;
use super::util;
/// Parser for the language
#[derive(Debug)]
pub struct LangParser
{
pub struct LangParser {
rules: Vec<Box<dyn Rule>>,
colors: ReportColors,
@ -19,10 +42,8 @@ pub struct LangParser
pub kernels: RefCell<HashMap<String, Kernel>>,
}
impl LangParser
{
pub fn default() -> Self
{
impl LangParser {
pub fn default() -> Self {
let mut s = Self {
rules: vec![],
colors: ReportColors::with_colors(),
@ -32,24 +53,25 @@ impl LangParser
};
register(&mut s);
s.kernels.borrow_mut()
s.kernels
.borrow_mut()
.insert("main".to_string(), Kernel::new(&s));
s
}
fn handle_reports<'a>(&self, _source: Rc<dyn Source>, reports: Vec<Report<'a, (Rc<dyn Source>, Range<usize>)>>)
{
for mut report in reports
{
fn handle_reports<'a>(
&self,
_source: Rc<dyn Source>,
reports: Vec<Report<'a, (Rc<dyn Source>, Range<usize>)>>,
) {
for mut report in reports {
let mut sources: HashSet<Rc<dyn Source>> = HashSet::new();
fn recurse_source(sources: &mut HashSet<Rc<dyn Source>>, source: Rc<dyn Source>) {
sources.insert(source.clone());
match source.location()
{
match source.location() {
Some(parent) => {
let parent_source = parent.source();
if sources.get(&parent_source).is_none()
{
if sources.get(&parent_source).is_none() {
recurse_source(sources, parent_source);
}
}
@ -61,80 +83,92 @@ impl LangParser
recurse_source(&mut sources, label.span.0.clone());
});
let cache = sources.iter()
let cache = sources
.iter()
.map(|source| (source.clone(), source.content().clone()))
.collect::<Vec<(Rc<dyn Source>, String)>>();
cache.iter()
.for_each(|(source, _)| {
if let Some (location) = source.location()
{
if let Some(_s) = source.downcast_ref::<SourceFile>()
{
report.labels.push(
Label::new((location.source(), location.start()+1 .. location.end()))
cache.iter().for_each(|(source, _)| {
if let Some(location) = source.location() {
if let Some(_s) = source.downcast_ref::<SourceFile>() {
report.labels.push(
Label::new((location.source(), location.start() + 1..location.end()))
.with_message("In file included from here")
.with_order(-1)
);
};
.with_order(-1),
);
};
if let Some(_s) = source.downcast_ref::<VirtualSource>()
{
let start = location.start() + (location.source().content().as_bytes()[location.start()] == '\n' as u8)
if let Some(_s) = source.downcast_ref::<VirtualSource>() {
let start = location.start()
+ (location.source().content().as_bytes()[location.start()]
== '\n' as u8)
.then_some(1)
.unwrap_or(0);
report.labels.push(
Label::new((location.source(), start .. location.end()))
report.labels.push(
Label::new((location.source(), start..location.end()))
.with_message("In evaluation of")
.with_order(-1)
);
};
}
});
.with_order(-1),
);
};
}
});
report.eprint(ariadne::sources(cache)).unwrap()
}
}
}
impl Parser for LangParser
{
fn colors(&self) -> &ReportColors { &self.colors }
impl Parser for LangParser {
fn colors(&self) -> &ReportColors {
&self.colors
}
fn rules(&self) -> &Vec<Box<dyn Rule>> { &self.rules }
fn rules_mut(&mut self) -> &mut Vec<Box<dyn Rule>> { &mut self.rules }
fn rules(&self) -> &Vec<Box<dyn Rule>> {
&self.rules
}
fn rules_mut(&mut self) -> &mut Vec<Box<dyn Rule>> {
&mut self.rules
}
fn state(&self) -> std::cell::Ref<'_, StateHolder> { self.state.borrow() }
fn state_mut(&self) -> std::cell::RefMut<'_, StateHolder> { self.state.borrow_mut() }
fn state(&self) -> std::cell::Ref<'_, StateHolder> {
self.state.borrow()
}
fn state_mut(&self) -> std::cell::RefMut<'_, StateHolder> {
self.state.borrow_mut()
}
fn has_error(&self) -> bool { *self.err_flag.borrow() }
/// Add an [`Element`] to the [`Document`]
fn push<'a>(&self, doc: &dyn Document, elem: Box<dyn Element>)
{
if elem.kind() == ElemKind::Inline || elem.kind() == ElemKind::Invisible
{
let mut paragraph = doc.last_element_mut::<Paragraph>()
fn push<'a>(&self, doc: &dyn Document, elem: Box<dyn Element>) {
if elem.kind() == ElemKind::Inline || elem.kind() == ElemKind::Invisible {
let mut paragraph = doc
.last_element_mut::<Paragraph>()
.or_else(|| {
doc.push(Box::new(Paragraph::new(elem.location().clone())));
doc.last_element_mut::<Paragraph>()
}).unwrap();
})
.unwrap();
paragraph.push(elem);
}
else
{
} else {
// Process paragraph events
if doc.last_element::<Paragraph>()
.is_some_and(|_| true)
{
self.handle_reports(doc.source(),
self.state_mut().on_scope_end(self, doc, super::state::Scope::PARAGRAPH));
if doc.last_element::<Paragraph>().is_some_and(|_| true) {
self.handle_reports(
doc.source(),
self.state_mut()
.on_scope_end(self, doc, super::state::Scope::PARAGRAPH),
);
}
doc.push(elem);
}
}
fn parse<'a>(&self, source: Rc<dyn Source>, parent: Option<&'a dyn Document<'a>>) -> Box<dyn Document<'a>+'a>
{
fn parse<'a>(
&self,
source: Rc<dyn Source>,
parent: Option<&'a dyn Document<'a>>,
) -> Box<dyn Document<'a> + 'a> {
let doc = LangDocument::new(source.clone(), parent);
let mut matches = Vec::new();
for _ in 0..self.rules.len() {
@ -144,53 +178,59 @@ impl Parser for LangParser
let content = source.content();
let mut cursor = Cursor::new(0usize, doc.source()); // Cursor in file
if let Some(parent) = parent // Terminate parent's paragraph state
if let Some(parent) = parent
// Terminate parent's paragraph state
{
self.handle_reports(parent.source(),
self.state_mut().on_scope_end(self, parent, super::state::Scope::PARAGRAPH));
self.handle_reports(
parent.source(),
self.state_mut()
.on_scope_end(self, parent, super::state::Scope::PARAGRAPH),
);
}
loop
{
loop {
let (rule_pos, rule, match_data) = self.update_matches(&cursor, &mut matches);
// Unmatched content
let text_content = util::process_text(&doc, &content.as_str()[cursor.pos..rule_pos.pos]);
if !text_content.is_empty()
{
self.push(&doc, Box::new(Text::new(
Token::new(cursor.pos..rule_pos.pos, source.clone()),
text_content
)));
let text_content =
util::process_text(&doc, &content.as_str()[cursor.pos..rule_pos.pos]);
if !text_content.is_empty() {
self.push(
&doc,
Box::new(Text::new(
Token::new(cursor.pos..rule_pos.pos, source.clone()),
text_content,
)),
);
}
if let Some(rule) = rule
{
if let Some(rule) = rule {
// Rule callback
let dd: &'a dyn Document = unsafe {std::mem::transmute(&doc as &dyn Document)};
let dd: &'a dyn Document = unsafe { std::mem::transmute(&doc as &dyn Document) };
let (new_cursor, reports) = rule.on_match(self, dd, rule_pos, match_data);
self.handle_reports(doc.source(), reports);
// Advance
cursor = new_cursor;
}
else // No rules left
} else
// No rules left
{
break;
}
}
// State
self.handle_reports(doc.source(),
self.state_mut().on_scope_end(self, &doc, super::state::Scope::DOCUMENT));
self.handle_reports(
doc.source(),
self.state_mut()
.on_scope_end(self, &doc, super::state::Scope::DOCUMENT),
);
return Box::new(doc);
}
fn parse_into<'a>(&self, source: Rc<dyn Source>, document: &'a dyn Document<'a>)
{
fn parse_into<'a>(&self, source: Rc<dyn Source>, document: &'a dyn Document<'a>) {
let mut matches = Vec::new();
for _ in 0..self.rules.len() {
matches.push((0usize, None));
@ -199,22 +239,23 @@ impl Parser for LangParser
let content = source.content();
let mut cursor = Cursor::new(0usize, source.clone());
loop
{
loop {
let (rule_pos, rule, match_data) = self.update_matches(&cursor, &mut matches);
// Unmatched content
let text_content = util::process_text(document, &content.as_str()[cursor.pos..rule_pos.pos]);
if !text_content.is_empty()
{
self.push(document, Box::new(Text::new(
Token::new(cursor.pos..rule_pos.pos, source.clone()),
text_content
)));
let text_content =
util::process_text(document, &content.as_str()[cursor.pos..rule_pos.pos]);
if !text_content.is_empty() {
self.push(
document,
Box::new(Text::new(
Token::new(cursor.pos..rule_pos.pos, source.clone()),
text_content,
)),
);
}
if let Some(rule) = rule
{
if let Some(rule) = rule {
// Rule callback
let (new_cursor, reports) = (*rule).on_match(self, document, rule_pos, match_data);
@ -222,8 +263,8 @@ impl Parser for LangParser
// Advance
cursor = new_cursor;
}
else // No rules left
} else
// No rules left
{
break;
}
@ -232,24 +273,19 @@ impl Parser for LangParser
// State
//self.handle_reports(source.clone(),
// self.state_mut().on_scope_end(&self, &document, super::state::Scope::DOCUMENT));
//return doc;
}
}
impl KernelHolder for LangParser
{
fn get_kernel(&self, name: &str)
-> Option<RefMut<'_, Kernel>> {
RefMut::filter_map(self.kernels.borrow_mut(),
|map| map.get_mut(name)).ok()
}
impl KernelHolder for LangParser {
fn get_kernel(&self, name: &str) -> Option<RefMut<'_, Kernel>> {
RefMut::filter_map(self.kernels.borrow_mut(), |map| map.get_mut(name)).ok()
}
fn insert_kernel(&self, name: String, kernel: Kernel)
-> RefMut<'_, Kernel> {
//TODO do not get
self.kernels.borrow_mut()
.insert(name.clone(), kernel);
fn insert_kernel(&self, name: String, kernel: Kernel) -> RefMut<'_, Kernel> {
//TODO do not get
self.kernels.borrow_mut().insert(name.clone(), kernel);
self.get_kernel(name.as_str()).unwrap()
}
}
}

View file

@ -1,6 +1,6 @@
pub mod source;
pub mod parser;
pub mod langparser;
pub mod parser;
pub mod rule;
pub mod source;
pub mod state;
pub mod util;

View file

@ -1,19 +1,20 @@
use std::any::Any;
use std::cell::{Ref, RefMut};
use std::cell::Ref;
use std::cell::RefMut;
use std::rc::Rc;
use unicode_segmentation::UnicodeSegmentation;
use super::rule::Rule;
use super::source::{Cursor, Source};
use super::source::Cursor;
use super::source::Source;
use super::state::StateHolder;
use crate::document::document::Document;
use crate::document::element::Element;
use ariadne::Color;
use crate::lua::kernel::KernelHolder;
use ariadne::Color;
#[derive(Debug)]
pub struct ReportColors
{
pub struct ReportColors {
pub error: Color,
pub warning: Color,
pub info: Color,
@ -40,8 +41,7 @@ impl ReportColors {
}
}
pub trait Parser: KernelHolder
{
pub trait Parser: KernelHolder {
/// Gets the colors for formatting errors
///
/// When colors are disabled, all colors should resolve to empty string
@ -50,33 +50,40 @@ pub trait Parser: KernelHolder
fn rules(&self) -> &Vec<Box<dyn Rule>>;
fn rules_mut(&mut self) -> &mut Vec<Box<dyn Rule>>;
fn add_rule(&mut self, rule: Box<dyn Rule>, after: Option<&'static str>) -> Result<(), String>
{
fn add_rule(&mut self, rule: Box<dyn Rule>, after: Option<&'static str>) -> Result<(), String> {
// Error on duplicate rule
let rule_name = (*rule).name();
if let Err(e) = self.rules().iter().try_for_each(|rule| {
if (*rule).name() != rule_name { return Ok(()); }
return Err(format!("Attempted to introduce duplicate rule: `{rule_name}`"));
})
{
return Err(e)
if (*rule).name() != rule_name {
return Ok(());
}
return Err(format!(
"Attempted to introduce duplicate rule: `{rule_name}`"
));
}) {
return Err(e);
}
match after
{
match after {
Some(name) => {
let before = self.rules().iter()
let before = self
.rules()
.iter()
.enumerate()
.find(|(_pos, r)| (r).name() == name);
match before
{
Some((pos, _)) => self.rules_mut().insert(pos+1, rule),
_ => return Err(format!("Unable to find rule named `{name}`, to insert rule `{}` after it", rule.name()))
match before {
Some((pos, _)) => self.rules_mut().insert(pos + 1, rule),
_ => {
return Err(format!(
"Unable to find rule named `{name}`, to insert rule `{}` after it",
rule.name()
))
}
}
}
_ => self.rules_mut().push(rule)
_ => self.rules_mut().push(rule),
}
Ok(())
@ -85,72 +92,89 @@ pub trait Parser: KernelHolder
fn state(&self) -> Ref<'_, StateHolder>;
fn state_mut(&self) -> RefMut<'_, StateHolder>;
fn has_error(&self) -> bool;
// Updates [`matches`] and returns the position of the next matched rule.
// If the returned rule is None, there are no rules left to parse (i.e.
// end of document).
fn update_matches(&self, cursor: &Cursor, matches: &mut Vec<(usize, Option<Box<dyn Any>>)>)
-> (Cursor, Option<&Box<dyn Rule>>, Option<Box<dyn Any>>)
{
fn update_matches(
&self,
cursor: &Cursor,
matches: &mut Vec<(usize, Option<Box<dyn Any>>)>,
) -> (Cursor, Option<&Box<dyn Rule>>, Option<Box<dyn Any>>) {
// Update matches
// TODO: Trivially parallelizable
self.rules().iter().zip(matches.iter_mut()).for_each(
|(rule, (matched_at, match_data))| {
// Don't update if not stepped over yet
if *matched_at > cursor.pos { return }
(*matched_at, *match_data) = match rule.next_match(cursor)
{
None => (usize::MAX, None),
Some((mut pos, mut data)) =>
{
// Check if escaped
while pos != usize::MAX
{
let content = cursor.source.content().as_str();
let mut graphemes = content[0 .. pos].graphemes(true);
let mut escaped = false;
'inner: loop
{
let g = graphemes.next_back();
if !g.is_some() || g.unwrap() != "\\" { break 'inner; }
escaped = !escaped;
}
if !escaped { break; }
// Find next potential match
(pos, data) = match rule.next_match(&cursor.at(pos+1)) {
Some((new_pos, new_data)) => (new_pos, new_data),
None => (usize::MAX, data) // Stop iterating
}
}
(pos, (pos != usize::MAX).then_some(data))
self.rules()
.iter()
.zip(matches.iter_mut())
.for_each(|(rule, (matched_at, match_data))| {
// Don't update if not stepped over yet
if *matched_at > cursor.pos {
return;
}
}
});
(*matched_at, *match_data) = match rule.next_match(cursor) {
None => (usize::MAX, None),
Some((mut pos, mut data)) => {
// Check if escaped
while pos != usize::MAX {
let content = cursor.source.content().as_str();
let mut graphemes = content[0..pos].graphemes(true);
let mut escaped = false;
'inner: loop {
let g = graphemes.next_back();
if !g.is_some() || g.unwrap() != "\\" {
break 'inner;
}
escaped = !escaped;
}
if !escaped {
break;
}
// Find next potential match
(pos, data) = match rule.next_match(&cursor.at(pos + 1)) {
Some((new_pos, new_data)) => (new_pos, new_data),
None => (usize::MAX, data), // Stop iterating
}
}
(pos, (pos != usize::MAX).then_some(data))
}
}
});
// Get winning match
let (winner, (next_pos, _match_data)) = matches.iter()
let (winner, (next_pos, _match_data)) = matches
.iter()
.enumerate()
.min_by_key(|(_, (pos, _match_data))| pos).unwrap();
if *next_pos == usize::MAX // No rule has matched
.min_by_key(|(_, (pos, _match_data))| pos)
.unwrap();
if *next_pos == usize::MAX
// No rule has matched
{
let content = cursor.source.content();
// No winners, i.e no matches left
return (cursor.at(content.len()), None, None);
}
(cursor.at(*next_pos),
Some(&self.rules()[winner]),
std::mem::replace(&mut matches[winner].1, None))
(
cursor.at(*next_pos),
Some(&self.rules()[winner]),
std::mem::replace(&mut matches[winner].1, None),
)
}
/// Add an [`Element`] to the [`Document`]
fn push<'a>(&self, doc: &dyn Document, elem: Box<dyn Element>);
/// Parse [`Source`] into a new [`Document`]
fn parse<'a>(&self, source: Rc<dyn Source>, parent: Option<&'a dyn Document<'a>>) -> Box<dyn Document<'a>+'a>;
fn parse<'a>(
&self,
source: Rc<dyn Source>,
parent: Option<&'a dyn Document<'a>>,
) -> Box<dyn Document<'a> + 'a>;
/// Parse [`Source`] into an already existing [`Document`]
fn parse_into<'a>(&self, source: Rc<dyn Source>, document: &'a dyn Document<'a>);

View file

@ -1,8 +1,11 @@
use super::parser::Parser;
use super::source::{Cursor, Source, Token};
use ariadne::Report;
use mlua::{Function, Lua};
use super::source::Cursor;
use super::source::Source;
use super::source::Token;
use crate::document::document::Document;
use ariadne::Report;
use mlua::Function;
use mlua::Lua;
use std::any::Any;
use std::ops::Range;
@ -14,16 +17,21 @@ pub trait Rule {
/// Finds the next match starting from [`cursor`]
fn next_match(&self, cursor: &Cursor) -> Option<(usize, Box<dyn Any>)>;
/// Callback when rule matches
fn on_match<'a>(&self, parser: &dyn Parser, document: &'a (dyn Document<'a>+'a), cursor: Cursor, match_data: Option<Box<dyn Any>>) -> (Cursor, Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>>);
fn on_match<'a>(
&self,
parser: &dyn Parser,
document: &'a (dyn Document<'a> + 'a),
cursor: Cursor,
match_data: Option<Box<dyn Any>>,
) -> (Cursor, Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>>);
/// Export bindings to lua
fn lua_bindings<'lua>(&self, _lua: &'lua Lua) -> Vec<(String, Function<'lua>)>;
}
impl core::fmt::Debug for dyn Rule
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
impl core::fmt::Debug for dyn Rule {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Rule{{{}}}", self.name())
}
}
}
/*
@ -38,7 +46,7 @@ pub trait RegexRule: Rule
}
impl<T: RegexRule> Rule for T {
fn name(&self) -> &'static str { RegexRule::name(self) }
fn name(&self) -> &'static str { RegexRule::name(self) }
/// Finds the next match starting from [`cursor`]
fn next_match<'a>(&self, cursor: &'a Cursor) -> Option<usize>
@ -65,53 +73,78 @@ impl<T: RegexRule> Rule for T {
}
*/
pub trait RegexRule
{
pub trait RegexRule {
fn name(&self) -> &'static str;
/// Returns the rule's regexes
fn regexes(&self) -> &[regex::Regex];
/// Callback on regex rule match
fn on_regex_match<'a>(&self, index: usize, parser: &dyn Parser, document: &'a (dyn Document<'a>+'a), token: Token, matches: regex::Captures) -> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>>;
fn on_regex_match<'a>(
&self,
index: usize,
parser: &dyn Parser,
document: &'a (dyn Document<'a> + 'a),
token: Token,
matches: regex::Captures,
) -> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>>;
fn lua_bindings<'lua>(&self, _lua: &'lua Lua) -> Vec<(String, Function<'lua>)>;
}
impl<T: RegexRule> Rule for T {
fn name(&self) -> &'static str { RegexRule::name(self) }
/// Finds the next match starting from [`cursor`]
fn next_match(&self, cursor: &Cursor)
-> Option<(usize, Box<dyn Any>)> {
let content = cursor.source.content();
let mut found: Option<(usize, usize)> = None;
self.regexes().iter().enumerate().for_each(|(id, re)| {
if let Some(m) = re.find_at(content.as_str(), cursor.pos)
{
found = found
.and_then(|(f_pos, f_id)|
if f_pos > m.start() { Some((m.start(), id)) } else { Some((f_pos, f_id)) } )
.or(Some((m.start(), id)));
}
});
return found.map(|(pos, id)|
(pos, Box::new(id) as Box<dyn Any>));
fn name(&self) -> &'static str {
RegexRule::name(self)
}
fn on_match<'a>(&self, parser: &dyn Parser, document: &'a (dyn Document<'a>+'a), cursor: Cursor, match_data: Option<Box<dyn Any>>)
-> (Cursor, Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>>) {
/// Finds the next match starting from [`cursor`]
fn next_match(&self, cursor: &Cursor) -> Option<(usize, Box<dyn Any>)> {
let content = cursor.source.content();
let index = unsafe { match_data.unwrap_unchecked().downcast::<usize>().unwrap_unchecked() };
let mut found: Option<(usize, usize)> = None;
self.regexes().iter().enumerate().for_each(|(id, re)| {
if let Some(m) = re.find_at(content.as_str(), cursor.pos) {
found = found
.and_then(|(f_pos, f_id)| {
if f_pos > m.start() {
Some((m.start(), id))
} else {
Some((f_pos, f_id))
}
})
.or(Some((m.start(), id)));
}
});
return found.map(|(pos, id)| (pos, Box::new(id) as Box<dyn Any>));
}
fn on_match<'a>(
&self,
parser: &dyn Parser,
document: &'a (dyn Document<'a> + 'a),
cursor: Cursor,
match_data: Option<Box<dyn Any>>,
) -> (Cursor, Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>>) {
let content = cursor.source.content();
let index = unsafe {
match_data
.unwrap_unchecked()
.downcast::<usize>()
.unwrap_unchecked()
};
let re = &self.regexes()[*index];
let captures = re.captures_at(content.as_str(), cursor.pos).unwrap();
let token = Token::new(captures.get(0).unwrap().range(), cursor.source.clone());
let token_end = token.end();
return (cursor.at(token_end), self.on_regex_match(*index, parser, document, token, captures));
return (
cursor.at(token_end),
self.on_regex_match(*index, parser, document, token, captures),
);
}
fn lua_bindings<'lua>(&self, lua: &'lua Lua) -> Vec<(String, Function<'lua>)> { self.lua_bindings(lua) }
fn lua_bindings<'lua>(&self, lua: &'lua Lua) -> Vec<(String, Function<'lua>)> {
self.lua_bindings(lua)
}
}

View file

@ -1,78 +1,74 @@
use std::{fs, ops::Range, rc::Rc};
use core::fmt::Debug;
use std::fs;
use std::ops::Range;
use std::rc::Rc;
use downcast_rs::{impl_downcast, Downcast};
use serde::{Deserialize, Serialize};
use downcast_rs::impl_downcast;
use downcast_rs::Downcast;
/// Trait for source content
pub trait Source: Downcast
{
/// Gets the source's location
fn location(&self) -> Option<&Token>;
/// Gets the source's name
fn name(&self) -> &String;
/// Gets the source's content
fn content(&self) -> &String;
pub trait Source: Downcast {
/// Gets the source's location
fn location(&self) -> Option<&Token>;
/// Gets the source's name
fn name(&self) -> &String;
/// Gets the source's content
fn content(&self) -> &String;
}
impl_downcast!(Source);
impl core::fmt::Display for dyn Source
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
impl core::fmt::Display for dyn Source {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.name())
}
}
}
impl core::fmt::Debug for dyn Source
{
impl core::fmt::Debug for dyn Source {
// TODO
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Source{{{}}}", self.name())
}
}
}
impl std::cmp::PartialEq for dyn Source
{
fn eq(&self, other: &Self) -> bool {
self.name() == other.name()
}
impl std::cmp::PartialEq for dyn Source {
fn eq(&self, other: &Self) -> bool {
self.name() == other.name()
}
}
impl std::cmp::Eq for dyn Source {}
impl std::hash::Hash for dyn Source
{
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
impl std::hash::Hash for dyn Source {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.name().hash(state)
}
}
}
pub struct SourceFile
{
location: Option<Token>,
path: String,
content: String,
pub struct SourceFile {
location: Option<Token>,
path: String,
content: String,
}
impl SourceFile
{
// TODO: Create a SourceFileRegistry holding already loaded files to avoid reloading them
pub fn new(path: String, location: Option<Token>) -> Result<Self, String>
{
match fs::read_to_string(&path)
{
Err(_) => return Err(String::from(format!("Unable to read file content: `{}`", path))),
Ok(content) => Ok(Self{
location,
path,
content,
}),
impl SourceFile {
// TODO: Create a SourceFileRegistry holding already loaded files to avoid reloading them
pub fn new(path: String, location: Option<Token>) -> Result<Self, String> {
match fs::read_to_string(&path) {
Err(_) => {
return Err(String::from(format!(
"Unable to read file content: `{}`",
path
)))
}
Ok(content) => Ok(Self {
location,
path,
content,
}),
}
}
}
pub fn with_content(path: String, content: String, location: Option<Token>) -> Self
{
pub fn with_content(path: String, content: String, location: Option<Token>) -> Self {
Self {
location: location,
path: path,
@ -81,38 +77,48 @@ impl SourceFile
}
}
impl Source for SourceFile
{
fn location(&self) -> Option<&Token> { self.location.as_ref() }
fn name(&self) -> &String { &self.path }
fn content(&self) -> &String { &self.content }
impl Source for SourceFile {
fn location(&self) -> Option<&Token> {
self.location.as_ref()
}
fn name(&self) -> &String {
&self.path
}
fn content(&self) -> &String {
&self.content
}
}
pub struct VirtualSource
{
pub struct VirtualSource {
location: Token,
name: String,
content: String,
name: String,
content: String,
}
impl VirtualSource
{
pub fn new(location: Token, name: String, content: String) -> Self
{
Self { location, name, content }
}
impl VirtualSource {
pub fn new(location: Token, name: String, content: String) -> Self {
Self {
location,
name,
content,
}
}
}
impl Source for VirtualSource
{
fn location(&self) -> Option<&Token> { Some(&self.location) }
fn name(&self) -> &String { &self.name }
fn content(&self) -> &String { &self.content }
impl Source for VirtualSource {
fn location(&self) -> Option<&Token> {
Some(&self.location)
}
fn name(&self) -> &String {
&self.name
}
fn content(&self) -> &String {
&self.content
}
}
#[derive(Debug)]
pub struct Cursor
{
pub struct Cursor {
pub pos: usize,
pub source: Rc<dyn Source>,
}
@ -122,9 +128,8 @@ impl Cursor {
Self { pos, source }
}
/// Creates [`cursor`] at [`new_pos`] in the same [`file`]
pub fn at(&self, new_pos: usize) -> Self
{
/// Creates [`cursor`] at [`new_pos`] in the same [`file`]
pub fn at(&self, new_pos: usize) -> Self {
Self {
pos: new_pos,
source: self.source.clone(),
@ -132,8 +137,7 @@ impl Cursor {
}
}
impl Clone for Cursor
{
impl Clone for Cursor {
fn clone(&self) -> Self {
Self {
pos: self.pos,
@ -147,41 +151,35 @@ impl Clone for Cursor
}
#[derive(Debug, Clone)]
pub struct Token
{
pub range: Range<usize>,
pub struct Token {
pub range: Range<usize>,
source: Rc<dyn Source>,
}
impl Token
{
impl Token {
pub fn new(range: Range<usize>, source: Rc<dyn Source>) -> Self {
Self { range, source }
}
pub fn source(&self) -> Rc<dyn Source>
{
return self.source.clone()
pub fn source(&self) -> Rc<dyn Source> {
return self.source.clone();
}
/// Construct Token from a range
pub fn from(start: &Cursor, end: &Cursor) -> Self
{
assert!(Rc::ptr_eq(&start.source, &end.source));
/// Construct Token from a range
pub fn from(start: &Cursor, end: &Cursor) -> Self {
assert!(Rc::ptr_eq(&start.source, &end.source));
Self {
range: start.pos .. end.pos,
source: start.source.clone()
}
}
Self {
range: start.pos..end.pos,
source: start.source.clone(),
}
}
pub fn start(&self) -> usize
{
pub fn start(&self) -> usize {
return self.range.start;
}
pub fn end(&self) -> usize
{
pub fn end(&self) -> usize {
return self.range.end;
}
}

View file

@ -1,16 +1,20 @@
use std::{cell::RefCell, collections::HashMap, ops::Range, rc::Rc};
use std::cell::RefCell;
use std::collections::HashMap;
use std::ops::Range;
use std::rc::Rc;
use ariadne::Report;
use downcast_rs::{impl_downcast, Downcast};
use downcast_rs::impl_downcast;
use downcast_rs::Downcast;
use crate::document::document::Document;
use super::{parser::Parser, source::Source};
use super::parser::Parser;
use super::source::Source;
/// Scope for state objects
#[derive(PartialEq, PartialOrd, Debug)]
pub enum Scope
{
pub enum Scope {
/// Global state
GLOBAL = 0,
/// Document-local state
@ -21,32 +25,32 @@ pub enum Scope
PARAGRAPH = 2,
}
pub trait State: Downcast
{
pub trait State: Downcast {
/// Returns the state's [`Scope`]
fn scope(&self) -> Scope;
/// Callback called when state goes out of scope
fn on_remove<'a>(&self, parser: &dyn Parser, document: &dyn Document) -> Vec<Report<'a, (Rc<dyn Source>, Range<usize>)>>;
fn on_remove<'a>(
&self,
parser: &dyn Parser,
document: &dyn Document,
) -> Vec<Report<'a, (Rc<dyn Source>, Range<usize>)>>;
}
impl_downcast!(State);
impl core::fmt::Debug for dyn State
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
impl core::fmt::Debug for dyn State {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "State{{Scope: {:#?}}}", self.scope())
}
}
}
/// Object owning all the states
#[derive(Debug)]
pub struct StateHolder
{
data: HashMap<String, Rc<RefCell<dyn State>>>
pub struct StateHolder {
data: HashMap<String, Rc<RefCell<dyn State>>>,
}
impl StateHolder
{
impl StateHolder {
pub fn new() -> Self {
Self {
data: HashMap::new(),
@ -54,41 +58,41 @@ impl StateHolder
}
// Attempts to push [`state`]. On collision, returns an error with the already present state
pub fn insert(&mut self, name: String, state: Rc<RefCell<dyn State>>) -> Result<Rc<RefCell<dyn State>>, Rc<RefCell<dyn State>>>
{
match self.data.insert(name, state.clone())
{
pub fn insert(
&mut self,
name: String,
state: Rc<RefCell<dyn State>>,
) -> Result<Rc<RefCell<dyn State>>, Rc<RefCell<dyn State>>> {
match self.data.insert(name, state.clone()) {
Some(state) => Err(state),
_ => Ok(state)
_ => Ok(state),
}
}
pub fn query(&self, name: &String) -> Option<Rc<RefCell<dyn State>>>
{
self.data
.get(name)
.map_or(None, |st| Some(st.clone()))
pub fn query(&self, name: &String) -> Option<Rc<RefCell<dyn State>>> {
self.data.get(name).map_or(None, |st| Some(st.clone()))
}
pub fn on_scope_end(&mut self, parser: &dyn Parser, document: &dyn Document, scope: Scope) -> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>>
{
pub fn on_scope_end(
&mut self,
parser: &dyn Parser,
document: &dyn Document,
scope: Scope,
) -> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>> {
let mut result = vec![];
self.data
.retain(|_name, state|
{
if state.borrow().scope() >= scope
{
state.borrow().on_remove(parser, document)
.drain(..)
.for_each(|report| result.push(report));
false
}
else
{
true
}
});
self.data.retain(|_name, state| {
if state.borrow().scope() >= scope {
state
.borrow()
.on_remove(parser, document)
.drain(..)
.for_each(|report| result.push(report));
false
} else {
true
}
});
return result;
}

View file

@ -2,13 +2,10 @@ use std::collections::HashMap;
use unicode_segmentation::UnicodeSegmentation;
use crate::{
document::{
document::{Document, DocumentAccessors},
element::ElemKind,
},
elements::paragraph::Paragraph,
};
use crate::document::document::Document;
use crate::document::document::DocumentAccessors;
use crate::document::element::ElemKind;
use crate::elements::paragraph::Paragraph;
/// Processes text for escape characters and paragraphing
pub fn process_text(document: &dyn Document, content: &str) -> String {
@ -360,11 +357,12 @@ impl PropertyParser {
#[cfg(test)]
mod tests {
use super::*;
use crate::{
document::langdocument::LangDocument,
elements::{comment::Comment, style::Style, text::Text},
parser::source::{SourceFile, Token},
};
use crate::document::langdocument::LangDocument;
use crate::elements::comment::Comment;
use crate::elements::style::Style;
use crate::elements::text::Text;
use crate::parser::source::SourceFile;
use crate::parser::source::Token;
use std::rc::Rc;
#[test]
@ -387,6 +385,13 @@ mod tests {
let tok = Token::new(0..0, source);
doc.push(Box::new(Paragraph::new(tok.clone())));
// Comments are ignored (kind => Invisible)
(&doc as &dyn Document)
.last_element_mut::<Paragraph>()
.unwrap()
.push(Box::new(Comment::new(tok.clone(), "COMMENT".to_string())));
assert_eq!(process_text(&doc, "\na"), "a");
// A space is appended as previous element is inline
(&doc as &dyn Document)
.last_element_mut::<Paragraph>()
@ -399,13 +404,6 @@ mod tests {
.unwrap()
.push(Box::new(Style::new(tok.clone(), 0, false)));
assert_eq!(process_text(&doc, "\na"), " a");
// Comments are ignored (kind => Invisible)
(&doc as &dyn Document)
.last_element_mut::<Paragraph>()
.unwrap()
.push(Box::new(Comment::new(tok.clone(), "COMMENT".to_string())));
assert_eq!(process_text(&doc, "\na"), " a");
}
#[test]