Features & slight refactor

This commit is contained in:
ef3d0c3e 2024-07-19 11:52:12 +02:00
parent b076958893
commit 012e7dbee6
45 changed files with 4863 additions and 419 deletions

View file

@ -1,11 +1,37 @@
[package]
name = "rust_learn"
name = "nml"
version = "0.1.0"
edition = "2021"
license = "GNU AGPL3"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[[bin]]
name = "nml"
path = "src/main.rs"
[[bin]]
name = "nmlls"
path = "src/server.rs"
[profile.profiling]
inherits = "release"
debug = true
[dependencies]
crossbeam-utils = "0.8.19"
files = "2.2.3"
ariadne = "0.4.1"
dashmap = "6.0.1"
downcast-rs = "1.2.1"
getopts = "0.2.21"
lazy_static = "1.5.0"
lsp-server = "0.7.6"
lsp-types = "0.97.0"
mlua = { version = "0.9.9", features = ["lua54", "vendored"] }
regex = "1.10.3"
rusqlite = "0.31.0"
rust-crypto = "0.2.36"
serde = "1.0.204"
serde_json = "1.0.120"
syntect = "5.2.0"
tokio = { version = "1.38.1", features = ["macros", "rt-multi-thread", "io-std"]}
tower-lsp = "0.20.0"
unicode-segmentation = "1.11.0"

97
src/cache/cache.rs vendored Normal file
View file

@ -0,0 +1,97 @@
use std::{error::Error, path::PathBuf};
use rusqlite::{types::FromSql, Connection, Params, ToSql};
/// Owns the SQLite connection backing the cache database.
struct Cache {
    con: Connection,
}

impl Cache {
    /// Opens the cache database stored at `file`.
    ///
    /// Returns a human readable message when the connection cannot be opened.
    fn new(file: PathBuf) -> Result<Self, String> {
        Connection::open(file)
            .map(|con| Self { con })
            .map_err(|e| format!("Could not connect to cache database: {}", e.to_string()))
    }
}
/// Error produced while looking up or computing a cached value.
///
/// `E` is the error type of the closure that computes the value on a cache miss.
pub enum CachedError<E>
{
/// A statement against the cache database failed
SqlErr(rusqlite::Error),
/// The closure computing the value failed
GenErr(E)
}
pub trait Cached
{
type Key;
type Value;
/// SQL Query to create the cache table
/// Note: You must use `IF NOT EXIST`
fn sql_table() -> &'static str;
/// SQL Get query
fn sql_get_query() -> &'static str;
/// SQL insert query
fn sql_insert_query() -> &'static str;
fn key(&self) -> <Self as Cached>::Key;
fn init(con: &mut Connection) -> Result<(), rusqlite::Error>
{
con.execute(<Self as Cached>::sql_table(), ())
.map(|_| ())
}
fn cached<E, F>(&self, con: &mut Connection, f: F)
-> Result<<Self as Cached>::Value, CachedError<E>>
where
<Self as Cached>::Key: ToSql,
<Self as Cached>::Value: FromSql + ToSql,
F: FnOnce(&Self) -> Result<<Self as Cached>::Value, E>,
{
let key = self.key();
// Find in cache
let mut query = match con.prepare(<Self as Cached>::sql_get_query())
{
Ok(query) => query,
Err(e) => return Err(CachedError::SqlErr(e))
};
let value = query.query_row([&key], |row|
{
Ok(row.get_unwrap::<_, <Self as Cached>::Value>(0))
}).ok();
if let Some(value) = value
{
// Found in cache
return Ok(value)
}
else
{
// Compute a value
let value = match f(&self)
{
Ok(val) => val,
Err(e) => return Err(CachedError::GenErr(e))
};
// Try to insert
let mut query = match con.prepare(<Self as Cached>::sql_insert_query())
{
Ok(query) => query,
Err(e) => return Err(CachedError::SqlErr(e))
};
match query.execute((&key, &value))
{
Ok(_) => Ok(value),
Err(e) => Err(CachedError::SqlErr(e))
}
}
}
}

1
src/cache/mod.rs vendored Normal file
View file

@ -0,0 +1 @@
pub mod cache;

153
src/compiler/compiler.rs Normal file
View file

@ -0,0 +1,153 @@
use std::{cell::{RefCell, RefMut}, rc::Rc};
use rusqlite::Connection;
use crate::document::{document::Document, variable::Variable};
/// Output format produced by the compiler
#[derive(Clone, Copy)]
pub enum Target
{
/// HTML output
HTML,
/// LaTeX output
LATEX,
}
/// Compiles a [`Document`] into text for a given [`Target`].
pub struct Compiler {
    /// Output format
    target: Target,
    /// Connection to the cache database, if one was requested
    cache: Option<RefCell<Connection>>,
}

impl Compiler {
    /// Creates a compiler for `target`, opening the database at `db_path` when given.
    ///
    /// # Panics
    /// Panics when the database cannot be opened.
    pub fn new(target: Target, db_path: Option<String>) -> Self {
        let cache = db_path.map(|path| {
            let con = Connection::open(path)
                .unwrap_or_else(|e| panic!("Cannot connect to database: {e}"));
            RefCell::new(con)
        });

        Self { target, cache }
    }

    /// Output format of this compiler
    pub fn target(&self) -> Target {
        self.target
    }

    /// Mutably borrows the cache connection, or `None` when no cache is configured
    pub fn cache(&self) -> Option<RefMut<'_, Connection>> {
        self.cache.as_ref().map(|cell| cell.borrow_mut())
    }

    /// Escapes `str` so it can be embedded in the output of the current target.
    ///
    /// Only implemented for [`Target::HTML`], where `&`, `<`, `>` and `"` are replaced
    /// by their entities; other targets hit a `todo!`.
    pub fn sanitize<S: AsRef<str>>(&self, str: S) -> String {
        match self.target {
            Target::HTML => {
                let text = str.as_ref();
                let mut escaped = String::with_capacity(text.len());
                for c in text.chars() {
                    match c {
                        '&' => escaped.push_str("&amp;"),
                        '<' => escaped.push_str("&lt;"),
                        '>' => escaped.push_str("&gt;"),
                        '"' => escaped.push_str("&quot;"),
                        other => escaped.push(other),
                    }
                }
                escaped
            }
            _ => todo!("Sanitize not implemented"),
        }
    }

    /// Builds the text emitted before the document's elements
    pub fn header(&self, document: &Document) -> String {
        /// Looks up `var_name`, printing a message when it is not defined
        fn get_variable_or_error(document: &Document, var_name: &'static str) -> Option<Rc<dyn Variable>> {
            let found = document.get_variable(var_name).map(|(_, var)| var);
            if found.is_none() {
                println!("Missing variable `{var_name}` in {}", document.source().name());
            }
            found
        }

        let mut result = String::new();
        match self.target() {
            Target::HTML => {
                result.push_str("<!DOCTYPE HTML><html><head>");
                result.push_str("<meta charset=\"UTF-8\">");
                if let Some(page_title) = get_variable_or_error(document, "html.page_title") {
                    let title = format!("<title>{}</title>", self.sanitize(page_title.to_string()));
                    result.push_str(title.as_str());
                }

                if let Some((_, css)) = document.get_variable("html.css") {
                    let link = format!("<link rel=\"stylesheet\" href=\"{}\">", self.sanitize(css.to_string()));
                    result.push_str(link.as_str());
                }
                result.push_str("</head><body>");

                // TODO: TOC
                // TODO: Author, Date, Title, Div
            }
            Target::LATEX => {}
        }
        result
    }

    /// Builds the text emitted after the document's elements
    pub fn footer(&self, _document: &Document) -> String {
        match self.target() {
            Target::HTML => String::from("</body></html>"),
            Target::LATEX => String::new(),
        }
    }

    /// Compiles `document`: header, every element in order, then footer.
    ///
    /// Elements that fail to compile are reported on standard output and skipped.
    pub fn compile(&self, document: &Document) -> String {
        let mut out = String::new();
        let content = document.content.borrow();

        // Header
        out.push_str(self.header(document).as_str());

        // Body
        for elem in content.iter() {
            match elem.compile(self, document) {
                Ok(compiled) => out.push_str(compiled.as_str()),
                Err(err) => println!("Unable to compile element: {err}\n{}", elem.to_string()),
            }
        }

        // Footer
        out.push_str(self.footer(document).as_str());

        out
    }
}

1
src/compiler/mod.rs Normal file
View file

@ -0,0 +1 @@
pub mod compiler;

210
src/document/document.rs Normal file
View file

@ -0,0 +1,210 @@
use std::cell::{Ref, RefCell, RefMut};
use std::collections::hash_map::HashMap;
use std::rc::Rc;
use crate::parser::source::Source;
use super::element::Element;
use super::variable::Variable;
/// Names visible in a document: referenceable elements and variables.
#[derive(Debug)]
pub struct Scope {
    /// Referenceable elements of the current scope, keyed by reference name.
    /// The value is the index of the element in the document's content.
    pub referenceable: HashMap<String, usize>,
    /// Variables of the current scope, keyed by name
    pub variables: HashMap<String, Rc<dyn Variable>>,
}

impl Scope {
    /// Creates an empty scope
    fn new() -> Self {
        Self {
            referenceable: HashMap::new(),
            variables: HashMap::new(),
        }
    }

    /// Moves every entry of `other` into `self`, leaving `other` empty.
    ///
    /// When `merge_as` is not empty, names are prefixed with `{merge_as}.`.
    /// `ref_offset` is added to the element index of every moved reference.
    pub fn merge(&mut self, other: &mut Scope, merge_as: &String, ref_offset: usize) {
        // Name under which an entry of `other` is stored in `self`
        let qualified = |name: String| -> String {
            if merge_as.is_empty() {
                name
            } else {
                format!("{merge_as}.{name}")
            }
        };

        // References
        let references = other
            .referenceable
            .drain()
            .map(|(name, idx)| (qualified(name), idx + ref_offset));
        self.referenceable.extend(references);

        // Variables
        let variables = other
            .variables
            .drain()
            .map(|(name, var)| (qualified(name), var));
        self.variables.extend(variables);
    }
}
/// A document: an ordered list of elements together with the scope naming its
/// references and variables. Lookups fall back to the parent document, if any.
#[derive(Debug)]
pub struct Document<'a> {
/// Source this document was created from
source: Rc<dyn Source>,
/// Document's parent
parent: Option<&'a Document<'a>>,
// FIXME: Render these fields private
/// Elements of the document, in insertion order
pub content: RefCell<Vec<Box<dyn Element>>>,
/// References and variables defined in this document
pub scope: RefCell<Scope>,
}
impl<'a> Document<'a> {
    /// Creates an empty document for `source`, optionally nested under `parent`
    pub fn new(source: Rc<dyn Source>, parent: Option<&'a Document<'a>>) -> Self {
        Self {
            source,
            parent,
            content: RefCell::new(Vec::new()),
            scope: RefCell::new(Scope::new()),
        }
    }

    /// Source this document was created from
    pub fn source(&self) -> Rc<dyn Source> { self.source.clone() }

    /// Parent of this document, if any
    pub fn parent(&self) -> Option<&Document> { self.parent }

    /// Pushes an element [`elem`] to content.
    ///
    /// When the element is referenceable and holds a reference name, its index is
    /// recorded in the scope's reference table.
    pub fn push(&self, elem: Box<dyn Element>) {
        // Add index of current element to scope's reference table
        if let Some(referenceable) = elem.as_referenceable() {
            // Only add if referenceable holds a reference
            if let Some(ref_name) = referenceable.reference_name() {
                let index = self.content.borrow().len();
                self.scope.borrow_mut().referenceable.insert(ref_name.clone(), index);
            }
        }

        self.content.borrow_mut().push(elem);
    }

    /// Returns the last element of the content if it is of type `T`.
    ///
    /// When it is not and `recurse` is set, the parent documents are searched the same way.
    pub fn last_element<T: Element>(&self, recurse: bool) -> Option<Ref<'_, T>> {
        let elem = Ref::filter_map(self.content.borrow(),
            |content| content.last()
                .and_then(|last| last.downcast_ref::<T>())).ok();

        if elem.is_some() || !recurse { return elem }

        match self.parent {
            None => None,
            Some(parent) => parent.last_element(true),
        }
    }

    /// Mutable counterpart of [`Document::last_element`]
    pub fn last_element_mut<T: Element>(&self, recurse: bool) -> Option<RefMut<'_, T>> {
        let elem = RefMut::filter_map(self.content.borrow_mut(),
            |content| content.last_mut()
                .and_then(|last| last.downcast_mut::<T>())).ok();

        if elem.is_some() || !recurse { return elem }

        match self.parent {
            None => None,
            Some(parent) => parent.last_element_mut(true),
        }
    }

    /// Finds the element registered under `ref_name`, searching this document then its parents.
    ///
    /// Returns the document holding the element together with the element itself.
    pub fn get_reference(&self, ref_name: &str) -> Option<(&Document<'a>, std::cell::Ref<'_, Box<dyn Element>>)> {
        // Copy the index out so the scope is not kept borrowed during the search
        let index = self.scope.borrow().referenceable.get(ref_name).copied();
        match index {
            // Return if found
            Some(index) => Some((self,
                std::cell::Ref::map(self.content.borrow(),
                    |content| &content[index]))),

            // Continue search recursively
            None => match self.parent {
                Some(parent) => parent.get_reference(ref_name),

                // Not found
                None => None,
            }
        }
    }

    /// Adds `variable` to this document's scope, replacing any variable with the same name
    pub fn add_variable(&self, variable: Rc<dyn Variable>) {
        self.scope.borrow_mut().variables.insert(
            variable.name().to_string(),
            variable);
    }

    /// Finds the variable called `name`, searching this document then its parents.
    ///
    /// Returns the document defining the variable together with the variable.
    pub fn get_variable<S: AsRef<str>>(&self, name: S) -> Option<(&Document<'a>, Rc<dyn Variable>)> {
        let found = self.scope.borrow().variables.get(name.as_ref()).cloned();
        match found {
            Some(variable) => Some((self, variable)),

            // Continue search recursively
            None => match self.parent {
                Some(parent) => parent.get_variable(name),

                // Not found
                None => None,
            }
        }
    }

    /// Removes the variable called `name` from the first document defining it,
    /// searching this document then its parents.
    ///
    /// Returns the document that defined the variable together with the removed variable.
    pub fn remove_variable<S: AsRef<str>>(&self, name: S) -> Option<(&Document<'a>, Rc<dyn Variable>)> {
        let removed = self.scope.borrow_mut().variables.remove(name.as_ref());
        match removed {
            Some(variable) => Some((self, variable)),

            // Continue search recursively
            None => match self.parent {
                Some(parent) => parent.remove_variable(name),

                // Not found
                None => None,
            }
        }
    }

    /// Merges [`other`] into [`self`]
    ///
    /// The content of `other` is always appended. Its scope is merged only when
    /// `merge_as` is given (see [`Scope::merge`] for how names are qualified).
    pub fn merge(&self, other: Document, merge_as: Option<&String>) {
        if let Some(merge_as) = merge_as {
            // Elements of `other` are appended after the current content: the element at
            // index `i` in `other` ends up at index `offset + i` in `self`.
            let offset = self.content.borrow().len();
            self.scope.borrow_mut()
                .merge(
                    &mut *other.scope.borrow_mut(),
                    merge_as,
                    offset);
        }

        // Content
        self.content.borrow_mut()
            .extend(other.content.borrow_mut().drain(..));
    }
}

95
src/document/element.rs Normal file
View file

@ -0,0 +1,95 @@
use std::str::FromStr;
use downcast_rs::{impl_downcast, Downcast};
use crate::{compiler::compiler::Compiler, parser::source::Token};
use super::document::Document;
/// Category of an element
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum ElemKind {
    /// An invisible element (i.e comment)
    Invisible,
    /// Special elements don't trigger special formatting events
    Special,
    /// Inline elements don't break paragraphing
    Inline,
    /// Block elements are outside of paragraphs
    Block,
}

impl FromStr for ElemKind {
    type Err = String;

    /// Parses the lowercase name of a kind (`invisible`, `special`, `inline` or `block`).
    ///
    /// Any other input yields an error message naming the rejected string.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        const KINDS: [(&str, ElemKind); 4] = [
            ("invisible", ElemKind::Invisible),
            ("special", ElemKind::Special),
            ("inline", ElemKind::Inline),
            ("block", ElemKind::Block),
        ];

        KINDS
            .iter()
            .find(|(name, _)| *name == s)
            .map(|(_, kind)| kind.clone())
            .ok_or_else(|| format!("Unknown ElemKind: {s}"))
    }
}
/// Interface shared by everything stored in a document's content.
pub trait Element: Downcast
{
/// Gets the element defined location i.e token without filename
fn location(&self) -> &Token;
/// Gets the kind of the element (see [`ElemKind`])
fn kind(&self) -> ElemKind;
/// Get the element's name
fn element_name(&self) -> &'static str;
/// Outputs element to string for debug purposes
fn to_string(&self) -> String;
/// Returns this element as a [`ReferenceableElement`].
/// The default implementation returns `None`.
fn as_referenceable(&self) -> Option<&dyn ReferenceableElement> { None }
/// Compiles element
///
/// Returns the compiled text, or an error message on failure.
fn compile(&self, compiler: &Compiler, document: &Document) -> Result<String, String>;
}
// Provides downcasting (e.g. `downcast_ref`/`downcast_mut`) on `dyn Element`
impl_downcast!(Element);
/// Debug output of an element is whatever its [`Element::to_string`] returns.
impl core::fmt::Debug for dyn Element {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let repr = self.to_string();
        f.write_str(repr.as_str())
    }
}
/// An element that can be referred to by name.
pub trait ReferenceableElement : Element {
/// Reference name
///
/// `None` when the element holds no reference.
fn reference_name(&self) -> Option<&String>;
}
/// A run of plain text
#[derive(Debug)]
pub struct Text {
    /// Where the text was defined
    location: Token,
    /// The text itself, not yet sanitized
    content: String,
}

impl Text {
    /// Creates a text element holding `content`, defined at `location`
    pub fn new(location: Token, content: String) -> Text {
        Self { location, content }
    }
}
impl Element for Text {
    fn location(&self) -> &Token {
        &self.location
    }

    /// Text is always inline
    fn kind(&self) -> ElemKind {
        ElemKind::Inline
    }

    fn element_name(&self) -> &'static str {
        "Text"
    }

    /// Pretty-printed debug representation
    fn to_string(&self) -> String {
        format!("{self:#?}")
    }

    /// Compiles to the sanitized content
    fn compile(&self, compiler: &Compiler, _document: &Document) -> Result<String, String> {
        let sanitized = compiler.sanitize(self.content.as_str());
        Ok(sanitized)
    }
}

3
src/document/mod.rs Normal file
View file

@ -0,0 +1,3 @@
pub mod document;
pub mod element;
pub mod variable;

146
src/document/variable.rs Normal file
View file

@ -0,0 +1,146 @@
use std::{path::PathBuf, rc::Rc};
use crate::parser::{parser::Parser, source::{Source, Token, VirtualSource}};
use super::{document::Document, element::Text};
// TODO enforce to_string(from_string(to_string())) == to_string()
/// A named value stored in a document's scope.
pub trait Variable
{
/// Location of the variable
fn location(&self) -> &Token;
/// Name of the variable
fn name(&self) -> &str;
/// Parse variable from string, returns an error message on failure
fn from_string(&mut self, str: &str) -> Option<String>;
/// Converts variable to a string
fn to_string(&self) -> String;
/// Adds the content of this variable to `document` by means of `parser`.
/// NOTE(review): `location` is presumably the place the variable is used from — confirm against callers.
fn parse<'a>(&self, location: Token, parser: &dyn Parser, document: &'a Document);
}
/// Debug output of a variable has the form `name{value}`.
impl core::fmt::Debug for dyn Variable {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = self.name();
        let value = self.to_string();
        write!(f, "{}{{{}}}", name, value)
    }
}
/// A variable holding a plain string value
#[derive(Debug)]
pub struct BaseVariable {
    /// Where the variable was defined
    location: Token,
    /// Name of the variable
    name: String,
    /// Current value
    value: String,
}

impl BaseVariable {
    /// Creates a variable called `name` with the given `value`, defined at `location`
    pub fn new(location: Token, name: String, value: String) -> Self {
        Self {
            location,
            name,
            value,
        }
    }
}
impl Variable for BaseVariable {
    fn location(&self) -> &Token {
        &self.location
    }

    fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Stores `str` as the new value; never fails
    fn from_string(&mut self, str: &str) -> Option<String> {
        self.value = String::from(str);
        None
    }

    fn to_string(&self) -> String {
        self.value.clone()
    }

    /// Parses the value into `document`, using a virtual source located at the
    /// variable's own location and named after the variable
    fn parse<'a>(&self, _location: Token, parser: &dyn Parser, document: &'a Document) {
        let virtual_source = VirtualSource::new(
            self.location().clone(),
            self.name().to_string(),
            self.to_string());
        let source = Rc::new(virtual_source);

        parser.parse_into(source, document);
    }
}
/// A variable holding a filesystem path
#[derive(Debug)]
pub struct PathVariable {
    /// Where the variable was defined
    location: Token,
    /// Name of the variable
    name: String,
    /// Current path
    path: PathBuf,
}

impl PathVariable {
    /// Creates a variable called `name` pointing to `path`, defined at `location`
    pub fn new(location: Token, name: String, path: PathBuf) -> Self {
        Self {
            location,
            name,
            path,
        }
    }
}
impl Variable for PathVariable
{
fn location(&self) -> &Token { &self.location }
fn name(&self) -> &str { self.name.as_str() }
fn from_string(&mut self, str: &str) -> Option<String> {
self.path = PathBuf::from(std::fs::canonicalize(str).unwrap());
None
}
fn to_string(&self) -> String { self.path.to_str().unwrap().to_string() }
fn parse<'a>(&self, location: Token, parser: &dyn Parser, document: &'a Document){
// TODO: Avoid copying the location twice...
// Maybe create a special VirtualSource where the `content()` method
// calls `Variable::to_string()`
let source = Rc::new(VirtualSource::new(
location.clone(),
self.name().to_string(),
self.to_string()));
parser.push(document, Box::new(Text::new(
Token::new(0..source.content().len(), source),
self.to_string()
)));
}
}
/*
struct ConfigVariable<T>
{
value: T,
name: String,
desc: String,
validator: Box<dyn Fn(&Self, &T) -> Option<&String>>,
}
impl<T> ConfigVariable<T>
{
fn description(&self) -> &String { &self.desc }
}
impl<T> Variable for ConfigVariable<T>
where T: FromStr + Display
{
fn name(&self) -> &str { self.name.as_str() }
/// Parse variable from string, returns an error message on failure
fn from_string(&mut self, str: &str) -> Option<String> {
match str.parse::<T>()
{
Ok(value) => {
(self.validator)(self, &value).or_else(|| {
self.value = value;
None
})
},
Err(_) => return Some(format!("Unable to parse `{str}` into variable `{}`", self.name))
}
}
/// Converts variable to a string
fn to_string(&self) -> String { self.value.to_string() }
}
*/

390
src/elements/code.rs Normal file
View file

@ -0,0 +1,390 @@
use std::{collections::HashMap, ops::Range, rc::Rc, sync::Once};
use ariadne::{Fmt, Label, Report, ReportKind};
use crypto::{digest::Digest, sha2::Sha512};
use regex::{Captures, Regex};
use syntect::{easy::HighlightLines, highlighting::ThemeSet, parsing::SyntaxSet};
use crate::{cache::cache::{Cached, CachedError}, compiler::compiler::{Compiler, Target}, document::{document::Document, element::{ElemKind, Element}}, parser::{parser::Parser, rule::RegexRule, source::{Source, Token}, util::{self, Property, PropertyParser}}};
use lazy_static::lazy_static;
/// Layout used to render a piece of code
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum CodeKind
{
/// Block rendered with an optional title and a line number gutter
FullBlock,
/// Block rendered without title and without line numbers
MiniBlock,
/// Code rendered inside the surrounding text
Inline,
}
#[derive(Debug)]
struct Code
{
location: Token,
block: CodeKind,
language: String,
name: Option<String>,
code: String,
theme: Option<String>,
line_offset: usize,
}
impl Code {
fn new(location: Token, block: CodeKind, language: String, name: Option<String>, code: String, theme: Option<String>, line_offset: usize) -> Self {
Self { location, block, language, name, code, theme, line_offset }
}
fn highlight_html(&self, compiler: &Compiler) -> Result<String, String>
{
lazy_static! {
static ref syntax_set : SyntaxSet = SyntaxSet::load_defaults_newlines();
static ref theme_set : ThemeSet = ThemeSet::load_defaults();
}
let syntax = match syntax_set.find_syntax_by_name(self.language.as_str())
{
Some(syntax) => syntax,
None => return Err(format!("Unable to find syntax for language: {}", self.language))
};
let theme_string = match self.theme.as_ref()
{
Some(theme) => theme.as_str(),
None => "base16-ocean.dark",
};
let mut h = HighlightLines::new(syntax, &theme_set.themes[theme_string]);
let mut result = String::new();
if self.block == CodeKind::FullBlock
{
result += "<div class=\"code-block\">";
if let Some(name) = &self.name
{
result += format!("<div class=\"code-block-title\">{}</div>",
compiler.sanitize(name.as_str())).as_str();
}
result += format!("<div class=\"code-block-content\"><table cellspacing=\"0\">").as_str();
for (line_id, line) in self.code.split(|c| c == '\n').enumerate()
{
result += "<tr><td class=\"code-block-gutter\">";
// Line number
result += format!("<pre><span>{}</span></pre>", line_id+self.line_offset).as_str();
// Code
result += "</td><td class=\"code-block-line\"><pre>";
match h.highlight_line(line, &syntax_set)
{
Err(e) => return Err(format!("Error highlighting line `{line}`: {}", e.to_string())),
Ok(regions) => {
match syntect::html::styled_line_to_highlighted_html(&regions[..], syntect::html::IncludeBackground::No)
{
Err(e) => return Err(format!("Error highlighting code: {}", e.to_string())),
Ok(highlighted) => result += if highlighted.is_empty() { "<br>" } else { highlighted.as_str() }
}
}
}
result += "</pre></td></tr>";
}
result += "</table></div></div>";
}
else if self.block == CodeKind::MiniBlock
{
result += "<div class=\"code-block\"><div class=\"code-block-content\"><table cellspacing=\"0\">";
for line in self.code.split(|c| c == '\n')
{
result += "<tr><td class=\"code-block-line\"><pre>";
// Code
match h.highlight_line(line, &syntax_set)
{
Err(e) => return Err(format!("Error highlighting line `{line}`: {}", e.to_string())),
Ok(regions) => {
match syntect::html::styled_line_to_highlighted_html(&regions[..], syntect::html::IncludeBackground::No)
{
Err(e) => return Err(format!("Error highlighting code: {}", e.to_string())),
Ok(highlighted) => result += if highlighted.is_empty() { "<br>" } else { highlighted.as_str() }
}
}
}
result += "</pre></td></tr>";
}
result += "</table></div></div>";
}
else if self.block == CodeKind::Inline
{
result += "<a class=\"inline-code\"><code>";
match h.highlight_line(self.code.as_str(), &syntax_set)
{
Err(e) => return Err(format!("Error highlighting line `{}`: {}", self.code, e.to_string())),
Ok(regions) => {
match syntect::html::styled_line_to_highlighted_html(&regions[..], syntect::html::IncludeBackground::No)
{
Err(e) => return Err(format!("Error highlighting code: {}", e.to_string())),
Ok(highlighted) => result += highlighted.as_str()
}
}
}
result += "</code></a>";
}
Ok(result)
}
}
impl Cached for Code
{
type Key = String;
type Value = String;
fn sql_table() -> &'static str {
"CREATE TABLE IF NOT EXISTS cached_code (
digest TEXT PRIMARY KEY,
highlighted BLOB NOT NULL);"
}
fn sql_get_query() -> &'static str {
"SELECT highlighted FROM cached_code WHERE digest = (?1)"
}
fn sql_insert_query() -> &'static str {
"INSERT INTO cached_code (digest, highlighted) VALUES (?1, ?2)"
}
fn key(&self) -> <Self as Cached>::Key {
let mut hasher = Sha512::new();
hasher.input((self.block as usize).to_be_bytes().as_slice());
hasher.input((self.line_offset as usize).to_be_bytes().as_slice());
self.theme.as_ref().map(|theme| hasher.input(theme.as_bytes()));
self.name.as_ref().map(|name| hasher.input(name.as_bytes()));
hasher.input(self.language.as_bytes());
hasher.input(self.code.as_bytes());
hasher.result_str()
}
}
impl Element for Code {
    fn location(&self) -> &Token {
        &self.location
    }

    /// Inline code is an inline element, every other layout is a block
    fn kind(&self) -> ElemKind {
        match self.block {
            CodeKind::Inline => ElemKind::Inline,
            _ => ElemKind::Block,
        }
    }

    fn element_name(&self) -> &'static str {
        "Code Block"
    }

    fn to_string(&self) -> String {
        format!("{self:#?}")
    }

    /// Compiles the code to highlighted HTML, going through the compiler's cache
    /// when it has one. The LaTeX target is not implemented.
    fn compile(&self, compiler: &Compiler, _document: &Document) -> Result<String, String> {
        match compiler.target() {
            Target::HTML => {
                // The cache table is created at most once per process
                static CACHE_INIT: Once = Once::new();
                CACHE_INIT.call_once(|| {
                    if let Some(mut con) = compiler.cache() {
                        if let Err(e) = Code::init(&mut con) {
                            eprintln!("Unable to create cache table: {e}");
                        }
                    }
                });

                match compiler.cache() {
                    // No cache: highlight directly
                    None => self.highlight_html(compiler),
                    Some(mut con) => self
                        .cached(&mut con, |s| s.highlight_html(compiler))
                        .map_err(|e| match e {
                            CachedError::SqlErr(e) => format!("Querying the cache failed: {e}"),
                            CachedError::GenErr(e) => e,
                        }),
                }
            }
            Target::LATEX => { todo!("") }
        }
    }
}
/// Rule matching code written between backticks
pub struct CodeRule {
    /// Index 0: triple-backtick form, index 1: double-backtick form
    re: [Regex; 2],
    /// Parser for the optional bracketed property list
    properties: PropertyParser,
}

impl CodeRule {
    /// Creates the rule, with its regexes and its single `line_offset` property
    /// (whose default value is `1`)
    pub fn new() -> Self {
        let line_offset = Property::new(
            true,
            "Line number offset".to_string(),
            Some("1".to_string()));
        let props = HashMap::from([("line_offset".to_string(), line_offset)]);

        let block = Regex::new(r"(?:^|\n)```(?:\[((?:\\.|[^\\\\])*?)\])?(.*?)(?:,(.*))?\n((?:\\(?:.|\n)|[^\\\\])*?)```").unwrap();
        let inline = Regex::new(r"``(?:\[((?:\\.|[^\[\]\\])*?)\])?(?:(.*)(?:\n|,))?((?:\\(?:.|\n)|[^\\\\])*?)``").unwrap();

        Self {
            re: [block, inline],
            properties: PropertyParser::new(props),
        }
    }
}
impl RegexRule for CodeRule
{
fn name(&self) -> &'static str { "Code" }
fn regexes(&self) -> &[regex::Regex] { &self.re }
fn on_regex_match(&self, index: usize, parser: &dyn Parser, document: &Document, token: Token, matches: Captures)
-> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>> {
let mut reports = vec![];
let properties = match matches.get(1)
{
None => match self.properties.default() {
Ok(properties) => properties,
Err(e) => {
reports.push(
Report::build(ReportKind::Error, token.source(), token.start())
.with_message("Invalid code")
.with_label(
Label::new((token.source().clone(), token.range.clone()))
.with_message(format!("Code is missing properties: {e}"))
.with_color(parser.colors().error))
.finish());
return reports;
},
}
Some(props) => {
let processed = util::process_escaped('\\', "]",
props.as_str().trim_start().trim_end());
match self.properties.parse(processed.as_str())
{
Err(e) => {
reports.push(
Report::build(ReportKind::Error, token.source(), props.start())
.with_message("Invalid Code Properties")
.with_label(
Label::new((token.source().clone(), props.range()))
.with_message(e)
.with_color(parser.colors().error))
.finish());
return reports;
}
Ok(properties) => properties
}
}
};
let code_lang = match matches.get(2)
{
None => "Plain Text".to_string(),
Some(lang) => {
let code_lang = lang.as_str().trim_end().trim_start().to_string();
if code_lang.is_empty()
{
reports.push(
Report::build(ReportKind::Error, token.source(), lang.start())
.with_message("Missing code language")
.with_label(
Label::new((token.source().clone(), lang.range()))
.with_message("No language specified")
.with_color(parser.colors().error))
.finish());
return reports;
}
// TODO: validate language
code_lang
}
};
let mut code_content = if index == 0
{ util::process_escaped('\\',"```", matches.get(4).unwrap().as_str()) }
else
{ util::process_escaped('\\',"``", matches.get(3).unwrap().as_str()) };
if code_content.bytes().last() == Some('\n' as u8) // Remove newline
{
code_content.pop();
}
if code_content.is_empty()
{
reports.push(
Report::build(ReportKind::Error, token.source(), token.start())
.with_message("Missing code content")
.with_label(
Label::new((token.source().clone(), token.range.clone()))
.with_message("Code content cannot be empty")
.with_color(parser.colors().error))
.finish());
return reports;
}
let theme = document.get_variable("code.theme")
.and_then(|(_doc, var)| Some(var.to_string()));
if index == 0 // Block
{
let code_name = matches.get(3)
.and_then(|name| {
let code_name = name.as_str().trim_end().trim_start().to_string();
(!code_name.is_empty()).then_some(code_name)
});
let line_offset = match properties.get("line_offset",
|prop, value| value.parse::<usize>().map_err(|e| (prop, e)))
{
Ok((_prop, offset)) => offset,
Err((prop, e)) => {
reports.push(
Report::build(ReportKind::Error, token.source(), token.start())
.with_message("Invalid Code Property")
.with_label(
Label::new((token.source().clone(), token.start()+1..token.end()))
.with_message(format!("Property `line_offset: {}` cannot be converted: {}",
prop.fg(parser.colors().info),
e.fg(parser.colors().error)))
.with_color(parser.colors().warning))
.finish());
return reports;
}
};
parser.push(document, Box::new(
Code::new(
token.clone(),
CodeKind::FullBlock,
code_lang,
code_name,
code_content,
theme,
line_offset
)
));
}
else // Maybe inline
{
let block = if code_content.contains('\n') { CodeKind::MiniBlock }
else { CodeKind::Inline };
parser.push(document, Box::new(
Code::new(
token.clone(),
block,
code_lang,
None,
code_content,
theme,
1,
)
));
}
reports
}
}

81
src/elements/comment.rs Normal file
View file

@ -0,0 +1,81 @@
use regex::{Captures, Regex};
use crate::parser::{parser::Parser, rule::RegexRule, source::{Source, Token}};
use ariadne::{Report, Label, ReportKind};
use crate::{compiler::compiler::Compiler, document::{document::Document, element::{ElemKind, Element}}};
use std::{ops::Range, rc::Rc};
/// A comment; it is kept in the document but produces no output
#[derive(Debug)]
pub struct Comment {
    /// Where the comment was defined
    location: Token,
    /// Text of the comment
    content: String,
}

impl Comment {
    /// Creates a comment holding `content`, defined at `location`
    pub fn new(location: Token, content: String) -> Self {
        Self {
            location,
            content,
        }
    }
}
impl Element for Comment {
    fn location(&self) -> &Token {
        &self.location
    }

    /// Comments are invisible
    fn kind(&self) -> ElemKind {
        ElemKind::Invisible
    }

    fn element_name(&self) -> &'static str {
        "Comment"
    }

    fn to_string(&self) -> String {
        format!("{self:#?}")
    }

    /// Comments compile to an empty string
    fn compile(&self, _compiler: &Compiler, _document: &Document) -> Result<String, String> {
        Ok(String::new())
    }
}
/// Rule matching comments, i.e. `::` followed by the text of the comment
pub struct CommentRule {
    re: [Regex; 1],
}

impl CommentRule {
    /// Creates the rule with its regex
    pub fn new() -> Self {
        let comment = Regex::new(r"\s*::(.*)").unwrap();
        Self { re: [comment] }
    }
}
impl RegexRule for CommentRule {
    fn name(&self) -> &'static str {
        "Comment"
    }

    fn regexes(&self) -> &[Regex] {
        &self.re
    }

    /// Pushes a comment element holding the trimmed text of the match.
    ///
    /// An empty comment is still pushed, but a warning is reported for it.
    fn on_regex_match(&self, _: usize, parser: &dyn Parser, document: &Document, token: Token, matches: Captures)
        -> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>> {
        let mut reports = vec![];

        let comment = matches
            .get(1)
            .unwrap_or_else(|| panic!("Unknown error"));

        let content = comment.as_str().trim().to_string();
        if content.is_empty() {
            reports.push(
                Report::build(ReportKind::Warning, token.source(), comment.start())
                    .with_message("Empty comment")
                    .with_label(
                        Label::new((token.source(), comment.range()))
                            .with_message("Comment is empty")
                            .with_color(parser.colors().warning))
                    .finish());
        }

        let element = Comment::new(token.clone(), content);
        parser.push(document, Box::new(element));

        reports
    }
}

155
src/elements/import.rs Normal file
View file

@ -0,0 +1,155 @@
use regex::Regex;
use crate::parser::{parser::{Parser, ReportColors}, rule::RegexRule, source::{Source, SourceFile, Token}};
use ariadne::{Report, Fmt, Label, ReportKind};
use crate::document::document::Document;
use std::{ops::Range, rc::Rc};
use super::paragraph::Paragraph;
/// Rule matching `@import`, optionally followed by a bracketed name to import as,
/// then by the path to import
pub struct ImportRule {
    re: [Regex; 1],
}

impl ImportRule {
    /// Creates the rule with its regex
    pub fn new() -> Self {
        let import = Regex::new(r"(?:^|\n)@import(?:\[(.*)\])?[^\S\r\n]+(.*)").unwrap();
        Self { re: [import] }
    }

    /// Validates the name of the file to import.
    /// Currently accepts every name and returns it unchanged.
    pub fn validate_name(_colors: &ReportColors, name: &str) -> Result<String, String> {
        Ok(name.to_owned())
    }

    /// Validates the name a file is imported as.
    /// Currently accepts every name and returns it unchanged.
    pub fn validate_as(_colors: &ReportColors, as_name: &str) -> Result<String, String> {
        // TODO: Use variable name validation rules
        Ok(as_name.to_owned())
    }
}
impl RegexRule for ImportRule {
fn name(&self) -> &'static str { "Import" }
fn regexes(&self) -> &[Regex] { &self.re }
fn on_regex_match(&self, _: usize, parser: &dyn Parser, document: &Document, token: Token, matches: regex::Captures) -> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>>
{
let mut result = vec![];
// Path
let import_file = match matches.get(2)
{
Some(name) => {
match ImportRule::validate_name(parser.colors(), name.as_str())
{
Err(msg) => {
result.push(
Report::build(ReportKind::Error, token.source(), name.start())
.with_message("Invalid name for import")
.with_label(
Label::new((token.source(), name.range()))
.with_message(format!("Import name `{}` is invalid. {msg}",
name.as_str().fg(parser.colors().highlight)))
.with_color(parser.colors().error))
.finish());
return result;
},
Ok(filename) => {
let meta = match std::fs::metadata(filename.as_str())
{
Err(_) => {
result.push(
Report::build(ReportKind::Error, token.source(), name.start())
.with_message("Invalid import path")
.with_label(
Label::new((token.source(), name.range()))
.with_message(format!("Unable to access file `{}`",
filename.fg(parser.colors().highlight)))
.with_color(parser.colors().error))
.finish());
return result;
},
Ok(meta) => meta
};
if !meta.is_file()
{
result.push(
Report::build(ReportKind::Error, token.source(), name.start())
.with_message("Invalid import path")
.with_label(
Label::new((token.source(), name.range()))
.with_message(format!("Path `{}` is not a file!",
filename.fg(parser.colors().highlight)))
.with_color(parser.colors().error))
.finish());
return result;
}
filename
},
}
}
_ => panic!("Invalid name for import")
};
// [Optional] import as
let import_as = match matches.get(1)
{
Some(as_name) => {
match ImportRule::validate_as(parser.colors(), as_name.as_str())
{
Ok(as_name) => as_name,
Err(msg) => {
result.push(
Report::build(ReportKind::Error, token.source(), as_name.start())
.with_message("Invalid name for import as")
.with_label(
Label::new((token.source(), as_name.range()))
.with_message(format!("Canot import `{import_file}` as `{}`. {msg}",
as_name.as_str().fg(parser.colors().highlight)))
.with_color(parser.colors().error))
.finish());
return result;
},
}
}
_ => "".to_string()
};
let import = match SourceFile::new(import_file, Some(token.clone()))
{
Ok(import) => Rc::new(import),
Err(path) => {
result.push(
Report::build(ReportKind::Error, token.source(), token.start())
.with_message("Unable to read file content")
.with_label(
Label::new((token.source(), token.range))
.with_message(format!("Failed to read content from path `{path}`"))
.with_color(parser.colors().error))
.finish());
return result;
}
};
// TODO
let import_doc = parser.parse(import, Some(&document));
document.merge(import_doc, Some(&import_as));
// Close paragraph
if document.last_element::<Paragraph>(false).is_some()
{
parser.push(document, Box::new(Paragraph::new(
Token::new(token.end()..token.end(), token.source())
)));
}
return result;
}
}

149
src/elements/link.rs Normal file
View file

@ -0,0 +1,149 @@
use regex::Regex;
use crate::parser::{parser::Parser, rule::RegexRule, source::{Source, Token}, util};
use ariadne::{Report, Fmt, Label, ReportKind};
use crate::{compiler::compiler::{Compiler, Target}, document::{document::Document, element::{ElemKind, Element}}};
use std::{ops::Range, rc::Rc};
/// Inline hyperlink element: a displayed name paired with a target url.
#[derive(Debug)]
pub struct Link {
    /// Token covering the link in its source.
    location: Token,
    /// Text displayed for the link.
    name: String,
    /// Target the link points to.
    url: String,
}

impl Link {
    /// Creates a link found at `location`, displayed as `name` and pointing to `url`.
    pub fn new(location: Token, name: String, url: String) -> Self {
        Link { location, name, url }
    }
}
impl Element for Link {
    /// Token covering the link in its source.
    fn location(&self) -> &Token {
        &self.location
    }

    /// Links are inline elements.
    fn kind(&self) -> ElemKind {
        ElemKind::Inline
    }

    fn element_name(&self) -> &'static str {
        "Link"
    }

    /// Pretty-printed debug representation of the link.
    fn to_string(&self) -> String {
        format!("{self:#?}")
    }

    /// Renders the link for the compiler's target.
    ///
    /// Both the url and the name go through `Compiler::sanitize` before being
    /// placed into an `<a href>` tag (HTML) or a `\href` command (LaTeX).
    fn compile(&self, compiler: &Compiler, _document: &Document) -> Result<String, String> {
        let target = compiler.target();
        let href = compiler.sanitize(self.url.as_str());
        let label = compiler.sanitize(self.name.as_str());
        let compiled = match target {
            Target::HTML => format!("<a href=\"{}\">{}</a>", href, label),
            Target::LATEX => format!("\\href{{{}}}{{{}}}", href, label),
        };
        Ok(compiled)
    }
}
/// Parsing rule recognizing links written as `[name](url)`.
pub struct LinkRule {
    /// Patterns handled by this rule; capture 1 is the link name, capture 2 the link url.
    re: [Regex; 1],
}

impl LinkRule {
    /// Builds the rule with its link pattern.
    ///
    /// The pattern previously used here was the fenced code block pattern
    /// (three backticks, optional `,title`, body, three backticks). It made this
    /// rule compete for code blocks and left capture 2 (the url) optional, so the
    /// match handler could be reached without a url. Both captures are now
    /// mandatory parts of the match; a backslash escapes the following character.
    pub fn new() -> Self {
        Self { re: [Regex::new(r"\[((?:\\.|[^\[\]\\])*?)\]\(((?:\\.|[^\\\\])*?)\)").unwrap()] }
    }
}
impl RegexRule for LinkRule {
fn name(&self) -> &'static str { "Link" }
fn regexes(&self) -> &[Regex] { &self.re }
fn on_regex_match(&self, _: usize, parser: &dyn Parser, document: &Document, token: Token, matches: regex::Captures) -> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>>
{
let mut result = vec![];
let link_name = match matches.get(1)
{
Some(name) => {
if name.as_str().is_empty()
{
result.push(
Report::build(ReportKind::Error, token.source(), name.start())
.with_message("Empty link name")
.with_label(
Label::new((token.source().clone(), name.range()))
.with_message("Link name is empty")
.with_color(parser.colors().error))
.finish());
return result;
}
// TODO: process into separate document...
let text_content = util::process_text(document, name.as_str());
if text_content.as_str().is_empty()
{
result.push(
Report::build(ReportKind::Error, token.source(), name.start())
.with_message("Empty link name")
.with_label(
Label::new((token.source(), name.range()))
.with_message(format!("Link name is empty. Once processed, `{}` yields `{}`",
name.as_str().fg(parser.colors().highlight),
text_content.as_str().fg(parser.colors().highlight),
))
.with_color(parser.colors().error))
.finish());
return result;
}
text_content
},
_ => panic!("Empty link name"),
};
let link_url = match matches.get(2)
{
Some(url) => {
if url.as_str().is_empty()
{
result.push(
Report::build(ReportKind::Error, token.source(), url.start())
.with_message("Empty link url")
.with_label(
Label::new((token.source(), url.range()))
.with_message("Link url is empty")
.with_color(parser.colors().error))
.finish());
return result;
}
let text_content = util::process_text(document, url.as_str());
if text_content.as_str().is_empty()
{
result.push(
Report::build(ReportKind::Error, token.source(), url.start())
.with_message("Empty link url")
.with_label(
Label::new((token.source(), url.range()))
.with_message(format!("Link url is empty. Once processed, `{}` yields `{}`",
url.as_str().fg(parser.colors().highlight),
text_content.as_str().fg(parser.colors().highlight),
))
.with_color(parser.colors().error))
.finish());
return result;
}
text_content
},
_ => panic!("Empty link url"),
};
parser.push(document, Box::new(
Link::new(
token.clone(),
link_name,
link_url
)
));
return result;
}
}

335
src/elements/list.rs Normal file
View file

@ -0,0 +1,335 @@
use std::{any::Any, cell::Ref, ops::Range, rc::Rc};
use crate::{compiler::compiler::{Compiler, Target}, document::{document::Document, element::{ElemKind, Element}}, parser::{parser::Parser, rule::Rule, source::{Cursor, Source, Token, VirtualSource}}};
use ariadne::{Label, Report, ReportKind};
use regex::Regex;
use super::paragraph::Paragraph;
/// One item of a [`List`].
#[derive(Debug)]
pub struct ListEntry {
    /// Token covering the entry in its source.
    location: Token,
    /// One `(numbered, index)` pair per nesting level of the entry.
    numbering: Vec<(bool, usize)>,
    /// Elements making up the entry's body.
    content: Vec<Box<dyn Element>>,
    // TODO bullet_maker : FnMut<...>
}

impl ListEntry {
    /// Creates an entry from its location, per-level numbering and body elements.
    pub fn new(location: Token, numbering: Vec<(bool, usize)>, content: Vec<Box<dyn Element>>) -> Self {
        ListEntry { location, numbering, content }
    }
}
/// Block element holding consecutive list entries.
#[derive(Debug)]
pub struct List {
    /// Token covering the list; grows as entries are pushed.
    location: Token,
    /// Entries of the list, in document order.
    entries: Vec<ListEntry>,
}

impl List {
    /// Creates an empty list located at `location`.
    pub fn new(location: Token) -> Self {
        List { location, entries: vec![] }
    }

    /// Appends `entry` and stretches the list's range so it ends where the entry ends.
    pub fn push(&mut self, entry: ListEntry) {
        let start = self.location.start();
        let end = entry.location.end();
        self.location.range = start..end;
        self.entries.push(entry);
    }
}
impl Element for List {
    /// Token covering the whole list.
    fn location(&self) -> &Token {
        &self.location
    }

    /// Lists are block elements.
    fn kind(&self) -> ElemKind {
        ElemKind::Block
    }

    fn element_name(&self) -> &'static str {
        "List"
    }

    /// Pretty-printed debug representation of the list.
    fn to_string(&self) -> String {
        format!("{self:#?}")
    }

    /// Renders the list for the compiler's target.
    ///
    /// For HTML, every entry becomes an `<li>` whose body is the concatenation of
    /// its compiled elements. Before each entry the stack of open `<ul>`/`<ol>`
    /// tags is adjusted to the entry's numbering, and every list still open after
    /// the last entry is closed. The first element that fails to compile aborts
    /// the whole list with that error. LaTeX is not implemented and yields an error.
    fn compile(&self, compiler: &Compiler, document: &Document) -> Result<String, String> {
        match compiler.target() {
            Target::HTML => {
                // Indexed by the `numbered` flag: false => bullet list, true => ordered list
                const OPEN: [&str; 2] = ["<ul>", "<ol>"];
                const CLOSE: [&str; 2] = ["</ul>", "</ol>"];

                let mut html = String::new();

                //TODO: Do something about indexing
                // Kinds of the currently open lists, outermost first
                let mut open_lists: Vec<bool> = vec![];
                let mut sync_nesting = |out: &mut String, target: &[(bool, usize)]| {
                    // Number of leading levels on which the open lists and the target agree
                    let shared = open_lists
                        .iter()
                        .zip(target.iter())
                        .take_while(|(open, wanted)| **open == wanted.0)
                        .count();

                    // Close everything below the shared prefix, innermost first
                    while open_lists.len() > shared {
                        let numbered = open_lists.pop().unwrap();
                        out.push_str(CLOSE[numbered as usize]);
                    }

                    // Open the levels the target has beyond the shared prefix
                    for &(numbered, _) in &target[shared..] {
                        out.push_str(OPEN[numbered as usize]);
                        open_lists.push(numbered);
                    }
                };

                for entry in &self.entries {
                    sync_nesting(&mut html, entry.numbering.as_slice());
                    html.push_str("<li>");
                    for elem in &entry.content {
                        let compiled = elem.compile(compiler, document)?;
                        html.push_str(compiled.as_str());
                    }
                    html.push_str("</li>");
                }

                // An empty target closes every list that is still open
                sync_nesting(&mut html, &[]);

                Ok(html)
            }
            Target::LATEX => Err("Unimplemented compiler".to_string()),
        }
    }
}
/*
impl Element for ListEntry
{
fn location(&self) -> &Token { &self.location }
fn kind(&self) -> ElemKind { ElemKind::Inline }
fn element_name(&self) -> &'static str { "List" }
fn to_string(&self) -> String { format!("{self:#?}") }
fn compile(&self, compiler: &Compiler) -> Result<String, String> {
lazy_static! {
static ref STATE_NAME : &'static str = "list.state";
static ref LIST_OPEN : [&'static str; 2] = ["<ul>", "<ol>"];
static ref LIST_CLOSE : [&'static str; 2] = ["</ul>", "</ol>"];
}
// TODO: State.shouldpreserve?
// Called upon every element
//let state = compiler.get_state_mut::<ListState, _>(*STATE_NAME)
//.or_else(|| {
// compiler.insert_state(STATE_NAME.to_string(), Box::new(ListState(Vec::new())) as Box<dyn Any>);
// compiler.get_state_mut::<ListState, _>(*STATE_NAME)
//}).unwrap();
match compiler.target()
{
Target::HTML => {
let mut result = String::new();
//TODO: Do something about indexing
//&self.numbering.iter()
// .zip(&state.0)
// .for_each(|((wants_numbered, _), is_numbered)|
// {
//
// });
result.push_str("<li>");
match self.content.iter()
.try_for_each(|ent| match ent.compile(compiler) {
Err(e) => Err(e),
Ok(s) => Ok(result.push_str(s.as_str())),
})
{
Err(e) => return Err(e),
_ => {}
}
result.push_str("</li>");
//result.push_str(LIST_OPEN[self.numbered as usize]);
//self.entries.iter()
// .for_each(|(_index, entry)|
// result.push_str(format!("<li>{}</li>", compiler.compile(entry)).as_str()));
//result.push_str(LIST_CLOSE[self.numbered as usize]);
Ok(result)
}
Target::LATEX => Err("Unimplemented compiler".to_string())
}
}
}
*/
/// Parsing rule for list entries.
pub struct ListRule {
    /// Matches the first line of an entry; capture 1 is the run of `*`/`-` markers.
    start_re: Regex,
    /// Matches a following line that begins with spacing; capture 1 is that spacing.
    continue_re: Regex,
}

impl ListRule {
    /// Builds the rule with its entry-start and continuation patterns.
    pub fn new() -> Self {
        let start_re = Regex::new(r"(?:^|\n)(?:[^\S\r\n]+)([*-]+).*").unwrap();
        let continue_re = Regex::new(r"(?:^|\n)([^\S\r\n]+).*").unwrap();
        Self { start_re, continue_re }
    }

    /// Turns a marker string such as `*-` into one `(numbered, index)` pair per marker.
    ///
    /// `*` is an unnumbered level and `-` a numbered one. Indices are derived from
    /// the last entry of the previous `List` in `document`, when there is one:
    /// while the marker kinds agree level by level, the index of each level is
    /// copied, and the previous entry's deepest level is incremented. From the
    /// first disagreeing level on, and for levels the previous entry lacks, the
    /// index is 0.
    ///
    /// # Panics
    ///
    /// Panics on any character other than `*` or `-`.
    fn parse_depth(depth: &str, document: &Document) -> Vec<(bool, usize)> {
        let mut parsed = vec![];

        // Numbering of the previous list's last entry, if any
        let prev_entry = document.last_element::<List>(true)
            .and_then(|list| Ref::filter_map(list, |m| m.entries.last()).ok())
            .and_then(|entry| Ref::filter_map(entry, |e| Some(&e.numbering)).ok());

        // Cleared once a level's kind differs from the previous entry's
        let mut continue_match = true;
        for (idx, c) in depth.chars().enumerate() {
            let numbered = c == '-';
            let mut number = 0usize;

            if continue_match {
                if let Some(prev) = prev_entry.as_ref() {
                    if let Some((prev_numbered, prev_idx)) = prev.get(idx) {
                        if *prev_numbered != numbered {
                            // New depth
                            continue_match = false;
                        } else if idx + 1 == prev.len() {
                            // Increase from previous
                            number = prev_idx + 1;
                        } else {
                            // Do nothing
                            number = *prev_idx;
                        }
                    }
                }
            }

            match c {
                '*' => parsed.push((false, number)),
                '-' => parsed.push((true, number)),
                _ => panic!("Unimplemented"),
            }
        }

        parsed
    }
}
// `Rule` implementation for lists: finds entry starts and turns each entry
// (its first line plus any continuation lines) into a `ListEntry` appended to
// the document's trailing `List`.
impl Rule for ListRule
{
fn name(&self) -> &'static str { "List" }
// Position of the next entry start at or after the cursor; the boxed match
// data is an empty placeholder array.
fn next_match(&self, cursor: &Cursor) -> Option<(usize, Box<dyn Any>)> {
self.start_re.find_at(cursor.source.content(), cursor.pos)
.map_or(None,
|m| Some((m.start(), Box::new([false;0]) as Box<dyn Any>)) )
}
// Parses one list entry starting at `cursor`.
//
// Returns the cursor placed after the entry together with the warnings
// emitted for continuation lines whose spacing differs from the first one.
// Panics if `start_re` does not match at the cursor, or if parsing the entry's
// text produces no trailing `Paragraph` (see the note further down).
fn on_match<'a>(&self, parser: &dyn Parser, document: &'a Document<'a>, cursor: Cursor, _match_data: Option<Box<dyn Any>>) -> (Cursor, Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>>) {
let mut reports = vec![];
let content = cursor.source.content();
let (end_cursor, numbering, source) = match self.start_re.captures_at(content, cursor.pos) {
None => panic!("Unknown error"),
Some(caps) => {
// End of the entry so far; extended over each accepted continuation line
let mut end_pos = caps.get(0).unwrap().end();
let mut spacing = None; // Spacing used to continue list entry
loop {
// If another entry starts on the next line, don't continue matching
match self.next_match(&cursor.at(end_pos))
{
Some((pos, _)) => {
if pos == end_pos { break }
}
None => {},
}
// Continue matching as current entry
match self.continue_re.captures_at(content, end_pos) {
None => break,
Some(continue_caps) => {
// Only a continuation that starts exactly at the current end counts
if continue_caps.get(0).unwrap().start() != end_pos { break }
// Get the spacing
let cap_spacing = continue_caps.get(1).unwrap();
match &spacing {
// The first continuation line sets the reference spacing
None => spacing = Some(cap_spacing.range()),
Some(spacing) => 'some: {
// Same spacing text as the reference: nothing to report
if content[cap_spacing.range()] == content[spacing.clone()] { break 'some }
reports.push(
Report::build(ReportKind::Warning, cursor.source.clone(), continue_caps.get(1).unwrap().start())
.with_message("Invalid list entry spacing")
.with_label(
Label::new((cursor.source.clone(), cap_spacing.range()))
.with_message("Spacing for list entries must match")
.with_color(parser.colors().warning))
.with_label(
Label::new((cursor.source.clone(), spacing.clone()))
.with_message("Previous spacing")
.with_color(parser.colors().warning))
.finish());
},
}
// The line is kept as part of the entry even when a warning was emitted
end_pos = continue_caps.get(0).unwrap().end();
}
}
}
// The entry's text starts right after the `*`/`-` markers
let start_pos = caps.get(1).unwrap().end();
let source = VirtualSource::new(
Token::new(start_pos..end_pos, cursor.source.clone()),
"List Entry".to_string(),
content.as_str()[start_pos..end_pos].to_string(),
);
(cursor.at(end_pos),
ListRule::parse_depth(caps.get(1).unwrap().as_str(), document),
source)
},
};
// Parse the entry's text as its own document, with `document` as parent
let parsed_entry = parser.parse(Rc::new(source), Some(&document));
// NOTE(review): this unwrap panics when the parsed entry does not end with a
// Paragraph (possibly an entry with no text after its markers) -- confirm
let mut parsed_paragraph = parsed_entry.last_element_mut::<Paragraph>(false).unwrap(); // Extract content from paragraph
let entry = ListEntry::new(
Token::new(cursor.pos..end_cursor.pos, cursor.source.clone()),
numbering,
std::mem::replace(&mut parsed_paragraph.content, Vec::new())
);
// Get the previous list; if there is none, insert a new list
let mut list = match document.last_element_mut::<List>(false)
{
Some(last) => last,
None => {
parser.push(document,
Box::new(List::new(
Token::new(cursor.pos..end_cursor.pos, cursor.source.clone()))));
document.last_element_mut::<List>(false).unwrap()
}
};
list.push(entry);
(end_cursor, reports)
}
}

13
src/elements/mod.rs Normal file
View file

@ -0,0 +1,13 @@
// Submodules of `elements`. `registrar` adds the rules declared in the other
// submodules to a parser.
pub mod registrar;
pub mod comment;
pub mod paragraph;
pub mod variable;
pub mod import;
pub mod script;
pub mod list;
pub mod style;
pub mod section;
pub mod link;
pub mod code;
pub mod tex;
pub mod raw;

127
src/elements/paragraph.rs Normal file
View file

@ -0,0 +1,127 @@
use std::{any::Any, ops::Range, rc::Rc};
use ariadne::Report;
use regex::Regex;
use crate::{compiler::compiler::{Compiler, Target}, document::{document::Document, element::{ElemKind, Element}}, parser::{parser::Parser, rule::Rule, source::{Cursor, Source, Token}}};
// TODO: Full refactor
// The problem is that documents parsed from other sources (e.g. by variables)
// are not merged correctly into the existing paragraph
// A solution would be to use the "(\n){2,}" regex to split paragraph, which would reduce the work needed for process_text
// Another fix would be to keep parsing (recursively) into the same document (like previous version)
// The issue is that this would break the current `Token` implementation
// Which would need to be reworked
/// Container element grouping the elements that belong to one paragraph.
#[derive(Debug)]
pub struct Paragraph {
    /// Token covering the paragraph; grows as elements are pushed.
    location: Token,
    /// Elements of the paragraph, in document order.
    pub content: Vec<Box<dyn Element>>,
}

impl Paragraph {
    /// Creates an empty paragraph located at `location`.
    pub fn new(location: Token) -> Self {
        Paragraph { location, content: vec![] }
    }

    /// Whether the paragraph holds no element.
    pub fn is_empty(&self) -> bool {
        self.content.len() == 0
    }

    /// Appends `elem` to the paragraph.
    ///
    /// When the element comes from the same source as the paragraph, the
    /// paragraph's range is stretched to end where the element ends; elements
    /// from another source leave the range untouched.
    pub fn push(&mut self, elem: Box<dyn Element>) {
        let same_source = elem.location().source() == self.location().source();
        if same_source {
            let start = self.location.start();
            self.location.range = start..elem.location().end();
        }
        self.content.push(elem);
    }

    /// Returns the last element satisfying `predicate`, searching from the end.
    pub fn find_back<P: FnMut(&&Box<dyn Element + 'static>) -> bool>(&self, predicate: P)
    -> Option<&Box<dyn Element>> {
        let mut newest_first = self.content.iter().rev();
        newest_first.find(predicate)
    }
}
impl Element for Paragraph {
    /// Token covering the paragraph.
    fn location(&self) -> &Token {
        &self.location
    }

    fn kind(&self) -> ElemKind {
        ElemKind::Special
    }

    fn element_name(&self) -> &'static str {
        "Paragraph"
    }

    /// Pretty-printed debug representation of the paragraph.
    fn to_string(&self) -> String {
        format!("{:#?}", self)
    }

    /// Renders the paragraph for the compiler's target.
    ///
    /// An empty paragraph compiles to an empty string whatever the target. For
    /// HTML the compiled elements are concatenated inside `<p>`...`</p>`, and the
    /// first element that fails to compile aborts with its error. LaTeX is not
    /// implemented and panics through `todo!`.
    fn compile(&self, compiler: &Compiler, document: &Document) -> Result<String, String> {
        if self.content.is_empty() {
            return Ok(String::new());
        }

        match compiler.target() {
            Target::HTML => {
                // Tags are always emitted; an earlier idea, left disabled, was to
                // skip them next to another Paragraph (using a separating space
                // instead of the opening tag).
                let mut html = String::new();
                html.push_str("<p>");
                for elem in &self.content {
                    let compiled = elem.compile(compiler, document)?;
                    html.push_str(compiled.as_str());
                }
                html.push_str("</p>");
                Ok(html)
            }
            Target::LATEX => todo!("Unimplemented compiler"),
        }
    }
}
/// Parsing rule that breaks paragraphs on runs of newlines.
pub struct ParagraphRule {
    /// Matches two or more consecutive newlines.
    re: Regex,
}

impl ParagraphRule {
    /// Builds the rule with its paragraph-break pattern.
    pub fn new() -> Self {
        let re = Regex::new(r"\n{2,}").unwrap();
        ParagraphRule { re }
    }
}
impl Rule for ParagraphRule {
    fn name(&self) -> &'static str {
        "Paragraphing"
    }

    /// Position of the next paragraph break at or after the cursor; the boxed
    /// match data is an empty placeholder array.
    fn next_match(&self, cursor: &Cursor) -> Option<(usize, Box<dyn Any>)> {
        let found = self.re.find_at(cursor.source.content(), cursor.pos)?;
        Some((found.start(), Box::new([false; 0]) as Box<dyn Any>))
    }

    /// Pushes a new, empty [`Paragraph`] for the break found at `cursor`.
    ///
    /// The returned cursor sits on the last newline of the break (one before the
    /// end of the match). No report is ever produced.
    ///
    /// # Panics
    ///
    /// Panics if the pattern does not match from the cursor position.
    fn on_match(&self, parser: &dyn Parser, document: &Document, cursor: Cursor, _match_data: Option<Box<dyn Any>>)
    -> (Cursor, Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>>) {
        let Some(capture) = self.re.captures_at(cursor.source.content(), cursor.pos) else {
            panic!("Unknown error")
        };
        let end_cursor = cursor.at(capture.get(0).unwrap().end() - 1);

        let location = Token::new(cursor.pos..end_cursor.pos, cursor.source.clone());
        parser.push(document, Box::new(Paragraph::new(location)));

        (end_cursor, Vec::new())
    }
}

164
src/elements/raw.rs Normal file
View file

@ -0,0 +1,164 @@
use regex::{Captures, Regex};
use crate::{compiler::compiler::Compiler, document::element::{ElemKind, Element}, parser::{parser::Parser, rule::RegexRule, source::{Source, Token}, util::{self, Property, PropertyParser}}};
use ariadne::{Fmt, Label, Report, ReportKind};
use crate::document::document::Document;
use std::{collections::HashMap, ops::Range, rc::Rc, str::FromStr};
/// Element whose content is emitted as-is by the compiler.
#[derive(Debug)]
struct Raw {
    /// Token covering the raw block in its source.
    location: Token,
    /// Display kind reported for this element.
    kind: ElemKind,
    /// Text returned unchanged when compiling.
    content: String,
}

impl Raw {
    /// Creates a raw element from its location, display kind and content.
    fn new(location: Token, kind: ElemKind, content: String) -> Self {
        Raw { location, kind, content }
    }
}
impl Element for Raw {
    /// Token covering the raw block.
    fn location(&self) -> &Token {
        &self.location
    }

    /// Display kind chosen when the element was created.
    fn kind(&self) -> ElemKind {
        self.kind.clone()
    }

    fn element_name(&self) -> &'static str {
        "Raw"
    }

    /// Pretty-printed debug representation of the element.
    fn to_string(&self) -> String {
        format!("{self:#?}")
    }

    /// Returns a copy of the stored content, untouched, for every target.
    fn compile(&self, compiler: &Compiler, _document: &Document) -> Result<String, String> {
        let verbatim = self.content.clone();
        Ok(verbatim)
    }
}
/// Parsing rule for raw blocks written as `{?[properties] content ?}`.
pub struct RawRule {
    /// Patterns handled by this rule; capture 1 holds the properties, capture 2
    /// the content and capture 3 the closing `?}`.
    re: [Regex; 1],
    /// Parser for the bracketed property list.
    properties: PropertyParser,
}

impl RawRule {
    /// Builds the rule with its pattern and its single `kind` property, whose
    /// default value is `inline`.
    pub fn new() -> Self {
        let kind = Property::new(
            true,
            "Element display kind".to_string(),
            Some("inline".to_string()),
        );
        let mut props = HashMap::new();
        props.insert("kind".to_string(), kind);

        let pattern = Regex::new(r"\{\?(?:\[((?:\\.|[^\[\]\\])*?)\])?(?:((?:\\.|[^\\\\])*?)(\?\}))?").unwrap();
        Self {
            re: [pattern],
            properties: PropertyParser::new(props),
        }
    }
}
impl RegexRule for RawRule {
    fn name(&self) -> &'static str {
        "Raw"
    }

    fn regexes(&self) -> &[regex::Regex] {
        &self.re
    }

    /// Builds a `Raw` element from a regex match and pushes it into `document`.
    ///
    /// Steps, in order:
    /// 1. the content (capture 2) must be present, otherwise the block is
    ///    unterminated and an error is returned; empty content only warns;
    /// 2. the properties (capture 1) are parsed, or the defaults are used when
    ///    the bracketed list is absent;
    /// 3. the `kind` property is converted to an `ElemKind`.
    ///
    /// Any error stops processing and returns the reports gathered so far
    /// without pushing an element.
    fn on_regex_match(&self, _index: usize, parser: &dyn Parser, document: &Document, token: Token, matches: Captures)
    -> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>> {
        let mut reports = vec![];

        // Unterminated
        let Some(content) = matches.get(2) else {
            reports.push(
                Report::build(ReportKind::Error, token.source(), token.start())
                    .with_message("Unterminated Raw Code")
                    .with_label(
                        Label::new((token.source().clone(), token.range.clone()))
                            .with_message(format!("Missing terminating `{}` after first `{}`",
                                "?}".fg(parser.colors().info),
                                "{?".fg(parser.colors().info)))
                            .with_color(parser.colors().error))
                    .finish());
            return reports;
        };

        let raw_content = util::process_escaped('\\', "?}", content.as_str().trim());
        if raw_content.is_empty() {
            reports.push(
                Report::build(ReportKind::Warning, token.source(), content.start())
                    .with_message("Empty Raw Code")
                    .with_label(
                        Label::new((token.source().clone(), content.range()))
                            .with_message("Raw code is empty")
                            .with_color(parser.colors().warning))
                    .finish());
        }

        let properties = if let Some(props) = matches.get(1) {
            let processed = util::process_escaped('\\', "]", props.as_str().trim());
            match self.properties.parse(processed.as_str()) {
                Ok(properties) => properties,
                Err(e) => {
                    reports.push(
                        Report::build(ReportKind::Error, token.source(), props.start())
                            .with_message("Invalid Raw Code Properties")
                            .with_label(
                                Label::new((token.source().clone(), props.range()))
                                    .with_message(e)
                                    .with_color(parser.colors().error))
                            .finish());
                    return reports;
                }
            }
        } else {
            // No bracketed list: fall back to the declared defaults
            match self.properties.default() {
                Ok(properties) => properties,
                Err(e) => {
                    reports.push(
                        Report::build(ReportKind::Error, token.source(), token.start())
                            .with_message("Invalid Raw Code")
                            .with_label(
                                Label::new((token.source().clone(), token.range.clone()))
                                    .with_message(format!("Raw code is missing properties: {e}"))
                                    .with_color(parser.colors().error))
                            .finish());
                    return reports;
                }
            }
        };

        let kind_lookup = properties.get("kind",
            |prop, value| ElemKind::from_str(value.as_str()).map_err(|e| (prop, e)));
        let raw_kind: ElemKind = match kind_lookup {
            Ok((_prop, kind)) => kind,
            Err((prop, e)) => {
                reports.push(
                    Report::build(ReportKind::Error, token.source(), token.start())
                        .with_message("Invalid Raw Code Property")
                        .with_label(
                            Label::new((token.source().clone(), token.range.clone()))
                                .with_message(format!("Property `kind: {}` cannot be converted: {}",
                                    prop.fg(parser.colors().info),
                                    e.fg(parser.colors().error)))
                                .with_color(parser.colors().warning))
                        .finish());
                return reports;
            }
        };

        parser.push(document, Box::new(Raw::new(
            token.clone(),
            raw_kind,
            raw_content
        )));

        reports
    }
}

22
src/elements/registrar.rs Normal file
View file

@ -0,0 +1,22 @@
use crate::parser::parser::Parser;
use super::{code::CodeRule, comment::CommentRule, import::ImportRule, link::LinkRule, list::ListRule, paragraph::ParagraphRule, raw::RawRule, script::ScriptRule, section::SectionRule, style::StyleRule, tex::TexRule, variable::{VariableRule, VariableSubstitutionRule}};
pub fn register<P: Parser>(parser: &mut P)
{
parser.add_rule(Box::new(CommentRule::new()), None);
parser.add_rule(Box::new(ParagraphRule::new()), None);
parser.add_rule(Box::new(ImportRule::new()), None);
parser.add_rule(Box::new(ScriptRule::new()), None);
parser.add_rule(Box::new(VariableRule::new()), None);
parser.add_rule(Box::new(VariableSubstitutionRule::new()), None);
parser.add_rule(Box::new(RawRule::new()), None);
parser.add_rule(Box::new(ListRule::new()), None);
parser.add_rule(Box::new(CodeRule::new()), None);
parser.add_rule(Box::new(TexRule::new()), None);
parser.add_rule(Box::new(StyleRule::new()), None);
parser.add_rule(Box::new(SectionRule::new()), None);
parser.add_rule(Box::new(LinkRule::new()), None);
}

201
src/elements/script.rs Normal file
View file

@ -0,0 +1,201 @@
use regex::{Captures, Regex};
use crate::{document::element::Text, lua::kernel::{Kernel, KernelHolder}, parser::{parser::{Parser, ReportColors}, rule::RegexRule, source::{Source, Token, VirtualSource}, util}};
use ariadne::{Fmt, Label, Report, ReportKind};
use crate::document::document::Document;
use std::{ops::Range, rc::Rc};
/// Parsing rule for Lua scripts: `@< ... >@` blocks and `%< ... >%` inline forms.
pub struct ScriptRule {
    /// Index 0 matches `@< ... >@`, index 1 matches `%< ... >%`.
    re: [Regex; 2],
    /// `(symbol, description)` of each evaluation kind accepted by `%< ... >%`.
    eval_kinds: [(&'static str, &'static str); 2],
}

impl ScriptRule {
    /// Builds the rule with its two patterns and the table of evaluation kinds.
    pub fn new() -> Self {
        let exec_re = Regex::new(r"(?:^|\n)@<(?:(.*)\n?)((?:\\.|[^\[\]\\])*?)(?:\n?)>@").unwrap();
        let eval_re = Regex::new(r"%<([^\s[:alpha:]])?(?:\[(.*?)\])?((?:\\.|[^\[\]\\])*?)(?:\n?)>%").unwrap();
        Self {
            re: [exec_re, eval_re],
            eval_kinds: [
                ("", "Eval to text"),
                ("!", "Eval and parse"),
            ],
        }
    }

    /// Normalizes a kernel name.
    ///
    /// Surrounding whitespace is removed; an empty name becomes `main`. A name
    /// that still contains whitespace is rejected with a message for the user.
    fn validate_kernel_name(colors: &ReportColors, name: &str)
    -> Result<String, String> {
        let trimmed = name.trim();
        if trimmed.is_empty() {
            return Ok("main".to_string());
        }
        if trimmed.chars().any(char::is_whitespace) {
            return Err(format!("Kernel name `{}` contains whitespaces",
                trimmed.fg(colors.highlight)));
        }
        Ok(trimmed.to_string())
    }

    /// Looks up the evaluation kind whose symbol equals `kind`.
    ///
    /// Returns its index in `eval_kinds`, or a message listing every available
    /// kind when the symbol is unknown.
    fn validate_kind(&self, colors: &ReportColors, kind: &str)
    -> Result<usize, String> {
        if let Some(id) = self.eval_kinds.iter().position(|(symbol, _)| kind == *symbol) {
            return Ok(id);
        }

        let mut available = String::new();
        for (symbol, name) in self.eval_kinds.iter() {
            available.push_str(format!("\n - '{symbol}' => {name}").as_str());
        }
        Err(format!("Unable to find eval kind `{}`. Available kinds:{}",
            kind.fg(colors.highlight),
            available))
    }
}
// `RegexRule` implementation for scripts. Pattern index 0 (`@< ... >@`) executes
// Lua code in a kernel; pattern index 1 (`%< ... >%`) evaluates Lua code and
// inserts the resulting string into the document.
impl RegexRule for ScriptRule
{
fn name(&self) -> &'static str { "Script" }
fn regexes(&self) -> &[regex::Regex] { &self.re }
// Runs the matched script and returns the reports produced along the way.
//
// `index` tells which pattern matched. The kernel name is capture 1 for the
// exec form and capture 2 for the eval form; the code is capture 2 and
// capture 3 respectively. For the eval form, capture 1 is the kind symbol.
fn on_regex_match(&self, index: usize, parser: &dyn Parser, document: &Document, token: Token, matches: Captures)
-> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>> {
let mut reports = vec![];
// Validation pass: rejects kernel names that contain whitespace. The value
// bound here is shadowed by the next `kernel_name` binding, so only the
// early return on error has an effect.
let kernel_name = match matches.get(if index == 0 {1} else {2}) {
None => "main".to_string(),
Some(name) => {
match ScriptRule::validate_kernel_name(parser.colors(), name.as_str())
{
Ok(name) => name,
Err(e) => {
reports.push(
Report::build(ReportKind::Error, token.source(), name.start())
.with_message("Invalid kernel name")
.with_label(
Label::new((token.source(), name.range()))
.with_message(e)
.with_color(parser.colors().error))
.finish());
return reports;
}
}
}
};
// Trimmed kernel name borrowed from the match, `main` when absent or empty
let kernel_name = matches.get(if index == 0 {1} else {2})
.and_then(|name| {
let trimmed = name.as_str().trim_start().trim_end();
(!trimmed.is_empty()).then_some(trimmed)
})
.unwrap_or("main");
// Reuse the kernel registered under that name, or create it on first use
let kernel = parser.get_kernel(kernel_name).unwrap_or_else(|| {
parser.insert_kernel(kernel_name.to_string(), Kernel::new())
});
// Trimmed code together with the range of the untrimmed capture; empty code
// only produces a warning and stops processing
let kernel_data = matches.get(if index == 0 {2} else {3})
.and_then(|code| {
let trimmed = code.as_str().trim_start().trim_end();
(!trimmed.is_empty()).then_some((trimmed, code.range()))
}).or_else(|| {
reports.push(
Report::build(ReportKind::Warning, token.source(), token.start())
.with_message("Invalid kernel code")
.with_label(
Label::new((token.source(), token.start()+1..token.end()))
.with_message("Kernel code is empty")
.with_color(parser.colors().warning))
.finish());
None
});
if kernel_data.is_none() { return reports; }
let (kernel_content, kernel_range) = kernel_data.unwrap();
// Virtual source holding the code, so Lua errors can be reported against it
// NOTE(review): the escape token is ">@" for both forms, although the eval form
// is closed by ">%" -- confirm whether ">%" was intended when index == 1
let source = Rc::new(VirtualSource::new(
Token::new(kernel_range, token.source()),
format!("{}#{}:lua_kernel@{kernel_name}", token.source().name(), matches.get(0).unwrap().start()),
util::process_escaped('\\', ">@", kernel_content)
)) as Rc<dyn Source>;
let chunk = kernel.lua.load(source.content())
.set_name(kernel_name);
if index == 0 // @< ... >@ -> Exec
{
match chunk.exec()
{
Ok(_) => {},
Err(e) => {
reports.push(
Report::build(ReportKind::Error, source.clone(), 0)
.with_message("Invalid kernel code")
.with_label(
Label::new((source.clone(), 0..source.content().len()))
.with_message(format!("Kernel execution failed:\n{}", e.to_string()))
.with_color(parser.colors().error))
.finish());
}
}
}
else if index == 1 // %< ... >% -> Eval
{
// Index into `eval_kinds`: 0 when no symbol was captured
let kind = match matches.get(1) {
None => 0,
Some(kind) => {
match self.validate_kind(parser.colors(), kind.as_str())
{
Ok(kind) => kind,
Err(msg) => {
reports.push(
Report::build(ReportKind::Error, token.source(), kind.start())
.with_message("Invalid kernel code kind")
.with_label(
Label::new((token.source(), kind.range()))
.with_message(msg)
.with_color(parser.colors().error))
.finish());
return reports;
}
}
}
};
// The chunk must evaluate to a string
match chunk.eval::<String>()
{
Ok(result) => {
if kind == 0 // Eval to text
{
if !result.is_empty()
{
// NOTE(review): this token starts at 1 while the parse branch below
// starts at 0 -- confirm the offset is intended
parser.push(document, Box::new(Text::new(
Token::new(1..source.content().len(), source.clone()),
util::process_text(document, result.as_str()),
)));
}
}
else if kind == 1 // Eval and Parse
{
// The result becomes a new virtual source parsed into the current document
let parse_source = Rc::new(VirtualSource::new(
Token::new(0..source.content().len(), source.clone()),
format!("parse({})", source.name()),
result
)) as Rc<dyn Source>;
//println!("SRC={parse_source:#?}, {}", parse_source.content());
parser.parse_into(parse_source, document);
}
},
Err(e) => {
reports.push(
Report::build(ReportKind::Error, source.clone(), 0)
.with_message("Invalid kernel code")
.with_label(
Label::new((source.clone(), 0..source.content().len()))
.with_message(format!("Kernel evaluation failed:\n{}", e.to_string()))
.with_color(parser.colors().error))
.finish());
}
}
}
reports
}
}

208
src/elements/section.rs Normal file
View file

@ -0,0 +1,208 @@
use regex::Regex;
use crate::{compiler::compiler::Target, parser::{parser::Parser, rule::RegexRule, source::{Source, Token}}};
use ariadne::{Report, Fmt, Label, ReportKind};
use crate::{compiler::compiler::Compiler, document::{document::Document, element::{ElemKind, Element, ReferenceableElement}}};
use std::{ops::Range, rc::Rc};
/// Section heading element.
#[derive(Debug)]
pub struct Section {
    /// Token covering the heading in its source.
    location: Token,
    /// Section title
    title: String,
    /// Section depth
    depth: usize,
    /// Section kind, e.g numbered, unnumbered, ...
    kind: u8,
    /// Section reference name
    reference: Option<String>,
}

impl Section {
    /// Creates a section from its location, title, depth, kind flags and
    /// optional reference name.
    pub fn new(location: Token, title: String, depth: usize, kind: u8, reference: Option<String>) -> Self {
        Section { location, title, depth, kind, reference }
    }
}
impl Element for Section {
    /// Token covering the heading.
    fn location(&self) -> &Token {
        &self.location
    }

    /// Sections are block elements.
    fn kind(&self) -> ElemKind {
        ElemKind::Block
    }

    fn element_name(&self) -> &'static str {
        "Section"
    }

    /// Pretty-printed debug representation of the section.
    fn to_string(&self) -> String {
        format!("{self:#?}")
    }

    /// Sections can be the target of references.
    fn as_referenceable(&self) -> Option<&dyn ReferenceableElement> {
        Some(self)
    }

    /// Renders the heading for the compiler's target.
    ///
    /// For HTML this is an `<hN>` tag, `N` being the section depth, around the
    /// sanitized title. LaTeX is not implemented and yields an error.
    fn compile(&self, compiler: &Compiler, _document: &Document) -> Result<String, String> {
        match compiler.target() {
            Target::HTML => {
                let level = self.depth;
                let title = compiler.sanitize(self.title.as_str());
                Ok(format!("<h{level}>{title}</h{level}>"))
            }
            Target::LATEX => Err("Unimplemented compiler".to_string()),
        }
    }
}
impl ReferenceableElement for Section {
    /// Name under which the section can be referenced, if one was given.
    fn reference_name(&self) -> Option<&String> {
        match self.reference {
            Some(ref name) => Some(name),
            None => None,
        }
    }
}
/// Parsing rule for section headings.
pub struct SectionRule {
    /// Patterns handled by this rule. Captures: 1 the `#` run, 2 the reference
    /// name between braces, 3 the `*`/`+` kind markers, 5 the rest of the line.
    re: [Regex; 1],
}

impl SectionRule {
    /// Builds the rule with its heading pattern.
    pub fn new() -> Self {
        let heading = Regex::new(r"(?:^|\n)(#{1,})(?:\{(.*)\})?((\*|\+){1,})?(.*)").unwrap();
        SectionRule { re: [heading] }
    }
}
/// Bit flags describing a section's kind; flags combine with `|`.
pub mod section_kind {
    /// No flag set: a numbered, listed section.
    pub const NONE: u8 = 0;
    /// Non-listing section (the `+` marker).
    pub const NO_TOC: u8 = 1 << 0;
    /// Unnumbered section (the `*` marker).
    pub const NO_NUMBER: u8 = 1 << 1;
}
impl RegexRule for SectionRule {
fn name(&self) -> &'static str { "Section" }
fn regexes(&self) -> &[Regex] { &self.re }
fn on_regex_match(&self, _: usize, parser: &dyn Parser, document: &Document, token: Token, matches: regex::Captures) -> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>>
{
let mut result = vec![];
let section_depth = match matches.get(1)
{
Some(depth) => {
if depth.len() > 6 {
result.push(
Report::build(ReportKind::Error, token.source(), depth.start())
.with_message("Invalid section depth")
.with_label(
Label::new((token.source(), depth.range()))
.with_message(format!("Section is of depth {}, which is greather than {} (maximum depth allowed)",
depth.len().fg(parser.colors().info),
6.fg(parser.colors().info)))
.with_color(parser.colors().error))
.finish());
return result;
}
depth.len()
},
_ => panic!("Empty section depth"),
};
// [Optional] Reference name
let section_refname = matches.get(2).map_or_else(|| None,
|refname| {
// Check for duplicate reference
if let Some((ref_doc, reference)) = document.get_reference(refname.as_str())
{
result.push(
Report::build(ReportKind::Warning, token.source(), refname.start())
.with_message("Duplicate reference name")
.with_label(
Label::new((token.source(), refname.range()))
.with_message(format!("Reference with name `{}` is already defined in `{}`",
refname.as_str().fg(parser.colors().highlight),
ref_doc.source().name().as_str().fg(parser.colors().highlight)))
.with_message(format!("`{}` conflicts with previously defined reference to {}",
refname.as_str().fg(parser.colors().highlight),
reference.element_name().fg(parser.colors().highlight)))
.with_color(parser.colors().warning))
.with_label(
Label::new((ref_doc.source(), reference.location().start()+1..reference.location().end() ))
.with_message(format!("`{}` previously defined here",
refname.as_str().fg(parser.colors().highlight)))
.with_color(parser.colors().warning))
.with_note(format!("Previous reference was overwritten"))
.finish());
}
Some(refname.as_str().to_string())
});
// Section kind
let section_kind = match matches.get(3)
{
Some(kind) => {
match kind.as_str() {
"*+" | "+*" => section_kind::NO_NUMBER | section_kind::NO_TOC,
"*" => section_kind::NO_NUMBER,
"+" => section_kind::NO_TOC,
"" => section_kind::NONE,
_ => {
result.push(
Report::build(ReportKind::Error, token.source(), kind.start())
.with_message("Invalid section numbering kind")
.with_label(
Label::new((token.source(), kind.range()))
.with_message(format!("Section numbering kind must be a combination of `{}` for unnumbered, and `{}` for non-listing; got `{}`",
"*".fg(parser.colors().info),
"+".fg(parser.colors().info),
kind.as_str().fg(parser.colors().highlight)))
.with_color(parser.colors().error))
.with_help(format!("Leave empty for a numbered listed section"))
.finish());
return result;
}
}
}
_ => section_kind::NONE,
};
// Spacing + Section name
let section_name = match matches.get(5)
{
Some(name) => {
let split = name.as_str().chars()
.position(|c| !c.is_whitespace())
.unwrap_or(0);
let section_name = &name.as_str()[split..];
if section_name.is_empty() // No name
{
result.push(
Report::build(ReportKind::Error, token.source(), name.start())
.with_message("Missing section name")
.with_label(
Label::new((token.source(), name.range()))
.with_message("Sections require a name before line end")
.with_color(parser.colors().error))
.finish());
return result;
}
// No spacing
if split == 0
{
result.push(
Report::build(ReportKind::Warning, token.source(), name.start())
.with_message("Missing section spacing")
.with_label(
Label::new((token.source(), name.range()))
.with_message("Sections require at least one whitespace before the section's name")
.with_color(parser.colors().warning))
.with_help(format!("Add a space before `{}`", section_name.fg(parser.colors().highlight)))
.finish());
return result;
}
section_name.to_string()
},
_ => panic!("Empty section name")
};
parser.push(document, Box::new(
Section::new(
token.clone(),
section_name,
section_depth,
section_kind,
section_refname
)
));
return result;
}
}

185
src/elements/style.rs Normal file
View file

@ -0,0 +1,185 @@
use regex::{Captures, Regex};
use crate::{compiler::compiler::{Compiler, Target}, document::element::{ElemKind, Element}, parser::{parser::Parser, rule::RegexRule, source::{Source, Token}, state::State}};
use ariadne::{Fmt, Label, Report, ReportKind};
use crate::document::document::Document;
use crate::parser::state::Scope;
use std::{cell::RefCell, ops::Range, rc::Rc};
use lazy_static::lazy_static;
use super::paragraph::Paragraph;
/// Inline element marking the point where a text style is opened or closed.
#[derive(Debug)]
pub struct Style {
// Token that produced this element
location: Token,
// Style index; `compile` uses it to select the tag pair to emit
kind: usize,
// When `true`, `compile` emits the second (closing) tag of the pair
close: bool,
}
impl Style
{
pub fn new(location: Token, kind: usize, close: bool) -> Self {
Self { location, kind, close }
}
}
impl Element for Style {
    /// Token that produced this element.
    fn location(&self) -> &Token {
        &self.location
    }

    /// Style markers are always inline elements.
    fn kind(&self) -> ElemKind {
        ElemKind::Inline
    }

    /// Name of this element type.
    fn element_name(&self) -> &'static str {
        "Style"
    }

    /// Debug representation of the element.
    fn to_string(&self) -> String {
        format!("{self:#?}")
    }

    /// Emits the opening or closing tag matching this style.
    ///
    /// # Errors
    /// Returns an error for targets other than HTML, or when `kind` does not
    /// correspond to a known style.
    fn compile(&self, compiler: &Compiler, _document: &Document) -> Result<String, String> {
        match compiler.target() {
            Target::HTML => {
                // Laid out as (open, close) pairs, indexed by `kind * 2 + close`
                const TAGS: [&str; 8] = [
                    // Bold
                    "<b>", "</b>",
                    // Italic
                    "<i>", "</i>",
                    // Underline
                    "<u>", "</u>",
                    // Code
                    "<em>", "</em>",
                ];

                TAGS.get(self.kind * 2 + usize::from(self.close))
                    .map(|tag| tag.to_string())
                    .ok_or_else(|| format!("Unknown style kind: {}", self.kind))
            }
            Target::LATEX => Err("Unimplemented compiler".to_string()),
        }
    }
}
/// Parser state remembering which styles are currently open.
struct StyleState
{
// One slot per style, in the same order as `StyleState::NAMES`:
// the token that opened the style, or `None` when the style is not active
toggled: [Option<Token>; 4]
}
impl StyleState {
    /// Human readable style names, in the same order as the `toggled` slots.
    const NAMES: [&'static str; 4] = ["Bold", "Italic", "Underline", "Code"];

    /// Creates a state in which no style is active.
    fn new() -> Self {
        Self {
            toggled: Default::default(),
        }
    }
}
impl State for StyleState
{
fn scope(&self) -> Scope { Scope::PARAGRAPH }
fn on_remove<'a>(&self, parser: &dyn Parser, document: &Document) -> Vec<Report<'a, (Rc<dyn Source>, Range<usize>)>> {
let mut result = Vec::new();
self.toggled
.iter()
.zip(StyleState::NAMES)
.for_each(|(token, name)|
{
if token.is_none() { return } // Style not enabled
let token = token.as_ref().unwrap();
//let range = range.as_ref().unwrap();
//let active_range = range.start .. paragraph.location().end()-1;
let paragraph = document.last_element::<Paragraph>(false).unwrap();
let paragraph_end = paragraph.content.last()
.and_then(|last| Some((last.location().source(), last.location().end()-1 .. last.location().end())))
.unwrap();
// TODO: Allow style to span multiple documents if they don't break paragraph.
result.push(
Report::build(ReportKind::Error, token.source(), token.start())
.with_message("Unterminated style")
//.with_label(
// Label::new((document.source(), active_range.clone()))
// .with_order(0)
// .with_message(format!("Style {} is not terminated before the end of paragraph",
// name.fg(parser.colors().info)))
// .with_color(parser.colors().error))
.with_label(
Label::new((token.source(), token.range.clone()))
.with_order(1)
.with_message(format!("Style {} starts here",
name.fg(parser.colors().info)))
.with_color(parser.colors().info))
.with_label(
Label::new(paragraph_end)
.with_order(1)
.with_message(format!("Paragraph ends here"))
.with_color(parser.colors().info))
.with_note("Styles cannot span multiple documents (i.e @import)")
.finish());
});
return result;
}
}
/// Rule matching the style delimiters.
pub struct StyleRule {
// One regex per style; the index of the matching regex is used as the style kind
re: [Regex; 4],
}
impl StyleRule {
    /// Creates the rule with one delimiter regex per style.
    pub fn new() -> Self {
        // Order is significant: Bold, Italic, Underline, Code
        let delimiters = [r"\*\*", r"\*", r"__", r"`"];

        Self {
            re: delimiters.map(|pattern| Regex::new(pattern).unwrap()),
        }
    }
}
lazy_static! {
// Key under which the `StyleState` is queried from and inserted into the parser state
static ref STATE_NAME : String = "elements.style".to_string();
}
impl RegexRule for StyleRule {
    fn name(&self) -> &'static str {
        "Style"
    }

    fn regexes(&self) -> &[regex::Regex] {
        &self.re
    }

    /// Toggles the style at `index` and pushes the matching opening or
    /// closing [`Style`] element into the document.
    fn on_regex_match(&self, index: usize, parser: &dyn Parser, document: &Document, token: Token, _matches: Captures) -> Vec<Report<(Rc<dyn Source>, Range<usize>)>> {
        // Kept as its own statement so the shared borrow of the state holder
        // is released before `state_mut()` is requested below.
        let existing = parser.state().query(&STATE_NAME);
        let state = existing.unwrap_or_else(|| {
            // Insert as a new state
            parser
                .state_mut()
                .insert(STATE_NAME.clone(), Rc::new(RefCell::new(StyleState::new())))
                .unwrap_or_else(|_| panic!("Unknown error"))
        });

        let mut guard = state.borrow_mut();
        let Some(style_state) = guard.as_any_mut().downcast_mut::<StyleState>() else {
            panic!("Invalid state at `{}`", STATE_NAME.as_str());
        };

        // A style that is currently open gets closed, and vice versa
        let closing = style_state.toggled[index].is_some();
        style_state.toggled[index] = if closing { None } else { Some(token.clone()) };

        parser.push(document, Box::new(Style::new(token.clone(), index, closing)));

        vec![]
    }
}

263
src/elements/tex.rs Normal file
View file

@ -0,0 +1,263 @@
use std::{io::{Read, Write}, ops::Range, process::{Command, Stdio}, rc::Rc, sync::Once};
use ariadne::{Fmt, Label, Report, ReportKind};
use crypto::{digest::Digest, sha2::Sha512};
use regex::{Captures, Regex};
use crate::{cache::cache::{Cached, CachedError}, compiler::compiler::{Compiler, Target}, document::{document::Document, element::{ElemKind, Element}}, parser::{parser::Parser, rule::RegexRule, source::{Source, Token}, util}};
/// Layout of a LaTeX element.
#[derive(Debug, PartialEq, Eq)]
enum TexKind
{
// Produced by the `$|...|$` syntax
Block,
// Produced by the `$...$` syntax
Inline,
}
impl From<&TexKind> for ElemKind {
    /// Maps the LaTeX layout onto the corresponding element kind.
    fn from(value: &TexKind) -> Self {
        match value {
            TexKind::Block => ElemKind::Block,
            TexKind::Inline => ElemKind::Inline,
        }
    }
}
/// LaTeX element, rendered to SVG when compiled to HTML.
#[derive(Debug)]
struct Tex
{
// Token that produced this element
location: Token,
// Whether the element is a block or an inline element
block: TexKind,
// Environment name, used to look up the `tex.<env>.*` document variables
env: String,
// LaTeX code to render
tex: String,
// Optional caption (the rule currently always passes `None`)
caption: Option<String>,
}
impl Tex {
/// Creates a new LaTeX element from its already-parsed parts.
fn new(location: Token, block: TexKind, env: String, tex: String, caption: Option<String>) -> Self {
Self { location, block, env, tex, caption }
}
/// Wraps `tex` into a complete `standalone` LaTeX document.
///
/// `fontsize` is inserted as the `pt` class option, `preamble` is placed
/// before `\begin{document}` and `tex` goes inside a `preview` environment.
fn format_latex(fontsize: &String, preamble: &String, tex: &String) -> FormattedTex
{
FormattedTex(format!(r"\documentclass[{}pt,preview]{{standalone}}
{}
\begin{{document}}
\begin{{preview}}
{}
\end{{preview}}
\end{{document}}",
fontsize, preamble, tex))
}
}
/// A complete LaTeX document, ready to be handed to the rendering program.
struct FormattedTex(String);

impl FormattedTex {
    /// Renders latex to svg
    ///
    /// Spawns `exec --fontsize <fontsize>`, writes the document to its stdin
    /// and returns everything the program printed on stdout.
    ///
    /// # Errors
    /// Returns an error when the program cannot be spawned, when the document
    /// cannot be written to its stdin, when it exits with a non-zero status,
    /// or when its output is not valid UTF-8.
    fn latex_to_svg(&self, exec: &String, fontsize: &String) -> Result<String, String> {
        print!("Rendering LaTex `{}`... ", self.0);

        let mut process = Command::new(exec)
            .arg("--fontsize")
            .arg(fontsize)
            .stdout(Stdio::piped())
            .stdin(Stdio::piped())
            .spawn()
            .map_err(|e| format!("Could not spawn `{exec}`: {}", e))?;

        // The handle is dropped at the end of the match arm, which closes the
        // pipe and lets the child see end-of-input.
        let written = match process.stdin.take() {
            Some(mut stdin) => stdin.write_all(self.0.as_bytes()),
            None => Ok(()),
        };

        // Always reap the child, even if writing failed, so that no zombie
        // process is left behind.
        let output = process
            .wait_with_output()
            .map_err(|e| format!("Unable to read `{exec}` stdout: {}", e))?;

        if let Err(e) = written {
            return Err(format!("Unable to write to `{exec}`'s stdin: {}", e));
        }

        // A failed render must not be mistaken for an (empty) SVG
        if !output.status.success() {
            return Err(format!("`{exec}` failed: {}", output.status));
        }

        let result = String::from_utf8(output.stdout)
            .map_err(|e| format!("Unable to read `{exec}` stdout: {}", e))?;

        println!("Done!");
        Ok(result)
    }
}
impl Cached for FormattedTex {
    type Key = String;
    type Value = String;

    /// Statement creating the table that stores rendered documents.
    fn sql_table() -> &'static str {
        "CREATE TABLE IF NOT EXISTS cached_tex (
digest TEXT PRIMARY KEY,
svg BLOB NOT NULL);"
    }

    /// Query fetching the render stored for a digest.
    fn sql_get_query() -> &'static str {
        "SELECT svg FROM cached_tex WHERE digest = (?1)"
    }

    /// Statement storing a render under its digest.
    fn sql_insert_query() -> &'static str {
        "INSERT INTO cached_tex (digest, svg) VALUES (?1, ?2)"
    }

    /// Cache key: SHA-512 digest of the whole LaTeX document.
    fn key(&self) -> <Self as Cached>::Key {
        let document = self.0.as_bytes();
        let mut digest = Sha512::new();
        digest.input(document);
        digest.result_str()
    }
}
impl Element for Tex {
    /// Token that produced this element.
    fn location(&self) -> &Token { &self.location }

    /// Block or inline, depending on the syntax that was used.
    fn kind(&self) -> ElemKind { (&self.block).into() }

    fn element_name(&self) -> &'static str { "LaTeX" }

    /// Debug representation of the element.
    fn to_string(&self) -> String { format!("{self:#?}") }

    /// Renders the LaTeX code and returns the renderer's output.
    ///
    /// The executable, font size, preamble and block prefix are read from the
    /// `tex.<env>.exec`, `tex.<env>.fontsize`, `tex.<env>.preamble` and
    /// `tex.<env>.block_prepend` document variables. When the compiler has a
    /// cache, the render is looked up in (and stored into) it.
    ///
    /// # Errors
    /// Returns an error when rendering or querying the cache fails, and for
    /// targets other than HTML.
    fn compile(&self, compiler: &Compiler, document: &Document)
        -> Result<String, String> {
        match compiler.target() {
            Target::HTML => {
                // The cache table only needs to be created once per process
                static CACHE_INIT : Once = Once::new();
                CACHE_INIT.call_once(|| if let Some(mut con) = compiler.cache() {
                    if let Err(e) = FormattedTex::init(&mut con)
                    {
                        eprintln!("Unable to create cache table: {e}");
                    }
                });

                let exec = document.get_variable(format!("tex.{}.exec", self.env))
                    .map_or("latex2svg".to_string(), |(_, var)| var.to_string());
                // FIXME: Because fontsize is passed as an arg, verify that it cannot be used to execute python/shell code
                let fontsize = document.get_variable(format!("tex.{}.fontsize", self.env))
                    .map_or("12".to_string(), |(_, var)| var.to_string());
                let preamble = document.get_variable(format!("tex.{}.preamble", self.env))
                    .map_or("".to_string(), |(_, var)| var.to_string());
                // Only block elements get the optional prefix
                let prepend = if self.block == TexKind::Inline { "".to_string() }
                else
                {
                    document.get_variable(format!("tex.{}.block_prepend", self.env))
                        .map_or("".to_string(), |(_, var)| var.to_string()+"\n")
                };

                let latex = match self.block
                {
                    // Inline code is rendered in math mode
                    TexKind::Inline => Tex::format_latex(
                        &fontsize,
                        &preamble,
                        &format!("${{{}}}$", self.tex)),
                    TexKind::Block => Tex::format_latex(
                        &fontsize,
                        &preamble,
                        &format!("{prepend}{}", self.tex))
                };

                if let Some(mut con) = compiler.cache()
                {
                    match latex.cached(&mut con, |s| s.latex_to_svg(&exec, &fontsize))
                    {
                        Ok(s) => Ok(s),
                        Err(CachedError::SqlErr(e)) => Err(format!("Querying the cache failed: {e}")),
                        Err(CachedError::GenErr(e)) => Err(e),
                    }
                }
                else
                {
                    latex.latex_to_svg(&exec, &fontsize)
                }
            }
            // Report unsupported targets as an error instead of panicking
            Target::LATEX => Err("Unimplemented compiler".to_string()),
        }
    }
}
/// Rule matching LaTeX code.
pub struct TexRule {
// Index 0 matches the `$|...|$` syntax, index 1 the `$...$` syntax
re: [Regex; 2],
}
impl TexRule {
    /// Creates the rule with its two delimiter regexes.
    pub fn new() -> Self {
        // `$|[env] code |$`
        let block = Regex::new(r"\$\|(?:\[(.*)\])?(?:((?:\\.|[^\\\\])*?)\|\$)?").unwrap();
        // `$[env] code $`
        let inline = Regex::new(r"\$(?:\[(.*)\])?(?:((?:\\.|[^\\\\])*?)\$)?").unwrap();

        Self { re: [block, inline] }
    }
}
impl RegexRule for TexRule {
    fn name(&self) -> &'static str {
        "Tex"
    }

    fn regexes(&self) -> &[regex::Regex] {
        &self.re
    }

    /// Builds a [`Tex`] element from a match and pushes it into the document.
    ///
    /// An unterminated match is reported as an error and produces no element;
    /// empty code only produces a warning.
    fn on_regex_match(&self, index: usize, parser: &dyn Parser, document: &Document, token: Token, matches: Captures)
        -> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>> {
        let mut reports = vec![];

        // Environment name, falling back to `main` when absent or blank
        let tex_env = matches.get(1)
            .map(|env| env.as_str().trim())
            .filter(|env| !env.is_empty())
            .unwrap_or("main");

        // Unterminated `$`
        let Some(content) = matches.get(2) else {
            reports.push(
                Report::build(ReportKind::Error, token.source(), token.start())
                    .with_message("Unterminated Tex Code")
                    .with_label(
                        Label::new((token.source().clone(), token.range.clone()))
                            .with_message(format!("Missing terminating `{}` after first `{}`",
                                ["|$", "$"][index].fg(parser.colors().info),
                                ["$|", "$"][index].fg(parser.colors().info)))
                            .with_color(parser.colors().error))
                    .finish());
            return reports;
        };

        let tex_content = util::process_escaped('\\', ["|$", "$"][index],
            content.as_str().trim());
        if tex_content.is_empty() {
            reports.push(
                Report::build(ReportKind::Warning, token.source(), content.start())
                    .with_message("Empty Tex Code")
                    .with_label(
                        Label::new((token.source().clone(), content.range()))
                            .with_message("Tex code is empty")
                            .with_color(parser.colors().warning))
                    .finish());
        }

        // TODO: Caption
        let kind = if index == 1 { TexKind::Inline } else { TexKind::Block };
        parser.push(document, Box::new(Tex::new(
            token,
            kind,
            tex_env.to_string(),
            tex_content,
            None,
        )));

        reports
    }
}

329
src/elements/variable.rs Normal file
View file

@ -0,0 +1,329 @@
use regex::Regex;
use crate::parser::{parser::{Parser, ReportColors}, rule::RegexRule, source::{Source, Token}};
use ariadne::{Report, Fmt, Label, ReportKind};
use crate::document::{document::Document, variable::{BaseVariable, PathVariable, Variable}};
use std::{ops::Range, rc::Rc};
/// Rule matching variable definitions.
pub struct VariableRule {
// Single regex matching a definition
re: [Regex; 1],
// Known variable kinds as (prefix, display name) pairs; index 0 is the regular kind
kinds: Vec<(String, String)>,
}
impl VariableRule {
    /// Creates the rule with its regex and the list of known variable kinds.
    pub fn new() -> Self {
        Self {
            re: [Regex::new(r"(?:^|\n)@([^[:alpha:]])?(.*)=((?:\\\n|.)*)").unwrap()],
            kinds: vec![
                ("".into(), "Regular".into()),
                ("'".into(), "Path".into())
            ]
        }
    }

    /// Builds the variable matching the kind at index `kind` of the known kinds.
    ///
    /// # Errors
    /// For path variables, returns an error when `value` cannot be canonicalized.
    ///
    /// # Panics
    /// Panics when `kind` is out of range or refers to a kind without a
    /// constructor.
    pub fn make_variable(&self, colors: &ReportColors, location: Token, kind: usize, name: String, value: String) -> Result<Rc<dyn Variable>, String>
    {
        match self.kinds[kind].0.as_str()
        {
            "" => Ok(Rc::new(BaseVariable::new(location, name, value))),
            "'" => {
                match std::fs::canonicalize(value.as_str()) // TODO: not canonicalize
                {
                    Ok(path) => Ok(Rc::new(PathVariable::new(location, name, path))),
                    Err(e) => Err(format!("Unable to canonicalize path `{}`: {}",
                            value.fg(colors.highlight),
                            e))
                }
            }
            _ => panic!("Unhandled variable kind")
        }
    }

    /// Trim and check variable name for validity
    ///
    /// # Errors
    /// Returns an error when the name contains `%`.
    pub fn validate_name<'a>(colors: &ReportColors, original_name: &'a str) -> Result<&'a str, String>
    {
        let name = original_name.trim();
        if name.contains('%')
        {
            return Err(format!("Name cannot contain '{}'",
                    "%".fg(colors.info)));
        }
        Ok(name)
    }

    /// Trims the value and resolves its escaped line breaks.
    ///
    /// A newline preceded by exactly one backslash is removed together with
    /// that backslash. A newline preceded by two or more backslashes is kept
    /// and two of the backslashes are consumed. Backslashes in front of any
    /// other character are kept as is.
    ///
    /// # Errors
    /// Returns an error when the value contains an unescaped newline.
    pub fn validate_value(_colors: &ReportColors, original_value: &str) -> Result<String, String>
    {
        // Number of consecutive backslashes seen just before the current character
        let mut escaped = 0usize;
        let mut result = String::new();
        for c in original_value.trim().chars() {
            if c == '\\' { escaped += 1 }
            else if c == '\n' {
                match escaped {
                    0 => return Err("Unknown error while capturing variable".to_string()),
                    // Remove '\n'
                    1 => {},
                    // Insert '\n'
                    _ => {
                        result.push(c);
                        (0..escaped-2).for_each(|_| result.push('\\'));
                    }
                }
                escaped = 0;
            }
            else {
                (0..escaped).for_each(|_| result.push('\\'));
                escaped = 0;
                result.push(c);
            }
        }
        // Trailing backslashes are kept
        (0..escaped).for_each(|_| result.push('\\'));

        Ok(result)
    }
}
impl RegexRule for VariableRule {
fn name(&self) -> &'static str { "Variable" }
fn regexes(&self) -> &[Regex] { &self.re }
fn on_regex_match(&self, _: usize, parser: &dyn Parser, document: &Document, token: Token, matches: regex::Captures) -> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>>
{
let mut result = vec![];
// [Optional] variable kind
let var_kind = match matches.get(1)
{
Some(kind) => {
// Find kind
let r = self.kinds.iter().enumerate().find(|(_i, (ref char, ref _name))| {
char == kind.as_str() });
// Unknown kind specified
if r.is_none()
{
result.push(
Report::build(ReportKind::Error, token.source(), kind.start())
.with_message("Unknown variable kind")
.with_label(
Label::new((token.source(), kind.range()))
.with_message(format!("Variable kind `{}` is unknown",
kind.as_str().fg(parser.colors().highlight)))
.with_color(parser.colors().error))
.with_help(format!("Leave empty for regular variables. Available variable kinds:{}",
self.kinds.iter().skip(1).fold("".to_string(), |acc, (char, name)| {
acc + format!("\n - `{}` : {}",
char.fg(parser.colors().highlight),
name.fg(parser.colors().info)).as_str()
})))
.finish());
return result;
}
r.unwrap().0
}
None => 0,
};
let var_name = match matches.get(2)
{
Some(name) => {
match VariableRule::validate_name(&parser.colors(), name.as_str())
{
Ok(var_name) => var_name,
Err(msg) => {
result.push(
Report::build(ReportKind::Error, token.source(), name.start())
.with_message("Invalid variable name")
.with_label(
Label::new((token.source(), name.range()))
.with_message(format!("Variable name `{}` is not allowed. {msg}",
name.as_str().fg(parser.colors().highlight)))
.with_color(parser.colors().error))
.finish());
return result;
},
}
},
_ => panic!("Unknown variable name")
};
let var_value = match matches.get(3)
{
Some(value) => {
match VariableRule::validate_value(&parser.colors(), value.as_str())
{
Ok(var_value) => var_value,
Err(msg ) => {
result.push(
Report::build(ReportKind::Error, token.source(), value.start())
.with_message("Invalid variable value")
.with_label(
Label::new((token.source(), value.range()))
.with_message(format!("Variable value `{}` is not allowed. {msg}",
value.as_str().fg(parser.colors().highlight)))
.with_color(parser.colors().error))
.finish());
return result;
}
}
}
_ => panic!("Invalid variable value")
};
match self.make_variable(&parser.colors(), token.clone(), var_kind, var_name.to_string(), var_value)
{
Ok(variable) => document.add_variable(variable),
Err(msg) => {
let m = matches.get(0).unwrap();
result.push(
Report::build(ReportKind::Error, token.source(), m.start())
.with_message("Unable to create variable")
.with_label(
Label::new((token.source(), m.start()+1 .. m.end() ))
.with_message(format!("Unable to create variable `{}`. {}",
var_name.fg(parser.colors().highlight),
msg))
.with_color(parser.colors().error))
.finish());
return result;
}
}
return result;
}
}
/// Rule matching variable substitutions of the form `%name%`.
pub struct VariableSubstitutionRule
{
// Single regex matching a substitution
re: [Regex; 1],
}
impl VariableSubstitutionRule {
    /// Creates the rule with its substitution regex.
    pub fn new() -> Self {
        let substitution = Regex::new(r"%(.*?)%").unwrap();
        Self { re: [substitution] }
    }
}
impl RegexRule for VariableSubstitutionRule {
    fn name(&self) -> &'static str {
        "Variable Substitution"
    }

    fn regexes(&self) -> &[regex::Regex] {
        &self.re
    }

    /// Checks the substituted name, looks the variable up in the document and
    /// lets the variable parse itself at the position of the match.
    fn on_regex_match(&self, _index: usize, parser: &dyn Parser, document: &Document, token: Token, matches: regex::Captures) -> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>> {
        let mut result = vec![];

        let Some(name) = matches.get(1) else { panic!("Unknown error") };
        let raw_name = name.as_str();

        // Empty name
        if raw_name.is_empty() {
            result.push(
                Report::build(ReportKind::Error, token.source(), name.start())
                    .with_message("Empty variable name")
                    .with_label(
                        Label::new((token.source(), matches.get(0).unwrap().range()))
                            .with_message("Missing variable name for substitution")
                            .with_color(parser.colors().error))
                    .finish());
            return result;
        }

        // Leading spaces
        if raw_name.trim_start() != raw_name {
            result.push(
                Report::build(ReportKind::Error, token.source(), name.start())
                    .with_message("Invalid variable name")
                    .with_label(
                        Label::new((token.source(), name.range()))
                            .with_message("Variable names contains leading spaces")
                            .with_color(parser.colors().error))
                    .with_help("Remove leading spaces")
                    .finish());
            return result;
        }

        // Trailing spaces
        if raw_name.trim_end() != raw_name {
            result.push(
                Report::build(ReportKind::Error, token.source(), name.start())
                    .with_message("Invalid variable name")
                    .with_label(
                        Label::new((token.source(), name.range()))
                            .with_message("Variable names contains trailing spaces")
                            .with_color(parser.colors().error))
                    .with_help("Remove trailing spaces")
                    .finish());
            return result;
        }

        // Invalid name
        if let Err(msg) = VariableRule::validate_name(parser.colors(), raw_name) {
            result.push(
                Report::build(ReportKind::Error, token.source(), name.start())
                    .with_message("Invalid variable name")
                    .with_label(
                        Label::new((token.source(), name.range()))
                            .with_message(msg)
                            .with_color(parser.colors().error))
                    .finish());
            return result;
        }

        // Get variable
        let Some((_, variable)) = document.get_variable(raw_name) else {
            result.push(
                Report::build(ReportKind::Error, token.source(), name.start())
                    .with_message("Unknown variable name")
                    .with_label(
                        Label::new((token.source(), name.range()))
                            .with_message(format!("Unable to find variable with name: `{}`",
                                raw_name.fg(parser.colors().highlight)))
                            .with_color(parser.colors().error))
                    .finish());
            return result;
        };

        variable.parse(token, parser, document);

        // TODO: Full rework of document
        // The parser should parse into the previous document, and not into a new document.
        // This should prevent having to use `recurse: bool` in the last_element getters.

        result
    }
}

View file

@ -1,3 +0,0 @@
pub mod file;
pub mod cursor;
pub mod token;

View file

@ -1,28 +0,0 @@
use super::file::File;
/// Position inside the content of a file.
#[derive(Debug)]
pub struct Cursor<'a>
{
// File the content was read from
pub file: &'a File,
// Whole content of the file
pub content: String,
// Current position in `content` (starts at 0)
pub position: usize,
}
impl<'a> Cursor<'a>
{
pub fn new(_file: &'a File) -> Result<Cursor<'a>, std::io::Error>
{
let _content = match std::fs::read_to_string(&_file.path)
{
Ok(content) => content,
Err(error) => return Err(error),
};
Ok(Cursor
{
file: _file,
content: _content,
position: 0usize,
})
}
}

View file

@ -1,17 +0,0 @@
use std::path::Path;
/// A file identified by its path.
#[derive(Debug)]
pub struct File
{
// Path of the file, as given to `File::new`
pub path: String,
}
impl File
{
pub fn new(_path: String) -> File
{
File {
path: _path,
}
}
}

View file

@ -1,30 +0,0 @@
use super::file::File;
use super::cursor::Cursor;
/// A span inside a file.
pub struct Token<'a>
{
// File the span belongs to
file: &'a File,
// Start position of the span
start: usize,
// Length of the span
len: usize,
}
impl<'a> Token<'a> {
    /// Creates a token from its file, start position and length.
    pub fn new(_file: &'a File, _start: usize, _len: usize) -> Token<'a> {
        Self {
            file: _file,
            start: _start,
            len: _len,
        }
    }

    /// Creates a token starting at the cursor position and as long as the match.
    pub fn from(cursor: &'a Cursor, mat: regex::Match<'a>) -> Token<'a> {
        let start = cursor.position;
        let len = mat.len();

        Self {
            file: cursor.file,
            start,
            len,
        }
    }
}

1
src/lsp/mod.rs Normal file
View file

@ -0,0 +1 @@
pub mod parser;

30
src/lsp/parser.rs Normal file
View file

@ -0,0 +1,30 @@
use std::{cell::RefCell, collections::HashMap};
use crate::{elements::registrar::register, lua::kernel::Kernel, parser::{rule::Rule, state::StateHolder}};
/// Parser used by the language server (work in progress).
struct LSParser
{
// Registered rules (none are registered yet)
rules: Vec<Box<dyn Rule>>,
// Parser state
pub state: RefCell<StateHolder>,
//pub kernels: RefCell<HashMap<String, Kernel>>,
}
impl LSParser {
    /// Creates a parser without any rule and with an empty state.
    pub fn default() -> Self {
        // TODO: Main kernel
        // TODO: Register the rules (`register(&mut parser)`) and add a `kernels` map
        LSParser {
            rules: vec![],
            state: RefCell::new(StateHolder::new()),
        }
    }
}

21
src/lua/kernel.rs Normal file
View file

@ -0,0 +1,21 @@
use std::cell::RefMut;
use mlua::Lua;
/// A Lua kernel, wrapping its own Lua state.
pub struct Kernel
{
// Lua state owned by this kernel
pub lua: Lua,
}
impl Kernel {
pub fn new() -> Self {
Self { lua: Lua::new() }
}
}
/// Storage of named kernels.
pub trait KernelHolder
{
/// Returns a mutable handle to the kernel named `name`, if there is one.
fn get_kernel(&self, name: &str) -> Option<RefMut<'_, Kernel>>;
/// Stores `kernel` under `name` and returns a mutable handle to it.
fn insert_kernel(&self, name: String, kernel: Kernel) -> RefMut<'_, Kernel>;
}

1
src/lua/mod.rs Normal file
View file

@ -0,0 +1 @@
pub mod kernel;

View file

@ -1,110 +1,110 @@
#![feature(char_indices_offset)]
mod document;
mod compiler;
mod parser;
use self::parser::rule::SyntaxRule;
use self::parser::section::SectionRule;
mod files;
use self::files::file::File;
use self::files::cursor::Cursor;
mod syntax;
use syntax::element::Element;
use syntax::element::Text;
mod elements;
mod lua;
mod cache;
use std::{env, rc::Rc};
use compiler::compiler::Compiler;
use getopts::Options;
use parser::{langparser::LangParser, parser::Parser};
use crate::parser::source::SourceFile;
extern crate getopts;
/// Prints the command line usage of `program` to stdout.
fn print_usage(program: &str, opts: Options) {
    let header = format!("Usage: {} -i FILE [options]", program);
    let usage = opts.usage(&header);
    print!("{}", usage);
}
/// Prints the program name, license notice and version to stdout.
fn print_version()
{
print!("NML -- Not a Markup Language
Copyright (c) 2024
NML is licensed under the GNU Affero General Public License version 3 (AGPLv3),
under the terms of the Free Software Foundation <https://www.gnu.org/licenses/agpl-3.0.en.html>.
This program is free software; you may modify and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
NML version: 0.4\n");
}
fn main() {
let file = File::new(String::from("./test.nml"));
let mut cursor = Cursor::new(&file).unwrap();
cursor.position = 5;
let args: Vec<String> = env::args().collect();
let program = args[0].clone();
let rule_se = SectionRule::new();
let (token, res) = rule_se.on_match(&cursor).unwrap();
println!("{}", res.elements.len());
let mut opts = Options::new();
opts.optopt("i", "", "Input file", "FILE");
opts.optopt("d", "database", "Cache database location", "PATH");
opts.optmulti("z", "debug", "Debug options", "OPTS");
opts.optflag("h", "help", "Print this help menu");
opts.optflag("v", "version", "Print program version and licenses");
/*
let re_sections = regex::Regex::new(r"(?:^|\n)(#{1,})(\*|\+)((?:\t| ){0,})(.*)").unwrap();
//let mut validators = Vec::<Box<dyn GroupValidator>>::new();
let f = File::new(Box::new(std::path::Path::new("./test.nml")));
let content = std::fs::read_to_string(*f.path).unwrap();
let grammar = vec![re_sections];
let mut positions = [0usize; 1];
let mut i = 0;
while i < content.len()
let matches = match opts.parse(&args[1..]) {
Ok(m) => { m }
Err(f) => { panic!("{}", f.to_string()) }
};
if matches.opt_present("v")
{
// Update every positions
for k in 0..grammar.len()
print_version();
return;
}
if matches.opt_present("h") {
print_usage(&program, opts);
return;
}
if !matches.opt_present("i") {
print_usage(&program, opts);
return;
}
let input = matches.opt_str("i").unwrap();
let debug_opts = matches.opt_strs("z");
let db_path = matches.opt_str("d");
let parser = LangParser::default();
// Parse
let source = SourceFile::new(input.to_string(), None).unwrap();
let doc = parser.parse(Rc::new(source), None);
if debug_opts.contains(&"ast".to_string())
{
let rule = &grammar[k];
let position = &mut positions[k];
if *position == std::usize::MAX { continue };
println!("-- BEGIN AST DEBUGGING --");
doc.content.borrow().iter().for_each(|elem| {
println!("{}", (elem).to_string())
});
println!("-- END AST DEBUGGING --");
}
match rule.find_at(&content, i)
if debug_opts.contains(&"ref".to_string())
{
Some(mat) => *position = mat.start(),
None => *position = std::usize::MAX,
println!("-- BEGIN REFERENCES DEBUGGING --");
let sc = doc.scope.borrow();
sc.referenceable.iter().for_each(|(name, pos)| {
println!(" - {name}: `{:#?}`", doc.content.borrow()[*pos]);
});
println!("-- END REFERENCES DEBUGGING --");
}
println!("{position}");
}
// Gets closest match
let mut next_position = std::usize::MAX;
let mut closest_match = std::usize::MAX;
for k in 0..grammar.len()
if debug_opts.contains(&"var".to_string())
{
if positions[k] >= next_position { continue; }
next_position = positions[k];
closest_match = k;
println!("-- BEGIN VARIABLES DEBUGGING --");
let sc = doc.scope.borrow();
sc.variables.iter().for_each(|(_name, var)| {
println!(" - `{:#?}`", var);
});
println!("-- END VARIABLES DEBUGGING --");
}
println!("Unmatched: {}", &content[i..next_position]);
// No matches left
if closest_match == std::usize::MAX
{
println!("Done");
break;
}
// Extract matches from rule
i = next_position; // Set to begining of match
let mat = &grammar[closest_match].captures_at(&content, i).unwrap(); // Capture match
for m in 0..mat.len()
{
match mat.get(m)
{
Some(s) => {
println!("Group {m}: `{}`", s.as_str());
},
None => println!("Group {m}: None"),
}
}
i += mat.get(0).unwrap().len(); // Add match length
println!("Left={}", &content[i..]);
println!("pos={i}");
let mut s = String::new();
std::io::stdin().read_line(&mut s).expect("Did not enter a correct string");
}
*/
/*
validators.push(Box::new(StringValidator::new("Depth".to_string(), |_group| -> ValidationStatus {
ValidationStatus::Ok()
})));
validators.push(Box::new(StringValidator::new("Index Type".to_string(), |group| -> ValidationStatus {
match group
{
"" => ValidationStatus::Ok(),
"*" => ValidationStatus::Ok(),
_ => ValidationStatus::Error("")
}
ValidationStatus::Ok()
})));
*/
//let _sec_rule = SyntaxRule::new("Section".to_string(), r"(?m)(?:^|\n)(#{1,})(\\*|\\+)((?:\t| ){0,})(.*)", validators).unwrap();
let compiler = Compiler::new(compiler::compiler::Target::HTML, db_path);
let out = compiler.compile(&doc);
std::fs::write("a.html", out).unwrap();
}

View file

@ -1,3 +0,0 @@
pub mod rule;
pub mod section;

277
src/parser/langparser.rs Normal file
View file

@ -0,0 +1,277 @@
use std::{cell::{RefCell, RefMut}, collections::{HashMap, HashSet}, ops::Range, rc::Rc};
use ariadne::{Label, Report};
use crate::{document::{document::Document, element::{ElemKind, Element, Text}}, elements::{paragraph::Paragraph, registrar::register}, lua::kernel::{Kernel, KernelHolder}, parser::source::{SourceFile, VirtualSource}};
use super::{parser::{Parser, ReportColors}, rule::Rule, source::{Cursor, Source, Token}, state::StateHolder, util};
/// Parser for the language
pub struct LangParser
{
// Registered rules, in the order given by `add_rule`
rules: Vec<Box<dyn Rule>>,
// Colors used when formatting reports
colors: ReportColors,
// Parser state
pub err_flag: RefCell<bool>,
pub state: RefCell<StateHolder>,
// Lua kernels by name; `default()` inserts one named "main"
pub kernels: RefCell<HashMap<String, Kernel>>,
}
impl LangParser {
    /// Creates a parser with colored reports, all the registered rules and a
    /// `main` kernel.
    pub fn default() -> Self {
        let mut parser = Self {
            rules: vec![],
            colors: ReportColors::with_colors(),
            err_flag: RefCell::new(false),
            state: RefCell::new(StateHolder::new()),
            kernels: RefCell::new(HashMap::new()),
        };
        register(&mut parser);

        parser.kernels.borrow_mut()
            .insert("main".to_string(), Kernel::new());
        parser
    }

    /// Prints every report to stderr.
    ///
    /// Before printing, each report is extended with labels pointing at the
    /// location its sources originate from, following the chain of parents.
    fn handle_reports<'a>(&self, _source: Rc<dyn Source>, reports: Vec<Report<'a, (Rc<dyn Source>, Range<usize>)>>) {
        /// Inserts `source` and, recursively, the sources it originates from.
        fn recurse_source(sources: &mut HashSet<Rc<dyn Source>>, source: Rc<dyn Source>) {
            sources.insert(source.clone());
            if let Some(parent) = source.location() {
                let parent_source = parent.source();
                if !sources.contains(&parent_source) {
                    recurse_source(sources, parent_source);
                }
            }
        }

        for mut report in reports {
            // Every source involved in the report
            let mut sources: HashSet<Rc<dyn Source>> = HashSet::new();
            for label in report.labels.iter() {
                recurse_source(&mut sources, label.span.0.clone());
            }

            let cache = sources.iter()
                .map(|source| (source.clone(), source.content().clone()))
                .collect::<Vec<(Rc<dyn Source>, String)>>();

            for (source, _) in cache.iter() {
                let Some(location) = source.location() else { continue };

                if source.downcast_ref::<SourceFile>().is_some() {
                    report.labels.push(
                        Label::new((location.source(), location.start()+1 .. location.end()))
                            .with_message("In file included from here")
                            .with_order(-1)
                    );
                }

                if source.downcast_ref::<VirtualSource>().is_some() {
                    // Skip the newline the location may start with
                    let first_byte = location.source().content().as_bytes()[location.start()];
                    let start = location.start() + usize::from(first_byte == b'\n');
                    report.labels.push(
                        Label::new((location.source(), start .. location.end()))
                            .with_message("In evaluation of")
                            .with_order(-1)
                    );
                }
            }

            report.eprint(ariadne::sources(cache)).unwrap()
        }
    }
}
impl Parser for LangParser
{
fn colors(&self) -> &ReportColors { &self.colors }
fn rules(&self) -> &Vec<Box<dyn Rule>> { &self.rules }
/// Registers `rule`, either right after the rule named `after` or, when
/// `after` is `None`, at the end of the rule list.
///
/// Panics when a rule with the same name is already registered, or when no
/// rule named `after` exists.
fn add_rule(&mut self, rule: Box<dyn Rule>, after: Option<&'static str>)
{
// Error on duplicate rule
let rule_name = (*rule).name();
self.rules.iter().for_each(|rule| {
if (*rule).name() != rule_name { return; }
panic!("Attempted to introduce duplicate rule: `{rule_name}`");
});
match after
{
Some(name) => {
let before = self.rules.iter()
.enumerate()
.find(|(_pos, r)| (r).name() == name);
match before
{
Some((pos, _)) => self.rules.insert(pos+1, rule),
_ => panic!("Unable to find rule named `{name}`, to insert rule `{}` after it", rule.name())
}
}
_ => self.rules.push(rule)
}
}
fn state(&self) -> std::cell::Ref<'_, StateHolder> { self.state.borrow() }
fn state_mut(&self) -> std::cell::RefMut<'_, StateHolder> { self.state.borrow_mut() }
/// Add an [`Element`] to the [`Document`]
///
/// Inline and invisible elements are appended to the paragraph returned by
/// `last_element_mut`, which is created first when there is none. Any other
/// element is pushed directly to the document, after ending the paragraph
/// scope when such a paragraph exists.
fn push<'a>(&self, doc: &'a Document<'a>, elem: Box<dyn Element>)
{
if elem.kind() == ElemKind::Inline || elem.kind() == ElemKind::Invisible
{
let mut paragraph = doc.last_element_mut::<Paragraph>(false)
.or_else(|| {
doc.push(Box::new(Paragraph::new(elem.location().clone())));
doc.last_element_mut::<Paragraph>(false)
}).unwrap();
paragraph.push(elem);
}
else
{
// Process paragraph events
if doc.last_element_mut::<Paragraph>(false)
.is_some_and(|_| true)
{
self.handle_reports(doc.source(),
self.state_mut().on_scope_end(self, &doc, super::state::Scope::PARAGRAPH));
}
doc.push(elem);
}
}
/// Parses `source` into a new [`Document`] whose parent is `parent`.
///
/// Text found between rule matches is pushed as [`Text`] elements; every
/// report produced along the way is printed through `handle_reports`.
fn parse<'a>(&self, source: Rc<dyn Source>, parent: Option<&'a Document<'a>>) -> Document<'a>
{
let doc = Document::new(source.clone(), parent);
// One entry per rule, filled in by `update_matches`
let mut matches = Vec::new();
for _ in 0..self.rules.len() {
matches.push((0usize, None));
}
let content = source.content();
let mut cursor = Cursor::new(0usize, doc.source()); // Cursor in file
if parent.is_some() // Terminate parent's paragraph state
{
self.handle_reports(parent.as_ref().unwrap().source(),
self.state_mut().on_scope_end(self, parent.as_ref().unwrap(), super::state::Scope::PARAGRAPH));
}
loop
{
let (rule_pos, rule, match_data) = self.update_matches(&cursor, &mut matches);
// Unmatched content
let text_content = util::process_text(&doc, &content.as_str()[cursor.pos..rule_pos.pos]);
if !text_content.is_empty()
{
self.push(&doc, Box::new(Text::new(
Token::new(cursor.pos..rule_pos.pos, source.clone()),
text_content
)));
}
if let Some(rule) = rule
{
// Rule callback
let (new_cursor, reports) = (*rule).on_match(self, &doc, rule_pos, match_data);
self.handle_reports(doc.source(), reports);
// Advance
cursor = new_cursor;
}
else // No rules left
{
break;
}
}
// State
self.handle_reports(doc.source(),
self.state_mut().on_scope_end(self, &doc, super::state::Scope::DOCUMENT));
return doc;
}
/// Parses `source` and pushes the resulting elements into the existing
/// `document`.
///
/// Runs the same loop as `parse`, but neither terminates a parent's paragraph
/// state nor ends the document scope (that call is commented out below).
fn parse_into<'a>(&self, source: Rc<dyn Source>, document: &'a Document<'a>)
{
// One entry per rule, filled in by `update_matches`
let mut matches = Vec::new();
for _ in 0..self.rules.len() {
matches.push((0usize, None));
}
let content = source.content();
let mut cursor = Cursor::new(0usize, source.clone());
loop
{
let (rule_pos, rule, match_data) = self.update_matches(&cursor, &mut matches);
// Unmatched content
let text_content = util::process_text(&document, &content.as_str()[cursor.pos..rule_pos.pos]);
if !text_content.is_empty()
{
self.push(&document, Box::new(Text::new(
Token::new(cursor.pos..rule_pos.pos, source.clone()),
text_content
)));
}
if let Some(rule) = rule
{
// Rule callback
let (new_cursor, reports) = (*rule).on_match(self, &document, rule_pos, match_data);
self.handle_reports(document.source(), reports);
// Advance
cursor = new_cursor;
}
else // No rules left
{
break;
}
}
// State
//self.handle_reports(source.clone(),
// self.state_mut().on_scope_end(&self, &document, super::state::Scope::DOCUMENT));
//return doc;
}
}
impl KernelHolder for LangParser
{
    /// Returns a mutable handle on the kernel stored under `name`, if any.
    fn get_kernel(&self, name: &str)
        -> Option<RefMut<'_, Kernel>> {
        let kernels = self.kernels.borrow_mut();
        RefMut::filter_map(kernels, |map| map.get_mut(name)).ok()
    }

    /// Stores `kernel` under `name` and returns a mutable handle on it.
    fn insert_kernel(&self, name: String, kernel: Kernel)
        -> RefMut<'_, Kernel> {
        //TODO do not get
        {
            let mut kernels = self.kernels.borrow_mut();
            kernels.insert(name.clone(), kernel);
        } // the mutable borrow must end here: `get_kernel` borrows again
        self.get_kernel(name.as_str()).unwrap()
    }
}

6
src/parser/mod.rs Normal file
View file

@ -0,0 +1,6 @@
pub mod source;
pub mod parser;
pub mod langparser;
pub mod rule;
pub mod state;
pub mod util;

126
src/parser/parser.rs Normal file
View file

@ -0,0 +1,126 @@
use std::any::Any;
use std::cell::{Ref, RefCell, RefMut};
use std::collections::{HashMap, HashSet};
use std::ops::Range;
use std::rc::Rc;
use unicode_segmentation::UnicodeSegmentation;
use super::rule::Rule;
use super::source::{Cursor, Source};
use super::state::StateHolder;
use crate::document::document::Document;
use crate::document::element::Element;
use ariadne::Color;
use crate::lua::kernel::{Kernel, KernelHolder};
/// Colors used when formatting reports, one per report category.
pub struct ReportColors
{
    pub error: Color,
    pub warning: Color,
    pub info: Color,
    pub highlight: Color,
}

impl ReportColors {
    /// Palette with a distinct color per category.
    pub fn with_colors() -> Self {
        ReportColors {
            highlight: Color::BrightMagenta,
            info: Color::BrightBlue,
            warning: Color::Yellow,
            error: Color::Red,
        }
    }

    /// Palette where every category uses `Color::Primary`.
    pub fn without_colors() -> Self {
        ReportColors {
            highlight: Color::Primary,
            info: Color::Primary,
            warning: Color::Primary,
            error: Color::Primary,
        }
    }
}
/// Interface of a parser: a list of [`Rule`]s, a [`StateHolder`] and the
/// entry points turning a [`Source`] into a [`Document`].
pub trait Parser: KernelHolder
{
    /// Gets the colors for formatting errors
    ///
    /// When colors are disabled, all colors should resolve to empty string
    fn colors(&self) -> &ReportColors;

    /// Gets the rules known to this parser
    fn rules(&self) -> &Vec<Box<dyn Rule>>;

    /// Registers `rule`.
    ///
    /// NOTE(review): `after` presumably names the rule after which the new one
    /// is inserted -- confirm against the implementation.
    fn add_rule(&mut self, rule: Box<dyn Rule>, after: Option<&'static str>);

    /// Immutable access to the parser's states
    fn state(&self) -> Ref<'_, StateHolder>;

    /// Mutable access to the parser's states
    fn state_mut(&self) -> RefMut<'_, StateHolder>;

    /// Updates `matches` and returns the position of the next matched rule.
    ///
    /// `matches` holds one `(position, match data)` slot per rule, in the same
    /// order as [`Parser::rules`]. A slot is only recomputed once `cursor` has
    /// reached its position. Matches preceded by an odd number of `\` are
    /// considered escaped and skipped.
    ///
    /// If the returned rule is `None`, there are no rules left to parse (i.e.
    /// end of document) and the returned cursor sits at the end of the source.
    fn update_matches(&self, cursor: &Cursor, matches: &mut Vec<(usize, Option<Box<dyn Any>>)>)
        -> (Cursor, Option<&Box<dyn Rule>>, Option<Box<dyn Any>>)
    {
        // Update matches
        // TODO: Trivially parallelizable
        self.rules().iter().zip(matches.iter_mut()).for_each(
        |(rule, (matched_at, match_data))| {
            // Don't update if not stepped over yet
            if *matched_at > cursor.pos { return }

            (*matched_at, *match_data) = match rule.next_match(cursor)
            {
                None => (usize::MAX, None),
                Some((mut pos, mut data)) =>
                {
                    // Check if escaped
                    while pos != usize::MAX
                    {
                        let content = cursor.source.content().as_str();
                        // Number of consecutive `\` right before the match
                        let backslashes = content[0 .. pos]
                            .graphemes(true)
                            .rev()
                            .take_while(|g| *g == "\\")
                            .count();
                        // An even count means the backslashes escape each other
                        if backslashes % 2 == 0 { break; }

                        // Find next potential match
                        (pos, data) = match rule.next_match(&cursor.at(pos+1)) {
                            Some((new_pos, new_data)) => (new_pos, new_data),
                            None => (usize::MAX, data) // Stop iterating
                        }
                    }

                    (pos, (pos != usize::MAX).then_some(data))
                }
            }
        });

        // Get winning match: the rule with the smallest position
        let winner = matches.iter()
            .enumerate()
            .min_by_key(|(_, (pos, _match_data))| pos)
            .map(|(winner, (next_pos, _))| (winner, *next_pos));

        match winner
        {
            Some((winner, next_pos)) if next_pos != usize::MAX =>
                (cursor.at(next_pos),
                Some(&self.rules()[winner]),
                matches[winner].1.take()),
            // No rule registered, or no rule has matched: no matches left
            _ => (cursor.at(cursor.source.content().len()), None, None),
        }
    }

    /// Add an [`Element`] to the [`Document`]
    fn push<'a>(&self, doc: &'a Document<'a>, elem: Box<dyn Element>);

    /// Parse [`Source`] into a new [`Document`]
    fn parse<'a>(&self, source: Rc<dyn Source>, parent: Option<&'a Document<'a>>) -> Document<'a>;

    /// Parse [`Source`] into an already existing [`Document`]
    fn parse_into<'a>(&self, source: Rc<dyn Source>, document: &'a Document<'a>);
}

View file

@ -1,51 +1,103 @@
use regex::Captures;
use super::super::syntax::element::Element;
use super::super::files::cursor::Cursor;
use super::super::files::token::Token;
use super::parser::Parser;
use super::source::{Cursor, Source, Token};
use ariadne::Report;
use crate::document::document::Document;
pub struct RuleResult
{
length: usize,
pub elements: Vec<Box<dyn Element>>,
use std::any::Any;
use std::ops::Range;
use std::rc::Rc;
/// A syntax rule the parser tries to match against a source.
pub trait Rule {
/// Returns rule's name
fn name(&self) -> &'static str;
/// Finds the next match starting from [`cursor`]
///
/// Returns the byte position of the match in the cursor's source together
/// with rule-specific data that is later handed back to [`Rule::on_match`],
/// or `None` when the rule has no match left.
fn next_match(&self, cursor: &Cursor) -> Option<(usize, Box<dyn Any>)>;
/// Callback when rule matches
///
/// `cursor` points at the match and `match_data` is the data produced by
/// [`Rule::next_match`]. Returns the cursor from which parsing resumes and
/// the reports generated while processing the match.
fn on_match(&self, parser: &dyn Parser, document: &Document, cursor: Cursor, match_data: Option<Box<dyn Any>>) -> (Cursor, Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>>);
}
// NOTE(review): pre-refactor code shown interleaved by the diff view.
impl RuleResult
{
/// Builds a result of `_length` holding the single element `elem`.
pub fn new(_length: usize, elem: Box<dyn Element>) -> RuleResult
{
RuleResult
{
length: _length,
elements: vec![elem],
}
}
}
/// Error raised by a rule: where it happened, the offending match (if any)
/// and a message.
// NOTE(review): pre-refactor code shown interleaved by the diff view.
#[derive(Debug)]
pub struct RuleError<'a>
{
// where: token
cursor: &'a Cursor<'a>,
// Regex match the error refers to, when there is one
mat: Option<regex::Match<'a>>,
// Description of the error
message: String,
}
impl<'a> RuleError<'a>
{
/// Builds an error from its location, optional match and message.
pub fn new(_cursor: &'a Cursor<'a>, _match: Option<regex::Match<'a>>, _message: String) -> RuleError<'a>
{
RuleError
{
cursor: _cursor,
mat: _match,
message: _message,
}
}
}
pub trait SyntaxRule
/*
pub trait RegexRule: Rule
{
fn name(&self) -> &'static str;
fn next_match<'a>(&self, cursor: &'a Cursor) -> Option<usize>;
fn on_match<'a>(&self, cursor: &'a Cursor) -> Result<(Token<'a>, RuleResult), RuleError<'a>>;
/// Returns the rule's regex
fn regex(&self) -> &regex::Regex;
/// Callback on regex rule match
fn on_regex_match<'a>(&self, parser: &Parser, document: &Document, token: Token<'a>, matches: regex::Captures) -> Vec<Report<'a, (String, Range<usize>)>>;
}
impl<T: RegexRule> Rule for T {
fn name(&self) -> &'static str { RegexRule::name(self) }
/// Finds the next match starting from [`cursor`]
fn next_match<'a>(&self, cursor: &'a Cursor) -> Option<usize>
{
let re = self.regex();
let content = cursor.file.content.as_ref().unwrap();
match re.find_at(content.as_str(), cursor.pos)
{
Some(m) => Some(m.start()),
None => None,
}
}
fn on_match<'a>(&self, parser: &Parser, document: &Document, cursor: Cursor<'a>) -> (Cursor<'a>, Vec<Report<'a, (String, Range<usize>)>>)
{
let content = cursor.file.content.as_ref().unwrap();
let matches = self.regex().captures_at(content.as_str(), cursor.pos).unwrap();
let token = Token::new(cursor.pos, matches.get(0).unwrap().len(), cursor.file);
let token_end = token.end();
(cursor.at(token_end), self.on_regex_match(parser, document, token, matches))
}
}
*/
/// A rule driven by one or more regular expressions.
///
/// Every `RegexRule` automatically is a [`Rule`] through the blanket
/// implementation below.
pub trait RegexRule
{
/// Returns the rule's name
fn name(&self) -> &'static str;
/// Returns the rule's regexes
fn regexes(&self) -> &[regex::Regex];
/// Callback on regex rule match
///
/// `index` is the position, in [`RegexRule::regexes`], of the regex that
/// matched; `token` spans the whole match and `matches` holds its captures.
/// Returns the reports generated while processing the match.
fn on_regex_match(&self, index: usize, parser: &dyn Parser, document: &Document, token: Token, matches: regex::Captures) -> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>>;
}
/// Blanket [`Rule`] implementation for every [`RegexRule`].
///
/// The match data exchanged between `next_match` and `on_match` is the index
/// (a `usize`) of the regex that matched.
impl<T: RegexRule> Rule for T {
    fn name(&self) -> &'static str { RegexRule::name(self) }

    /// Finds the next match starting from [`cursor`]
    ///
    /// Among all regexes, the match starting the earliest wins; on equal
    /// positions the regex listed first wins.
    fn next_match(&self, cursor: &Cursor)
        -> Option<(usize, Box<dyn Any>)> {
        let content = cursor.source.content();
        self.regexes().iter()
            .enumerate()
            .filter_map(|(id, re)| re
                .find_at(content.as_str(), cursor.pos)
                .map(|m| (m.start(), id)))
            // `min_by_key` returns the first of several equal minima
            .min_by_key(|(pos, _)| *pos)
            .map(|(pos, id)| (pos, Box::new(id) as Box<dyn Any>))
    }

    /// Re-runs the winning regex at `cursor` and forwards its captures to
    /// [`RegexRule::on_regex_match`].
    ///
    /// # Panics
    ///
    /// Panics if `match_data` is not the regex index produced by `next_match`,
    /// or if the regex does not match at `cursor` anymore.
    fn on_match(&self, parser: &dyn Parser, document: &Document, cursor: Cursor, match_data: Option<Box<dyn Any>>)
        -> (Cursor, Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>>) {
        let content = cursor.source.content();
        // Checked instead of `unwrap_unchecked`: a broken contract must panic,
        // not trigger undefined behaviour.
        let index = *match_data
            .expect("RegexRule::on_match called without match data")
            .downcast::<usize>()
            .expect("RegexRule match data must be the index of the matching regex");
        let re = &self.regexes()[index];

        let captures = re.captures_at(content.as_str(), cursor.pos)
            .expect("regex reported by next_match must match at the cursor");
        let token = Token::new(captures.get(0).unwrap().range(), cursor.source.clone());

        let token_end = token.end();
        (cursor.at(token_end), self.on_regex_match(index, parser, document, token, captures))
    }
}

View file

@ -1,151 +0,0 @@
use regex::Regex;
use super::rule::{RuleResult, RuleError, SyntaxRule};
use super::super::files::cursor::Cursor;
use super::super::files::token::Token;
use super::super::syntax::element::{Element, ReferenceableElement};
/// Bit flags describing how a section is rendered; flags are combined with `|`.
pub mod SectionKind
{
// No flag set
pub const NONE : u8 = 0x00;
// Selected by the `+` marker (hidden from the TOC per the rule's error message)
pub const NO_TOC : u8 = 0x01;
// Selected by the `*` marker (unnumbered per the rule's error message)
pub const NO_NUMBER : u8 = 0x02;
}
/// A section heading: title, optional reference name, kind flags and depth.
pub struct Section
{
    title: String,
    reference: Option<String>,
    section_kind: u8,
    depth: usize,
}

impl Section
{
    /// Builds a section from its title, optional reference, kind flags and depth.
    pub fn new<'h>(_title: String, _reference: Option<String>, kind: u8, _depth: usize) -> Section
    {
        Self {
            depth: _depth,
            section_kind: kind,
            reference: _reference,
            title: _title,
        }
    }
}

impl Element for Section
{
    fn element_name(&self) -> &'static str {
        "Section"
    }
}

impl ReferenceableElement for Section
{
    /// Name under which the section can be referenced, if it has one.
    fn reference_name(&self) -> Option<&String> {
        self.reference.as_ref()
    }
}
// TODO: Single file for grammar + element, and add `Rule` suffix for rules
pub struct SectionRule
{
regex: Regex,
}
impl SectionRule
{
pub fn new() -> SectionRule
{
SectionRule
{
regex: regex::Regex::new(r"(?:^|\n)(#{1,})(\{.*\})?((?:\*|\+){0,})?((?:\t| ){0,})(.*)").unwrap()
}
}
}
/// Matching and validation of section headings.
///
/// Capture groups of the rule's regex, as used below: 1 = `#` marks (depth),
/// 2 = `{...}` reference, 3 = kind markers, 4 = spacing, 5 = title.
impl SyntaxRule for SectionRule
{
fn name(&self) -> &'static str { "Section" }
/// Returns the start of the next section match at or after the cursor position.
fn next_match<'a>(&self, cursor: &'a Cursor) -> Option<usize>
{
match self.regex.find_at(&cursor.content, cursor.position)
{
Some(m) => Some(m.start()),
None => None
}
}
/// Validates the match at the cursor and builds the corresponding `Section`.
///
/// Returns a `RuleError` when the depth exceeds 6, the spacing before the
/// title is missing, the kind markers are not recognised, or the title is empty.
fn on_match<'a>(&self, cursor: &'a Cursor) -> Result<(Token<'a>, RuleResult), RuleError<'a>>
{
let m = self.regex.captures_at(&cursor.content, cursor.position).unwrap(); // Capture match
// Depth is the number of `#` marks
let section_depth = match m.get(1)
{
Some(depth) => {
if depth.len() > 6
{
return Err(RuleError::new(&cursor, m.get(1),
format!("Section depth must not be greater than 6, got `{}` (depth: {})", depth.as_str(), depth.len())))
}
depth.len()
}
_ => return Err(RuleError::new(&cursor, m.get(1), String::from("Empty section depth")))
};
// Spacing
match m.get(4)
{
Some(spacing) => {
if spacing.as_str().is_empty() || !spacing.as_str().chars().all(|c| c == ' ' || c == '\t')
{
return Err(RuleError::new(&cursor, m.get(4),
format!("Sections require spacing made of spaces or tab before the section's title, got: `{}`", spacing.as_str())))
}
}
_ => return Err(RuleError::new(&cursor, m.get(4),
String::from("Sections require spacing made of spaces or tab before the section's title")))
}
// Reference name: the raw text of group 2, i.e. including the surrounding braces
let section_refname = match m.get(2)
{
Some(reference) => {
// TODO: Validate reference name
// TODO: After parsing, check for duplicate references
Some(String::from(reference.as_str()))
}
_ => None
};
// Kind markers; only the exact spellings listed below are accepted
let section_kind = match m.get(3)
{
Some(kind) => {
match kind.as_str() {
"*+" => SectionKind::NO_NUMBER | SectionKind::NO_TOC,
"*" => SectionKind::NO_NUMBER,
"+" => SectionKind::NO_TOC,
"" => SectionKind::NONE,
_ => return Err(RuleError::new(&cursor, m.get(3),
format!("Section kind must be either `*` for unnumbered, `+` to hide from TOC or `*+`, got `{}`. Leave empty for normal sections", kind.as_str())))
}
}
_ => SectionKind::NONE,
};
// Title must not be empty
let section_title = match m.get(5) {
Some(title) => match title.as_str() {
"" => return Err(RuleError::new(&cursor, m.get(5),
String::from("Sections require a non-empty title"))),
_ => String::from(title.as_str())
}
_ => return Err(RuleError::new(&cursor, m.get(5),
String::from("Sections require a non-empty title")))
};
let section = Box::new(Section::new(
section_title,
section_refname,
section_kind,
section_depth));
// The token and the result length both cover the whole match (group 0)
Ok((Token::from(cursor, m.get(0).unwrap()), RuleResult::new(m.get(0).unwrap().len(), section)))
}
}

177
src/parser/source.rs Normal file
View file

@ -0,0 +1,177 @@
use std::{fs, ops::Range, rc::Rc};
use core::fmt::Debug;
use downcast_rs::{impl_downcast, Downcast};
/// Trait for source content
///
/// A source is a named piece of text the parser works on. `Downcast` allows
/// recovering the concrete source type from a `dyn Source`.
pub trait Source: Downcast
{
/// Gets the source's location
///
/// The location is a [`Token`], or `None` when the source has none.
fn location(&self) -> Option<&Token>;
/// Gets the source's name
fn name(&self) -> &String;
/// Gets the source's content
fn content(&self) -> &String;
}
impl_downcast!(Source);
/// A source displays as its name.
impl core::fmt::Display for dyn Source
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = self.name();
        f.write_fmt(format_args!("{}", name))
    }
}

/// Debug output is `Source{<name>}`.
impl core::fmt::Debug for dyn Source
{
    // TODO
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = self.name();
        f.write_fmt(format_args!("Source{{{}}}", name))
    }
}
/// Two sources are equal when they have the same name.
impl std::cmp::PartialEq for dyn Source
{
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.name(), other.name());
        lhs.eq(rhs)
    }
}

impl std::cmp::Eq for dyn Source {}

/// Hashing only considers the name, consistently with equality.
impl std::hash::Hash for dyn Source
{
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        std::hash::Hash::hash(self.name(), state)
    }
}
/// A source backed by a file read from disk.
pub struct SourceFile
{
    location: Option<Token>,
    path: String,
    content: String,
}

impl SourceFile
{
    // TODO: Create a SourceFileRegistry holding already loaded files to avoid reloading them
    /// Reads the file at `path` and wraps its content.
    ///
    /// # Errors
    ///
    /// Returns a message naming `path` when the file cannot be read.
    pub fn new(path: String, location: Option<Token>) -> Result<Self, String>
    {
        let content = fs::read_to_string(&path)
            .map_err(|_| format!("Unable to read file content: `{}`", path))?;

        Ok(Self { location, path, content })
    }
}

impl Source for SourceFile
{
    fn location(&self) -> Option<&Token> {
        self.location.as_ref()
    }

    /// The name of a file source is its path.
    fn name(&self) -> &String {
        &self.path
    }

    fn content(&self) -> &String {
        &self.content
    }
}
/// A source whose content is held in memory; it always has a location.
pub struct VirtualSource
{
    location: Token,
    name: String,
    content: String,
}

impl VirtualSource
{
    /// Builds a virtual source from its location, name and content.
    pub fn new(location: Token, name: String, content: String) -> Self
    {
        VirtualSource {
            content,
            name,
            location,
        }
    }
}

impl Source for VirtualSource
{
    fn location(&self) -> Option<&Token> {
        Some(&self.location)
    }

    fn name(&self) -> &String {
        &self.name
    }

    fn content(&self) -> &String {
        &self.content
    }
}
/// A position inside a source.
///
/// Cloning a cursor copies the position and shares the same source.
#[derive(Debug, Clone)]
pub struct Cursor
{
    pub pos: usize,
    pub source: Rc<dyn Source>,
}

impl Cursor {
    /// Creates a cursor at `pos` in `source`
    pub fn new(pos: usize, source: Rc<dyn Source>) -> Self {
        Self { pos, source }
    }

    /// Creates a cursor at `new_pos` in the same source
    pub fn at(&self, new_pos: usize) -> Self
    {
        Self::new(new_pos, self.source.clone())
    }
}
/// A range of positions inside a source.
#[derive(Debug, Clone)]
pub struct Token
{
    pub range: Range<usize>,
    source: Rc<dyn Source>,
}

impl Token
{
    /// Creates a token covering `range` in `source`
    pub fn new(range: Range<usize>, source: Rc<dyn Source>) -> Self {
        Self { range, source }
    }

    /// Returns a new handle on the token's source
    pub fn source(&self) -> Rc<dyn Source>
    {
        Rc::clone(&self.source)
    }

    /// Construct Token from a range
    ///
    /// # Panics
    ///
    /// Panics when the two cursors do not point into the same source.
    pub fn from(start: &Cursor, end: &Cursor) -> Self
    {
        assert!(Rc::ptr_eq(&start.source, &end.source));

        Self::new(start.pos .. end.pos, start.source.clone())
    }

    /// First position covered by the token
    pub fn start(&self) -> usize
    {
        self.range.start
    }

    /// End of the token's range (exclusive)
    pub fn end(&self) -> usize
    {
        self.range.end
    }
}

87
src/parser/state.rs Normal file
View file

@ -0,0 +1,87 @@
use std::{cell::RefCell, collections::HashMap, ops::Range, rc::Rc};
use ariadne::Report;
use downcast_rs::{impl_downcast, Downcast};
use crate::document::document::Document;
use super::{parser::Parser, source::Source};
/// Scope for state objects
///
/// Scopes are ordered by their discriminant (`GLOBAL < DOCUMENT < PARAGRAPH`);
/// `StateHolder::on_scope_end` relies on that order to remove every state
/// whose scope is greater than or equal to the one ending.
#[derive(PartialEq, PartialOrd)]
pub enum Scope
{
/// Global state
GLOBAL = 0,
/// Document-local state
DOCUMENT = 1,
/// Paragraph-local state
/// NOTE: Even though paragraph may span across multiple documents,
/// a paragraph-local state should be removed when importing a new document
PARAGRAPH = 2,
}
/// A piece of parser state attached to a [`Scope`].
///
/// `Downcast` allows recovering the concrete state type from a `dyn State`.
pub trait State: Downcast
{
/// Returns the state's [`Scope`]
fn scope(&self) -> Scope;
/// Callback called when state goes out of scope
///
/// Returns the reports generated while removing the state.
fn on_remove<'a>(&self, parser: &dyn Parser, document: &Document) -> Vec<Report<'a, (Rc<dyn Source>, Range<usize>)>>;
}
impl_downcast!(State);
/// Object owning all the states
pub struct StateHolder
{
    data: HashMap<String, Rc<RefCell<dyn State>>>
}

impl StateHolder
{
    /// Creates an empty holder
    pub fn new() -> Self {
        Self {
            data: HashMap::new(),
        }
    }

    /// Attempts to push `state` under `name`.
    ///
    /// On success the inserted state is returned. On collision the holder is
    /// left untouched and an error carrying the already present state is
    /// returned (previously the existing state was overwritten even though the
    /// call reported a failure).
    pub fn insert(&mut self, name: String, state: Rc<RefCell<dyn State>>) -> Result<Rc<RefCell<dyn State>>, Rc<RefCell<dyn State>>>
    {
        match self.data.entry(name)
        {
            std::collections::hash_map::Entry::Occupied(present) => Err(present.get().clone()),
            std::collections::hash_map::Entry::Vacant(slot) => {
                slot.insert(state.clone());
                Ok(state)
            }
        }
    }

    /// Returns the state stored under `name`, if any
    pub fn query(&self, name: &String) -> Option<Rc<RefCell<dyn State>>>
    {
        self.data.get(name).cloned()
    }

    /// Removes every state whose scope is greater than or equal to `scope`,
    /// calling [`State::on_remove`] on each of them, and returns the collected
    /// reports.
    ///
    /// NOTE(review): callers reach this through `parser.state_mut()` while also
    /// passing `parser`; an `on_remove` implementation that borrows the parser's
    /// state again would presumably hit a `RefCell` borrow panic -- confirm.
    pub fn on_scope_end(&mut self, parser: &dyn Parser, document: &Document, scope: Scope) -> Vec<Report<'_, (Rc<dyn Source>, Range<usize>)>>
    {
        let mut result = vec![];

        self.data
            .retain(|_name, state|
            {
                if state.borrow().scope() >= scope
                {
                    result.extend(state.borrow().on_remove(parser, document));
                    false
                }
                else
                {
                    true
                }
            });

        result
    }
}

343
src/parser/util.rs Normal file
View file

@ -0,0 +1,343 @@
use std::collections::HashMap;
use unicode_segmentation::UnicodeSegmentation;
use crate::{document::{document::Document, element::ElemKind}, elements::paragraph::Paragraph};
/// Processes text for escape characters and paragraphing
///
/// The content is walked grapheme by grapheme:
/// * a grapheme following an unescaped `\` is copied literally, the `\` itself
///   is not copied;
/// * newlines are never copied; a run of newlines followed by another grapheme
///   may produce a single space (see the comments below);
/// * leading whitespace is kept only when the document already has a
///   [`Paragraph`], and consecutive whitespaces collapse into the first one.
pub fn process_text(document: &Document, content: &str) -> String
{
// True when the previous grapheme was an unescaped `\`
let mut escaped = false;
let mut newlines = 0usize; // Consecutive newlines
//println!("Processing: [{content}]");
// Fold state: (output built so far, previous input grapheme)
let processed = content
.grapheme_indices(true)
.fold((String::new(), None),
|(mut out, prev), (_pos, g)| {
// First grapheme after a run of newlines
if newlines != 0 && g != "\n"
{
newlines = 0;
// Add a whitespace if necessary
match out.chars().last()
{
Some(c) => {
// NOTE: \n is considered whitespace, so previous codepoint can be \n
// (Which can only be done by escaping it)
if !c.is_whitespace() || c == '\n'
{
out += " ";
}
}
None => {
// Nothing was output yet: add the space only when the last visible
// element of the document's last paragraph is an inline element
if document.last_element::<Paragraph>(false)
.and_then(|par| par.find_back(|e| e.kind() != ElemKind::Invisible)
.and_then(|e| Some(e.kind() == ElemKind::Inline)))
.unwrap_or(false)
{
out += " ";
}
} // Don't output anything
}
}
// Output grapheme literally when escaped
if escaped
{
escaped = false;
return (out + g, Some(g));
}
// Increment newlines counter
else if g == "\n"
{
newlines += 1;
return (out, Some(g));
}
// Determine if escaped
else if g == "\\"
{
// `escaped` is always false here, so this marks the next grapheme as escaped
escaped = !escaped;
return (out, Some(g));
}
// Whitespaces
else if g.chars().count() == 1 && g.chars().last().unwrap().is_whitespace()
{
// Content begins with whitespace
if prev.is_none()
{
// Keep it only when the document already has a paragraph
if document.last_element::<Paragraph>(false).is_some()
{
return (out+g, Some(g));
}
else
{
return (out, Some(g));
}
}
// Consecutive whitespaces are converted to a single whitespace
else if prev.unwrap().chars().count() == 1 &&
prev.unwrap().chars().last().unwrap().is_whitespace()
{
return (out, Some(g));
}
}
// Regular grapheme
return (out + g, Some(g));
}).0.to_string();
return processed;
}
/// Processes a string and escapes a single token out of it
///
/// Escaped characters other than the `token` will not be treated as escaped:
/// their `escape` characters are kept as-is. The content is trimmed before
/// being processed.
///
/// An occurrence of `token` preceded by an odd number `n` of `escape`
/// characters is replaced by `(n-1)/2` `escape` characters followed by the
/// token.
///
/// NOTE(review): with a multi-character `token`, characters consumed by a
/// partial match that finally fails are not re-emitted.
///
/// # Example
/// ```
/// assert_eq!(process_escaped('\\', "%", "escaped: \\%, also escaped: \\\\\\%, untouched: \\a"),
/// "escaped: %, also escaped: \\%, untouched: \\a");
/// ```
pub fn process_escaped<S: AsRef<str>>(escape: char, token: &'static str, content: S) -> String
{
    let mut processed = String::new();
    // Number of consecutive `escape` characters seen and not yet emitted
    let mut escaped = 0usize;
    let mut token_it = token.chars().peekable();
    for c in content.as_ref().trim().chars()
    {
        if c == escape
        {
            escaped += 1;
        }
        else if escaped % 2 == 1 && token_it.peek().map_or(false, |p| *p == c)
        {
            let _ = token_it.next();
            // Whole token matched
            if token_it.peek().is_none()
            {
                // One escape is consumed by the token, the others pair up
                (0..((escaped-1)/2))
                    .for_each(|_| processed.push(escape));
                escaped = 0;
                token_it = token.chars().peekable();
                processed.push_str(token);
            }
        }
        else
        {
            if escaped != 0
            {
                // Add untouched escapes (using the configured escape character,
                // not a hard-coded backslash)
                (0..escaped).for_each(|_| processed.push(escape));
                token_it = token.chars().peekable();
                escaped = 0;
            }
            processed.push(c);
        }
    }
    // Add trailing escapes
    (0..escaped).for_each(|_| processed.push(escape));

    processed
}
/// Description of a property accepted by a [`PropertyParser`].
#[derive(Debug)]
pub struct Property
{
    /// Whether the property is required
    required: bool,
    /// Human readable description, shown in error messages
    description: String,
    /// Value used when the property is not given
    default: Option<String>,
}

impl Property {
    /// Builds a property from its requirement flag, description and optional default.
    pub fn new(required: bool, description: String, default: Option<String>) -> Self {
        Self { required, description, default }
    }
}

/// Formats as `[Req] description` or `[Opt] description`, followed by
/// ` (Default: value)` when the property has a default.
impl core::fmt::Display for Property
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let tag = if self.required { "[Req]" } else { "[Opt]" };
        match self.default.as_ref()
        {
            None => write!(f, "{} {}", tag, self.description),
            Some(default) => write!(f, "{} {} (Default: {})", tag, self.description, default),
        }
    }
}
/// Parsed properties: each name maps to its [`Property`] description and the
/// raw value associated with it.
#[derive(Debug)]
pub struct PropertyMap<'a>
{
    pub(crate) properties: HashMap<String, (&'a Property, String)>
}

impl<'a> PropertyMap<'a> {
    /// Creates an empty map
    pub fn new() -> Self {
        PropertyMap { properties: HashMap::new() }
    }

    /// Converts the value of property `name` with `f`.
    ///
    /// Returns the property description together with the converted value, or
    /// the error produced by `f`.
    ///
    /// # Panics
    ///
    /// Panics when no property called `name` is present in the map.
    pub fn get<T, Error, F: FnOnce(&'a Property, &String) -> Result<T, Error>>(&self, name: &str, f: F)
        -> Result<(&'a Property, T), Error> {
        let (prop, value) = self.properties.get(name).unwrap();
        let property: &'a Property = *prop;

        f(property, value).map(|converted| (property, converted))
    }
}
/// Parser for `name=value` property lists, driven by a set of known properties.
pub struct PropertyParser {
    properties: HashMap<String, Property>,
}

impl PropertyParser {
    /// Creates a parser accepting the given properties
    pub fn new(properties: HashMap<String, Property>) -> Self {
        Self { properties }
    }

    /// Attempts to build a default propertymap
    ///
    /// Returns an error if at least one [`Property`] is required and doesn't provide a default
    pub fn default(&self) -> Result<PropertyMap<'_>, String> {
        let mut properties = PropertyMap::new();

        for (name, prop) in &self.properties
        {
            match (prop.required, prop.default.as_ref())
            {
                (true, None) => return Err(format!("Missing property `{name}` {prop}")),
                (false, None) => {},
                (_, Some(default)) => {
                    properties.properties.insert(
                        name.clone(),
                        (prop, default.clone())
                    );
                }
            }
        }

        Ok(properties)
    }

    /// Parses properties string "prop1=value1, prop2 = val\,2" -> {prop1: value1, prop2: val,2}
    ///
    /// # Key-value pair
    ///
    /// Property names/values are separated by a single '=' that cannot be escaped.
    /// Therefore names cannot contain the '=' character.
    /// Names and values are trimmed.
    ///
    /// # Example
    ///
    /// ```
    /// let mut properties = HashMap::new();
    /// properties.insert("width".to_string(),
    ///     Property::new(true, "Width of the element in em".to_string(), None));
    ///
    /// let parser = PropertyParser::new(properties);
    /// let pm = parser.parse("width=15").unwrap();
    ///
    /// assert_eq!(pm.get("width", |_, val| val.parse::<i32>()).unwrap().1, 15);
    /// ```
    /// # Return value
    ///
    /// Returns the parsed property map, or an error if either:
    /// * The list is empty or its last value is missing
    /// * An unknown property is present
    /// * A duplicate property is present
    ///
    /// Missing required properties are not detected yet and defaults are not
    /// filled in (see the TODO at the end of the function).
    ///
    /// Note: Only ',' inside values can be escaped, other '\' are treated literally
    pub fn parse(&self, content: &str) -> Result<PropertyMap<'_>, String> {
        let mut properties = PropertyMap::new();
        // Validates and stores one `name=value` pair
        let mut try_insert = |name: &String, value: &String|
            -> Result<(), String> {
            let trimmed_name = name.trim();
            let trimmed_value = value.trim();
            let prop = match self.properties.get(trimmed_name)
            {
                None => return Err(format!("Unknown property name: `{trimmed_name}` (with value: `{trimmed_value}`). Valid properties are:\n{}",
                    self.properties.iter().fold(String::new(),
                        |out, (name, prop)| out + format!(" - {name}: {prop}\n").as_str()))),
                Some(prop) => prop
            };

            if let Some((_, previous)) = properties.properties.insert(
                trimmed_name.to_string(),
                (prop, trimmed_value.to_string()))
            {
                return Err(format!("Duplicate property `{trimmed_name}`, previous value: `{previous}` current value: `{trimmed_value}`"))
            }

            Ok(())
        };

        let mut in_name = true;
        let mut name = String::new();
        let mut value = String::new();
        // Number of consecutive `\` seen and not yet emitted
        let mut escaped = 0usize;
        for c in content.chars()
        {
            if c == '\\'
            {
                escaped += 1;
            }
            else if c == '=' && in_name
            {
                in_name = false;
                (0..escaped).for_each(|_| name.push('\\'));
                escaped = 0;
            }
            else if c == ',' && !in_name
            {
                if escaped % 2 == 0 // Not escaped
                {
                    (0..escaped/2).for_each(|_| value.push('\\'));
                    escaped = 0;
                    in_name = true;

                    try_insert(&name, &value)?;
                    name.clear();
                    value.clear();
                }
                else
                {
                    // Escaped separator: it belongs to the value
                    (0..(escaped-1)/2).for_each(|_| value.push('\\'));
                    value.push(',');
                    escaped = 0;
                }
            }
            else
            {
                if in_name {
                    (0..escaped).for_each(|_| name.push('\\'));
                    name.push(c)
                }
                else {
                    (0..escaped).for_each(|_| value.push('\\'));
                    value.push(c)
                }
                escaped = 0;
            }
        }

        // Backslashes ending the input escape nothing: keep them literally
        // instead of silently dropping them
        if escaped != 0
        {
            let target = if in_name { &mut name } else { &mut value };
            (0..escaped).for_each(|_| target.push('\\'));
        }

        if !in_name && value.trim().is_empty()
        {
            return Err("Expected a value after last `=`".to_string())
        }
        else if name.is_empty() || value.is_empty()
        {
            return Err("Expected non empty property list.".to_string());
        }

        try_insert(&name, &value)?;

        // TODO: Missing properties

        Ok(properties)
    }
}

236
src/server.rs Normal file
View file

@ -0,0 +1,236 @@
#![feature(char_indices_offset)]
mod document;
mod compiler;
mod parser;
mod elements;
mod lua;
mod cache;
mod lsp;
use std::collections::HashMap;
use std::rc::Rc;
use std::sync::Arc;
use dashmap::DashMap;
use document::variable::Variable;
use tower_lsp::jsonrpc::Result;
use tower_lsp::lsp_types::*;
use tower_lsp::{Client, LanguageServer, LspService, Server};
/// State of the language server.
#[derive(Debug)]
struct Backend {
// Handle used to send messages and diagnostics to the client
client: Client,
// Latest text of every known document, keyed by the document URI as a string
document_map: DashMap<String, String>,
//variables: DashMap<String, HashMap<String, Arc<dyn Variable + Send + Sync + 'static>>>,
}
/// A document as received from the client: URI, full text and version.
#[derive(Debug)]
struct TextDocumentItem {
uri: Url,
text: String,
version: i32,
}
impl Backend {
/// Stores the new text of a document, parses it and publishes the resulting
/// diagnostics to the client.
///
/// NOTE(review): this body uses `parse`, `ParserResult`, `chumsky`, `rope`,
/// `offset_to_position`, `self.ast_map` and `self.semantic_token_map`, none of
/// which is defined or imported in the visible part of this file (`Backend`
/// only declares `client` and `document_map`) -- confirm where they come from.
async fn on_change(&self, params: TextDocumentItem) {
// Remember the latest text of the document
self.document_map
.insert(params.uri.to_string(), params.text.clone());
let ParserResult {
ast,
parse_errors,
semantic_tokens,
} = parse(&params.text);
// Turn every parse error into a diagnostic (message + range)
let diagnostics = parse_errors
.into_iter()
.filter_map(|item| {
let (message, span) = match item.reason() {
chumsky::error::SimpleReason::Unclosed { span, delimiter } => {
(format!("Unclosed delimiter {}", delimiter), span.clone())
}
chumsky::error::SimpleReason::Unexpected => (
format!(
"{}, expected {}",
if item.found().is_some() {
"Unexpected token in input"
} else {
"Unexpected end of input"
},
if item.expected().len() == 0 {
"something else".to_string()
} else {
item.expected()
.map(|expected| match expected {
Some(expected) => expected.to_string(),
None => "end of input".to_string(),
})
.collect::<Vec<_>>()
.join(", ")
}
),
item.span(),
),
chumsky::error::SimpleReason::Custom(msg) => (msg.to_string(), item.span()),
};
// Errors whose span cannot be converted to positions are dropped
|| -> Option<Diagnostic> {
// let start_line = rope.try_char_to_line(span.start)?;
// let first_char = rope.try_line_to_char(start_line)?;
// let start_column = span.start - first_char;
let start_position = offset_to_position(span.start, &rope)?;
let end_position = offset_to_position(span.end, &rope)?;
// let end_line = rope.try_char_to_line(span.end)?;
// let first_char = rope.try_line_to_char(end_line)?;
// let end_column = span.end - first_char;
Some(Diagnostic::new_simple(
Range::new(start_position, end_position),
message,
))
}()
})
.collect::<Vec<_>>();
self.client
.publish_diagnostics(params.uri.clone(), diagnostics, Some(params.version))
.await;
// Keep the AST when parsing produced one
if let Some(ast) = ast {
self.ast_map.insert(params.uri.to_string(), ast);
}
// self.client
// .log_message(MessageType::INFO, &format!("{:?}", semantic_tokens))
// .await;
self.semantic_token_map
.insert(params.uri.to_string(), semantic_tokens);
}
}
#[tower_lsp::async_trait]
impl LanguageServer for Backend {
async fn initialize(&self, _: InitializeParams) -> Result<InitializeResult> {
Ok(InitializeResult {
server_info: None,
capabilities: ServerCapabilities {
text_document_sync: Some(TextDocumentSyncCapability::Kind(
TextDocumentSyncKind::FULL,
)),
completion_provider: Some(CompletionOptions {
resolve_provider: Some(false),
trigger_characters: Some(vec!["%".to_string()]),
work_done_progress_options: Default::default(),
all_commit_characters: None,
completion_item: None,
}),
semantic_tokens_provider: Some(
SemanticTokensServerCapabilities::SemanticTokensRegistrationOptions(
SemanticTokensRegistrationOptions {
text_document_registration_options: {
TextDocumentRegistrationOptions {
document_selector: Some(vec![DocumentFilter {
language: Some("nml".to_string()),
scheme: Some("file".to_string()),
pattern: None,
}]),
}
},
semantic_tokens_options: SemanticTokensOptions {
work_done_progress_options: WorkDoneProgressOptions::default(),
legend: SemanticTokensLegend {
token_types: vec![SemanticTokenType::COMMENT, SemanticTokenType::MACRO],
token_modifiers: vec![],
},
range: None, //Some(true),
full: Some(SemanticTokensFullOptions::Bool(true)),
},
static_registration_options: StaticRegistrationOptions::default(),
},
),
),
..ServerCapabilities::default()
},
})
}
async fn initialized(&self, _: InitializedParams) {
self.client
.log_message(MessageType::INFO, "server initialized!")
.await;
}
async fn shutdown(&self) -> Result<()> {
Ok(())
}
async fn did_open(&self, params: DidOpenTextDocumentParams) {
self.client
.log_message(MessageType::INFO, "file opened!")
.await;
self.on_change(TextDocumentItem {
uri: params.text_document.uri,
text: params.text_document.text,
version: params.text_document.version,
})
.await
}
async fn did_change(&self, mut params: DidChangeTextDocumentParams) {
self.on_change(TextDocumentItem {
uri: params.text_document.uri,
text: std::mem::take(&mut params.content_changes[0].text),
version: params.text_document.version,
})
.await
}
async fn completion(&self, params: CompletionParams) -> Result<Option<CompletionResponse>> {
let uri = params.text_document_position.text_document.uri;
let position = params.text_document_position.position;
let completions = || -> Option<Vec<CompletionItem>> {
let mut ret = Vec::with_capacity(0);
Some(ret)
}();
Ok(completions.map(CompletionResponse::Array))
}
async fn semantic_tokens_full(
&self,
params: SemanticTokensParams,
) -> Result<Option<SemanticTokensResult>> {
let uri = params.text_document.uri.to_string();
self.client
.log_message(MessageType::LOG, "semantic_token_full")
.await;
let semantic_tokens = || -> Option<Vec<SemanticToken>> {
let semantic_tokens = vec![
SemanticToken {
delta_line: 1,
delta_start: 2,
length: 5,
token_type: 1,
token_modifiers_bitset: 0,
}
];
Some(semantic_tokens)
}();
if let Some(semantic_token) = semantic_tokens {
return Ok(Some(SemanticTokensResult::Tokens(SemanticTokens {
result_id: None,
data: semantic_token,
})));
}
Ok(None)
}
}
#[tokio::main]
async fn main() {
let stdin = tokio::io::stdin();
let stdout = tokio::io::stdout();
let (service, socket) = LspService::new(
|client|
Backend {
client
});
Server::new(stdin, stdout, socket).serve(service).await;
}

View file

@ -1 +0,0 @@
pub mod element;

View file

@ -1,4 +0,0 @@
/// Empty placeholder for a document.
struct Document
{
}

View file

@ -1,30 +0,0 @@
/// An element of a document.
// NOTE(review): `token` is missing its terminating `;` and uses an undeclared
// lifetime `'a`, so this declaration does not compile as written.
pub trait Element
{
/// Name of the element's type
fn element_name(&self) -> &'static str;
fn token(&'a self) -> Token<'a>
}
/// An [`Element`] that may carry a reference name.
pub trait ReferenceableElement : Element
{
/// Returns the element's reference name, or `None` when it has none
fn reference_name(&self) -> Option<&String>;
}
/// A run of plain text.
pub struct Text
{
    content: String,
}

impl Text
{
    /// Builds a text element owning a copy of `_content`.
    pub fn new<'h>(_content: &'h str) -> Text
    {
        let content = _content.to_owned();
        Self { content }
    }
}

impl Element for Text
{
    fn element_name(&self) -> &'static str {
        "Text"
    }
}