diff --git a/Cargo.toml b/Cargo.toml index 5f72ae2..217e1bf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,11 +1,37 @@ [package] -name = "rust_learn" +name = "nml" version = "0.1.0" edition = "2021" +license = "GNU AGPL3" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[[bin]] +name = "nml" +path = "src/main.rs" + +[[bin]] +name = "nmlls" +path = "src/server.rs" + +[profile.profiling] +inherits = "release" +debug = true [dependencies] -crossbeam-utils = "0.8.19" -files = "2.2.3" +ariadne = "0.4.1" +dashmap = "6.0.1" +downcast-rs = "1.2.1" +getopts = "0.2.21" +lazy_static = "1.5.0" +lsp-server = "0.7.6" +lsp-types = "0.97.0" +mlua = { version = "0.9.9", features = ["lua54", "vendored"] } regex = "1.10.3" +rusqlite = "0.31.0" +rust-crypto = "0.2.36" +serde = "1.0.204" +serde_json = "1.0.120" +syntect = "5.2.0" +tokio = { version = "1.38.1", features = ["macros", "rt-multi-thread", "io-std"]} + +tower-lsp = "0.20.0" +unicode-segmentation = "1.11.0" diff --git a/src/cache/cache.rs b/src/cache/cache.rs new file mode 100644 index 0000000..3c0494a --- /dev/null +++ b/src/cache/cache.rs @@ -0,0 +1,97 @@ +use std::{error::Error, path::PathBuf}; + +use rusqlite::{types::FromSql, Connection, Params, ToSql}; + +struct Cache { + con: Connection +} + +impl Cache { + fn new(file: PathBuf) -> Result { + match Connection::open(file) + { + Err(e) => return Err(format!("Could not connect to cache database: {}", e.to_string())), + Ok(con) => Ok(Self { con }) + } + } +} + +pub enum CachedError +{ + SqlErr(rusqlite::Error), + GenErr(E) +} + +pub trait Cached +{ + type Key; + type Value; + + /// SQL Query to create the cache table + /// Note: You must use `IF NOT EXIST` + fn sql_table() -> &'static str; + + /// SQL Get query + fn sql_get_query() -> &'static str; + + /// SQL insert query + fn sql_insert_query() -> &'static str; + + fn key(&self) -> ::Key; + + fn init(con: &mut Connection) -> Result<(), rusqlite::Error> + { + 
con.execute(::sql_table(), ()) + .map(|_| ()) + } + + fn cached(&self, con: &mut Connection, f: F) + -> Result<::Value, CachedError> + where + ::Key: ToSql, + ::Value: FromSql + ToSql, + F: FnOnce(&Self) -> Result<::Value, E>, + { + let key = self.key(); + + // Find in cache + let mut query = match con.prepare(::sql_get_query()) + { + Ok(query) => query, + Err(e) => return Err(CachedError::SqlErr(e)) + }; + + let value = query.query_row([&key], |row| + { + Ok(row.get_unwrap::<_, ::Value>(0)) + }).ok(); + + if let Some(value) = value + { + // Found in cache + return Ok(value) + } + else + { + // Compute a value + let value = match f(&self) + { + Ok(val) => val, + Err(e) => return Err(CachedError::GenErr(e)) + }; + + // Try to insert + let mut query = match con.prepare(::sql_insert_query()) + { + Ok(query) => query, + Err(e) => return Err(CachedError::SqlErr(e)) + }; + + match query.execute((&key, &value)) + { + Ok(_) => Ok(value), + Err(e) => Err(CachedError::SqlErr(e)) + } + } + } +} diff --git a/src/cache/mod.rs b/src/cache/mod.rs new file mode 100644 index 0000000..a5c08fd --- /dev/null +++ b/src/cache/mod.rs @@ -0,0 +1 @@ +pub mod cache; diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs new file mode 100644 index 0000000..5e1768e --- /dev/null +++ b/src/compiler/compiler.rs @@ -0,0 +1,153 @@ +use std::{cell::{RefCell, RefMut}, rc::Rc}; + + +use rusqlite::Connection; + +use crate::document::{document::Document, variable::Variable}; + +#[derive(Clone, Copy)] +pub enum Target +{ + HTML, + LATEX, +} + +pub struct Compiler +{ + target: Target, + cache: Option>, +} + +impl Compiler +{ + pub fn new(target: Target, db_path: Option) -> Self { + let cache = match db_path + { + None => None, + Some(path) => { + match Connection::open(path) + { + Err(e) => panic!("Cannot connect to database: {e}"), + Ok(con) => Some(con), + } + } + }; + Self { + target, + cache: cache.map(|con| RefCell::new(con)), + } + } + + pub fn target(&self) -> Target + { + self.target + 
} + + pub fn cache(&self) -> Option> + { + self.cache + .as_ref() + .map(RefCell::borrow_mut) + } + + pub fn sanitize>(&self, str: S) -> String { + match self.target + { + Target::HTML => str.as_ref() + .replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace("\"", """), + _ => todo!("Sanitize not implemented") + } + } + + pub fn header(&self, document: &Document) -> String + { + pub fn get_variable_or_error(document: &Document, var_name: &'static str) -> Option> + { + document.get_variable(var_name) + .and_then(|(_, var)| Some(var)) + .or_else(|| { + println!("Missing variable `{var_name}` in {}", document.source().name()); + None + }) + } + + let mut result = String::new(); + match self.target() + { + Target::HTML => { + result += ""; + result += ""; + if let Some(page_title) = get_variable_or_error(document, "html.page_title") + { + result += format!("{}", self.sanitize(page_title.to_string())).as_str(); + } + + if let Some((_, css)) = document.get_variable("html.css") + { + result += format!("", self.sanitize(css.to_string())).as_str(); + } + result += ""; + + // TODO: TOC + // TODO: Author, Date, Title, Div + }, + Target::LATEX => { + + }, + } + result + } + + pub fn footer(&self, _document: &Document) -> String + { + let mut result = String::new(); + match self.target() + { + Target::HTML => { + result += ""; + }, + Target::LATEX => { + + }, + } + result + } + + pub fn compile(&self, document: &Document) -> String + { + let mut out = String::new(); + let borrow = document.content.borrow(); + + // Header + out += self.header(document).as_str(); + + // Body + for i in 0 .. 
borrow.len() + { + let elem = &borrow[i]; + //let prev = match i + //{ + // 0 => None, + // _ => borrow.get(i-1), + //}; + //let next = borrow.get(i+1); + + match elem.compile(self, document) + { + Ok(result) => { + //println!("Elem: {}\nCompiled to: {result}", elem.to_string()); + out.push_str(result.as_str()) + }, + Err(err) => println!("Unable to compile element: {err}\n{}", elem.to_string()) + } + } + + // Footer + out += self.footer(document).as_str(); + + out + } +} diff --git a/src/compiler/mod.rs b/src/compiler/mod.rs new file mode 100644 index 0000000..59d8df7 --- /dev/null +++ b/src/compiler/mod.rs @@ -0,0 +1 @@ +pub mod compiler; diff --git a/src/document/document.rs b/src/document/document.rs new file mode 100644 index 0000000..5abf275 --- /dev/null +++ b/src/document/document.rs @@ -0,0 +1,210 @@ +use std::cell::{Ref, RefCell, RefMut}; +use std::collections::hash_map::HashMap; +use std::rc::Rc; + +use crate::parser::source::Source; + +use super::element::Element; +use super::variable::Variable; + + +#[derive(Debug)] +pub struct Scope { + /// List of all referenceable elements in current scope. 
+ /// All elements in this should return a non empty + pub referenceable: HashMap, + pub variables: HashMap>, +} + +impl Scope { + fn new() -> Self { + Self { + referenceable: HashMap::new(), + variables: HashMap::new(), + } + } + + pub fn merge(&mut self, other: &mut Scope, merge_as: &String, ref_offset: usize) + { + match merge_as.is_empty() + { + true => { + // References + self.referenceable.extend(other.referenceable.drain() + .map(|(name, idx)| + (name, idx+ref_offset))); + + // Variables + self.variables.extend(other.variables.drain() + .map(|(name, var)| + (name, var))); + }, + false => { + // References + self.referenceable.extend(other.referenceable.drain() + .map(|(name, idx)| + (format!("{merge_as}.{name}"), idx+ref_offset))); + + // Variables + self.variables.extend(other.variables.drain() + .map(|(name, var)| + (format!("{merge_as}.{name}"), var))); + } + } + } +} + +#[derive(Debug)] +pub struct Document<'a> { + source: Rc, + parent: Option<&'a Document<'a>>, /// Document's parent + + // FIXME: Render these fields private + pub content: RefCell>>, + pub scope: RefCell, +} + +impl<'a> Document<'a> { + pub fn new(source: Rc, parent: Option<&'a Document<'a>>) -> Self + { + Self { + source: source, + parent: parent, + content: RefCell::new(Vec::new()), + scope: RefCell::new(Scope::new()), + } + } + + pub fn source(&self) -> Rc { self.source.clone() } + + pub fn parent(&self) -> Option<&Document> { self.parent } + + /// Push an element [`elem`] to content. 
[`in_paragraph`] is true if a paragraph is active + pub fn push(&self, elem: Box) + { + // Add index of current element to scope's reference table + if let Some(referenceable) = elem.as_referenceable() + { + // Only add if referenceable holds a reference + if let Some(ref_name) = referenceable.reference_name() + { + self.scope.borrow_mut().referenceable.insert(ref_name.clone(), self.content.borrow().len()); + } + } + + self.content.borrow_mut().push(elem); + } + + pub fn last_element(&self, recurse: bool) -> Option> + { + let elem = Ref::filter_map(self.content.borrow(), + |content| content.last() + .and_then(|last| last.downcast_ref::())).ok(); + + if elem.is_some() || !recurse { return elem } + + match self.parent + { + None => None, + Some(parent) => parent.last_element(true), + } + } + + pub fn last_element_mut(&self, recurse: bool) -> Option> + { + let elem = RefMut::filter_map(self.content.borrow_mut(), + |content| content.last_mut() + .and_then(|last| last.downcast_mut::())).ok(); + + if elem.is_some() || !recurse { return elem } + + match self.parent + { + None => None, + Some(parent) => parent.last_element_mut(true), + } + } + + pub fn get_reference(&self, ref_name: &str) -> Option<(&Document<'a>, std::cell::Ref<'_, Box>)> { + match self.scope.borrow().referenceable.get(ref_name) { + // Return if found + Some(elem) => { + return Some((&self, + std::cell::Ref::map(self.content.borrow(), + |m| &m[*elem]))) + }, + + // Continue search recursively + None => match self.parent { + Some(parent) => return parent.get_reference(ref_name), + + // Not found + None => return None, + } + } + } + + pub fn add_variable(&self, variable: Rc) + { + self.scope.borrow_mut().variables.insert( + variable.name().to_string(), + variable); + } + + pub fn get_variable>(&self, name: S) -> Option<(&Document<'a>, Rc)> + { + match self.scope.borrow().variables.get(name.as_ref()) + { + Some(variable) => { + return Some((&self, variable.clone())); + }, + + // Continue search recursively + 
None => match self.parent { + Some(parent) => return parent.get_variable(name), + + // Not found + None => return None, + } + } + } + + pub fn remove_variable>(&self, name: S) -> Option<(&Document<'a>, Rc)> + { + match self.scope.borrow_mut().variables.remove(name.as_ref()) + { + Some(variable) => { + return Some((&self, variable.clone())); + }, + + // Continue search recursively + None => match self.parent { + Some(parent) => return parent.remove_variable(name), + + // Not found + None => return None, + } + } + } + + /// Merges [`other`] into [`self`] + pub fn merge(&self, other: Document, merge_as: Option<&String>) + { + match merge_as + { + Some(merge_as) => self.scope.borrow_mut() + .merge( + &mut *other.scope.borrow_mut(), + merge_as, + self.content.borrow().len()+1), + _ => {}, + } + + // Content + self.content.borrow_mut().extend((other.content.borrow_mut()) + .drain(..) + .map(|value| value)); + } +} + + diff --git a/src/document/element.rs b/src/document/element.rs new file mode 100644 index 0000000..7befd80 --- /dev/null +++ b/src/document/element.rs @@ -0,0 +1,95 @@ +use std::str::FromStr; + +use downcast_rs::{impl_downcast, Downcast}; +use crate::{compiler::compiler::Compiler, parser::source::Token}; + +use super::document::Document; + +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum ElemKind { + /// An invisible element (i.e comment) + Invisible, + /// Special elements don't trigger special formatting events + Special, + /// Inline elements don't break paragraphing + Inline, + /// Block elements are outside of paragraphs + Block, +} + +impl FromStr for ElemKind { + type Err = String; + + fn from_str(s: &str) -> Result { + match s + { + "invisible" => Ok(ElemKind::Invisible), + "special" => Ok(ElemKind::Special), + "inline" => Ok(ElemKind::Inline), + "block" => Ok(ElemKind::Block), + _ => Err(format!("Unknown ElemKind: {s}")) + } + } +} + +pub trait Element: Downcast +{ + /// Gets the element defined location i.e token without filename + fn 
location(&self) -> &Token; + + fn kind(&self) -> ElemKind; + + /// Get the element's name + fn element_name(&self) -> &'static str; + + /// Outputs element to string for debug purposes + fn to_string(&self) -> String; + + fn as_referenceable(&self) -> Option<&dyn ReferenceableElement> { None } + + /// Compiles element + fn compile(&self, compiler: &Compiler, document: &Document) -> Result; +} +impl_downcast!(Element); + +impl core::fmt::Debug for dyn Element +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.to_string()) + } +} + +pub trait ReferenceableElement : Element { + /// Reference name + fn reference_name(&self) -> Option<&String>; +} + +#[derive(Debug)] +pub struct Text +{ + location: Token, + content: String, +} + +impl Text +{ + pub fn new(location: Token, content: String) -> Text + { + Text { + location: location, + content: content + } + } +} + +impl Element for Text +{ + fn location(&self) -> &Token { &self.location } + fn kind(&self) -> ElemKind { ElemKind::Inline } + fn element_name(&self) -> &'static str { "Text" } + fn to_string(&self) -> String { format!("{self:#?}") } + + fn compile(&self, compiler: &Compiler, _document: &Document) -> Result { + Ok(compiler.sanitize(self.content.as_str())) + } +} diff --git a/src/document/mod.rs b/src/document/mod.rs new file mode 100644 index 0000000..387c3c9 --- /dev/null +++ b/src/document/mod.rs @@ -0,0 +1,3 @@ +pub mod document; +pub mod element; +pub mod variable; diff --git a/src/document/variable.rs b/src/document/variable.rs new file mode 100644 index 0000000..d2eb70c --- /dev/null +++ b/src/document/variable.rs @@ -0,0 +1,146 @@ +use std::{path::PathBuf, rc::Rc}; +use crate::parser::{parser::Parser, source::{Source, Token, VirtualSource}}; +use super::{document::Document, element::Text}; + + +// TODO enforce to_string(from_string(to_string())) == to_string() +pub trait Variable +{ + fn location(&self) -> &Token; + + fn name(&self) -> &str; + /// Parse 
variable from string, returns an error message on failure + fn from_string(&mut self, str: &str) -> Option; + + /// Converts variable to a string + fn to_string(&self) -> String; + + fn parse<'a>(&self, location: Token, parser: &dyn Parser, document: &'a Document); +} + +impl core::fmt::Debug for dyn Variable +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}{{{}}}", self.name(), self.to_string()) + } +} + +#[derive(Debug)] +pub struct BaseVariable +{ + location: Token, + name: String, + value: String, +} + +impl BaseVariable { + pub fn new(location: Token, name: String, value: String) -> Self { + Self { location, name, value } + } +} + +impl Variable for BaseVariable +{ + fn location(&self) -> &Token { &self.location } + + fn name(&self) -> &str { self.name.as_str() } + + fn from_string(&mut self, str: &str) -> Option { + self.value = str.to_string(); + None + } + + fn to_string(&self) -> String { self.value.clone() } + + fn parse<'a>(&self, _location: Token, parser: &dyn Parser, document: &'a Document) { + let source = Rc::new(VirtualSource::new( + self.location().clone(), + self.name().to_string(), + self.to_string())); + + parser.parse_into(source, document); + } +} + +#[derive(Debug)] +pub struct PathVariable +{ + location: Token, + name: String, + path: PathBuf, +} + +impl PathVariable +{ + pub fn new(location: Token, name: String, path: PathBuf) -> Self { + Self { location, name, path } + } +} + +impl Variable for PathVariable +{ + fn location(&self) -> &Token { &self.location } + + fn name(&self) -> &str { self.name.as_str() } + + fn from_string(&mut self, str: &str) -> Option { + self.path = PathBuf::from(std::fs::canonicalize(str).unwrap()); + None + } + + fn to_string(&self) -> String { self.path.to_str().unwrap().to_string() } + + fn parse<'a>(&self, location: Token, parser: &dyn Parser, document: &'a Document){ + // TODO: Avoid copying the location twice... 
+ // Maybe create a special VirtualSource where the `content()` method + // calls `Variable::to_string()` + let source = Rc::new(VirtualSource::new( + location.clone(), + self.name().to_string(), + self.to_string())); + + parser.push(document, Box::new(Text::new( + Token::new(0..source.content().len(), source), + self.to_string() + ))); + } +} + +/* +struct ConfigVariable +{ + value: T, + name: String, + + desc: String, + validator: Box Option<&String>>, +} + +impl ConfigVariable +{ + fn description(&self) -> &String { &self.desc } +} + +impl Variable for ConfigVariable +where T: FromStr + Display +{ + fn name(&self) -> &str { self.name.as_str() } + + /// Parse variable from string, returns an error message on failure + fn from_string(&mut self, str: &str) -> Option { + match str.parse::() + { + Ok(value) => { + (self.validator)(self, &value).or_else(|| { + self.value = value; + None + }) + }, + Err(_) => return Some(format!("Unable to parse `{str}` into variable `{}`", self.name)) + } + } + + /// Converts variable to a string + fn to_string(&self) -> String { self.value.to_string() } +} +*/ diff --git a/src/elements/code.rs b/src/elements/code.rs new file mode 100644 index 0000000..a3ad380 --- /dev/null +++ b/src/elements/code.rs @@ -0,0 +1,390 @@ +use std::{collections::HashMap, ops::Range, rc::Rc, sync::Once}; + +use ariadne::{Fmt, Label, Report, ReportKind}; +use crypto::{digest::Digest, sha2::Sha512}; +use regex::{Captures, Regex}; +use syntect::{easy::HighlightLines, highlighting::ThemeSet, parsing::SyntaxSet}; + +use crate::{cache::cache::{Cached, CachedError}, compiler::compiler::{Compiler, Target}, document::{document::Document, element::{ElemKind, Element}}, parser::{parser::Parser, rule::RegexRule, source::{Source, Token}, util::{self, Property, PropertyParser}}}; +use lazy_static::lazy_static; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum CodeKind +{ + FullBlock, + MiniBlock, + Inline, +} + +#[derive(Debug)] +struct Code +{ + location: Token, + 
block: CodeKind, + language: String, + name: Option, + code: String, + theme: Option, + line_offset: usize, +} + +impl Code { + fn new(location: Token, block: CodeKind, language: String, name: Option, code: String, theme: Option, line_offset: usize) -> Self { + Self { location, block, language, name, code, theme, line_offset } + } + + fn highlight_html(&self, compiler: &Compiler) -> Result + { + lazy_static! { + static ref syntax_set : SyntaxSet = SyntaxSet::load_defaults_newlines(); + static ref theme_set : ThemeSet = ThemeSet::load_defaults(); + } + let syntax = match syntax_set.find_syntax_by_name(self.language.as_str()) + { + Some(syntax) => syntax, + None => return Err(format!("Unable to find syntax for language: {}", self.language)) + }; + + let theme_string = match self.theme.as_ref() + { + Some(theme) => theme.as_str(), + None => "base16-ocean.dark", + }; + let mut h = HighlightLines::new(syntax, &theme_set.themes[theme_string]); + + let mut result = String::new(); + if self.block == CodeKind::FullBlock + { + result += "
"; + if let Some(name) = &self.name + { + result += format!("
{}
", + compiler.sanitize(name.as_str())).as_str(); + } + + result += format!("
").as_str(); + for (line_id, line) in self.code.split(|c| c == '\n').enumerate() + { + result += ""; + } + + result += "
"; + + // Line number + result += format!("
{}
", line_id+self.line_offset).as_str(); + + // Code + result += "
";
+				match h.highlight_line(line, &syntax_set)
+				{
+					Err(e) => return Err(format!("Error highlighting line `{line}`: {}", e.to_string())),
+					Ok(regions) => {
+						match syntect::html::styled_line_to_highlighted_html(&regions[..], syntect::html::IncludeBackground::No)
+						{
+							Err(e) => return Err(format!("Error highlighting code: {}", e.to_string())),
+							Ok(highlighted) => result += if highlighted.is_empty() { "
" } else { highlighted.as_str() } + } + } + } + result += "
"; + } + else if self.block == CodeKind::MiniBlock + { + result += "
"; + + for line in self.code.split(|c| c == '\n') + { + result += ""; + } + result += "
";
+				// Code
+				match h.highlight_line(line, &syntax_set)
+				{
+					Err(e) => return Err(format!("Error highlighting line `{line}`: {}", e.to_string())),
+					Ok(regions) => {
+						match syntect::html::styled_line_to_highlighted_html(&regions[..], syntect::html::IncludeBackground::No)
+						{
+							Err(e) => return Err(format!("Error highlighting code: {}", e.to_string())),
+							Ok(highlighted) => result += if highlighted.is_empty() { "
" } else { highlighted.as_str() } + } + } + } + result += "
"; + } + else if self.block == CodeKind::Inline + { + result += ""; + match h.highlight_line(self.code.as_str(), &syntax_set) + { + Err(e) => return Err(format!("Error highlighting line `{}`: {}", self.code, e.to_string())), + Ok(regions) => { + match syntect::html::styled_line_to_highlighted_html(®ions[..], syntect::html::IncludeBackground::No) + { + Err(e) => return Err(format!("Error highlighting code: {}", e.to_string())), + Ok(highlighted) => result += highlighted.as_str() + } + } + } + result += ""; + } + + Ok(result) + } +} + +impl Cached for Code +{ + type Key = String; + type Value = String; + + fn sql_table() -> &'static str { + "CREATE TABLE IF NOT EXISTS cached_code ( + digest TEXT PRIMARY KEY, + highlighted BLOB NOT NULL);" + } + + fn sql_get_query() -> &'static str { + "SELECT highlighted FROM cached_code WHERE digest = (?1)" + } + + fn sql_insert_query() -> &'static str { + "INSERT INTO cached_code (digest, highlighted) VALUES (?1, ?2)" + } + + fn key(&self) -> ::Key { + let mut hasher = Sha512::new(); + hasher.input((self.block as usize).to_be_bytes().as_slice()); + hasher.input((self.line_offset as usize).to_be_bytes().as_slice()); + self.theme.as_ref().map(|theme| hasher.input(theme.as_bytes())); + self.name.as_ref().map(|name| hasher.input(name.as_bytes())); + hasher.input(self.language.as_bytes()); + hasher.input(self.code.as_bytes()); + + hasher.result_str() + } +} + +impl Element for Code { + fn location(&self) -> &Token { &self.location } + + fn kind(&self) -> ElemKind { if self.block == CodeKind::Inline { ElemKind::Inline } else { ElemKind::Block } } + + fn element_name(&self) -> &'static str { "Code Block" } + + fn to_string(&self) -> String { format!("{self:#?}") } + + fn compile(&self, compiler: &Compiler, _document: &Document) + -> Result { + + match compiler.target() + { + Target::HTML => { + static CACHE_INIT : Once = Once::new(); + CACHE_INIT.call_once(|| if let Some(mut con) = compiler.cache() { + if let Err(e) = Code::init(&mut con) 
+ { + eprintln!("Unable to create cache table: {e}"); + } + }); + + if let Some(mut con) = compiler.cache() + { + match self.cached(&mut con, |s| s.highlight_html(compiler)) + { + Ok(s) => Ok(s), + Err(e) => match e + { + CachedError::SqlErr(e) => Err(format!("Querying the cache failed: {e}")), + CachedError::GenErr(e) => Err(e) + } + } + } + else + { + self.highlight_html(compiler) + } + } + Target::LATEX => { todo!("") } + } + } +} + +pub struct CodeRule { + re: [Regex; 2], + properties: PropertyParser, +} + +impl CodeRule { + pub fn new() -> Self { + let mut props = HashMap::new(); + props.insert("line_offset".to_string(), + Property::new( + true, + "Line number offset".to_string(), + Some("1".to_string()))); + Self { + re: [ + Regex::new(r"(?:^|\n)```(?:\[((?:\\.|[^\\\\])*?)\])?(.*?)(?:,(.*))?\n((?:\\(?:.|\n)|[^\\\\])*?)```").unwrap(), + Regex::new(r"``(?:\[((?:\\.|[^\[\]\\])*?)\])?(?:(.*)(?:\n|,))?((?:\\(?:.|\n)|[^\\\\])*?)``").unwrap(), + ], + properties: PropertyParser::new(props) + } + } +} + +impl RegexRule for CodeRule +{ + fn name(&self) -> &'static str { "Code" } + + fn regexes(&self) -> &[regex::Regex] { &self.re } + + fn on_regex_match(&self, index: usize, parser: &dyn Parser, document: &Document, token: Token, matches: Captures) + -> Vec, Range)>> { + let mut reports = vec![]; + + let properties = match matches.get(1) + { + None => match self.properties.default() { + Ok(properties) => properties, + Err(e) => { + reports.push( + Report::build(ReportKind::Error, token.source(), token.start()) + .with_message("Invalid code") + .with_label( + Label::new((token.source().clone(), token.range.clone())) + .with_message(format!("Code is missing properties: {e}")) + .with_color(parser.colors().error)) + .finish()); + return reports; + }, + } + Some(props) => { + let processed = util::process_escaped('\\', "]", + props.as_str().trim_start().trim_end()); + match self.properties.parse(processed.as_str()) + { + Err(e) => { + reports.push( + 
Report::build(ReportKind::Error, token.source(), props.start()) + .with_message("Invalid Code Properties") + .with_label( + Label::new((token.source().clone(), props.range())) + .with_message(e) + .with_color(parser.colors().error)) + .finish()); + return reports; + } + Ok(properties) => properties + } + } + }; + + let code_lang = match matches.get(2) + { + None => "Plain Text".to_string(), + Some(lang) => { + let code_lang = lang.as_str().trim_end().trim_start().to_string(); + if code_lang.is_empty() + { + reports.push( + Report::build(ReportKind::Error, token.source(), lang.start()) + .with_message("Missing code language") + .with_label( + Label::new((token.source().clone(), lang.range())) + .with_message("No language specified") + .with_color(parser.colors().error)) + .finish()); + + return reports; + } + + // TODO: validate language + + code_lang + } + }; + + let mut code_content = if index == 0 + { util::process_escaped('\\',"```", matches.get(4).unwrap().as_str()) } + else + { util::process_escaped('\\',"``", matches.get(3).unwrap().as_str()) }; + if code_content.bytes().last() == Some('\n' as u8) // Remove newline + { + code_content.pop(); + } + + if code_content.is_empty() + { + reports.push( + Report::build(ReportKind::Error, token.source(), token.start()) + .with_message("Missing code content") + .with_label( + Label::new((token.source().clone(), token.range.clone())) + .with_message("Code content cannot be empty") + .with_color(parser.colors().error)) + .finish()); + return reports; + } + + let theme = document.get_variable("code.theme") + .and_then(|(_doc, var)| Some(var.to_string())); + + if index == 0 // Block + { + let code_name = matches.get(3) + .and_then(|name| { + let code_name = name.as_str().trim_end().trim_start().to_string(); + (!code_name.is_empty()).then_some(code_name) + }); + let line_offset = match properties.get("line_offset", + |prop, value| value.parse::().map_err(|e| (prop, e))) + { + Ok((_prop, offset)) => offset, + Err((prop, e)) 
=> { + reports.push( + Report::build(ReportKind::Error, token.source(), token.start()) + .with_message("Invalid Code Property") + .with_label( + Label::new((token.source().clone(), token.start()+1..token.end())) + .with_message(format!("Property `line_offset: {}` cannot be converted: {}", + prop.fg(parser.colors().info), + e.fg(parser.colors().error))) + .with_color(parser.colors().warning)) + .finish()); + return reports; + } + }; + + parser.push(document, Box::new( + Code::new( + token.clone(), + CodeKind::FullBlock, + code_lang, + code_name, + code_content, + theme, + line_offset + ) + )); + } + else // Maybe inline + { + let block = if code_content.contains('\n') { CodeKind::MiniBlock } + else { CodeKind::Inline }; + + parser.push(document, Box::new( + Code::new( + token.clone(), + block, + code_lang, + None, + code_content, + theme, + 1, + ) + )); + } + + reports + } +} diff --git a/src/elements/comment.rs b/src/elements/comment.rs new file mode 100644 index 0000000..15836f7 --- /dev/null +++ b/src/elements/comment.rs @@ -0,0 +1,81 @@ +use regex::{Captures, Regex}; +use crate::parser::{parser::Parser, rule::RegexRule, source::{Source, Token}}; +use ariadne::{Report, Label, ReportKind}; +use crate::{compiler::compiler::Compiler, document::{document::Document, element::{ElemKind, Element}}}; +use std::{ops::Range, rc::Rc}; + +#[derive(Debug)] +pub struct Comment { + location: Token, + content: String, +} + +impl Comment +{ + pub fn new(location: Token, content: String ) -> Self { + Self { location: location, content } + } +} + +impl Element for Comment +{ + fn location(&self) -> &Token { &self.location } + fn kind(&self) -> ElemKind { ElemKind::Invisible } + fn element_name(&self) -> &'static str { "Comment" } + fn to_string(&self) -> String { format!("{self:#?}") } + fn compile(&self, _compiler: &Compiler, _document: &Document) + -> Result { + Ok("".to_string()) + } +} + +pub struct CommentRule { + re: [Regex; 1], +} + +impl CommentRule { + pub fn new() -> Self 
{ + Self { re: [Regex::new(r"\s*::(.*)").unwrap()] } + } +} + +impl RegexRule for CommentRule { + fn name(&self) -> &'static str { "Comment" } + + fn regexes(&self) -> &[Regex] { &self.re } + + fn on_regex_match(&self, _: usize, parser: &dyn Parser, document: &Document, token: Token, matches: Captures) + -> Vec, Range)>> { + let mut reports = vec![]; + + let content = match matches.get(1) + { + None => panic!("Unknown error"), + Some(comment) => { + let trimmed = comment.as_str().trim_start().trim_end().to_string(); + if trimmed.is_empty() + { + reports.push( + Report::build(ReportKind::Warning, token.source(), comment.start()) + .with_message("Empty comment") + .with_label( + Label::new((token.source(), comment.range())) + .with_message("Comment is empty") + .with_color(parser.colors().warning)) + .finish()); + } + + trimmed + } + }; + + parser.push(document, Box::new( + Comment::new( + token.clone(), + content + ) + )); + + return reports; + } +} diff --git a/src/elements/import.rs b/src/elements/import.rs new file mode 100644 index 0000000..7db46f4 --- /dev/null +++ b/src/elements/import.rs @@ -0,0 +1,155 @@ +use regex::Regex; +use crate::parser::{parser::{Parser, ReportColors}, rule::RegexRule, source::{Source, SourceFile, Token}}; +use ariadne::{Report, Fmt, Label, ReportKind}; +use crate::document::document::Document; +use std::{ops::Range, rc::Rc}; + +use super::paragraph::Paragraph; + +pub struct ImportRule { + re: [Regex; 1], +} + +impl ImportRule { + pub fn new() -> Self { + Self { + re: [Regex::new(r"(?:^|\n)@import(?:\[(.*)\])?[^\S\r\n]+(.*)").unwrap()], + } + } + + pub fn validate_name(_colors: &ReportColors, name: &str) -> Result + { + Ok(name.to_string()) + } + + pub fn validate_as(_colors: &ReportColors, as_name: &str) -> Result + { + // TODO: Use variable name validation rules + Ok(as_name.to_string()) + } +} + +impl RegexRule for ImportRule { + fn name(&self) -> &'static str { "Import" } + + fn regexes(&self) -> &[Regex] { &self.re } + + fn 
on_regex_match(&self, _: usize, parser: &dyn Parser, document: &Document, token: Token, matches: regex::Captures) -> Vec, Range)>> + { + let mut result = vec![]; + + // Path + let import_file = match matches.get(2) + { + Some(name) => { + match ImportRule::validate_name(parser.colors(), name.as_str()) + { + Err(msg) => { + result.push( + Report::build(ReportKind::Error, token.source(), name.start()) + .with_message("Invalid name for import") + .with_label( + Label::new((token.source(), name.range())) + .with_message(format!("Import name `{}` is invalid. {msg}", + name.as_str().fg(parser.colors().highlight))) + .with_color(parser.colors().error)) + .finish()); + + return result; + }, + Ok(filename) => { + let meta = match std::fs::metadata(filename.as_str()) + { + Err(_) => { + result.push( + Report::build(ReportKind::Error, token.source(), name.start()) + .with_message("Invalid import path") + .with_label( + Label::new((token.source(), name.range())) + .with_message(format!("Unable to access file `{}`", + filename.fg(parser.colors().highlight))) + .with_color(parser.colors().error)) + .finish()); + return result; + }, + Ok(meta) => meta + }; + + if !meta.is_file() + { + result.push( + Report::build(ReportKind::Error, token.source(), name.start()) + .with_message("Invalid import path") + .with_label( + Label::new((token.source(), name.range())) + .with_message(format!("Path `{}` is not a file!", + filename.fg(parser.colors().highlight))) + .with_color(parser.colors().error)) + .finish()); + return result; + } + + filename + }, + } + } + _ => panic!("Invalid name for import") + }; + + // [Optional] import as + let import_as = match matches.get(1) + { + Some(as_name) => { + match ImportRule::validate_as(parser.colors(), as_name.as_str()) + { + Ok(as_name) => as_name, + Err(msg) => { + result.push( + Report::build(ReportKind::Error, token.source(), as_name.start()) + .with_message("Invalid name for import as") + .with_label( + Label::new((token.source(), 
as_name.range())) + .with_message(format!("Canot import `{import_file}` as `{}`. {msg}", + as_name.as_str().fg(parser.colors().highlight))) + .with_color(parser.colors().error)) + .finish()); + + return result; + }, + } + } + _ => "".to_string() + }; + + let import = match SourceFile::new(import_file, Some(token.clone())) + { + Ok(import) => Rc::new(import), + Err(path) => { + result.push( + Report::build(ReportKind::Error, token.source(), token.start()) + .with_message("Unable to read file content") + .with_label( + Label::new((token.source(), token.range)) + .with_message(format!("Failed to read content from path `{path}`")) + .with_color(parser.colors().error)) + .finish()); + return result; + } + }; + + // TODO + let import_doc = parser.parse(import, Some(&document)); + + document.merge(import_doc, Some(&import_as)); + + // Close paragraph + if document.last_element::(false).is_some() + { + parser.push(document, Box::new(Paragraph::new( + Token::new(token.end()..token.end(), token.source()) + ))); + } + + return result; + } +} diff --git a/src/elements/link.rs b/src/elements/link.rs new file mode 100644 index 0000000..634dbab --- /dev/null +++ b/src/elements/link.rs @@ -0,0 +1,149 @@ +use regex::Regex; +use crate::parser::{parser::Parser, rule::RegexRule, source::{Source, Token}, util}; +use ariadne::{Report, Fmt, Label, ReportKind}; +use crate::{compiler::compiler::{Compiler, Target}, document::{document::Document, element::{ElemKind, Element}}}; +use std::{ops::Range, rc::Rc}; + +#[derive(Debug)] +pub struct Link { + location: Token, + name: String, // Link name + url: String, // Link url +} + +impl Link +{ + pub fn new(location: Token, name: String, url: String) -> Self { + Self { location: location, name, url } + } +} + +impl Element for Link +{ + fn location(&self) -> &Token { &self.location } + fn kind(&self) -> ElemKind { ElemKind::Inline } + fn element_name(&self) -> &'static str { "Link" } + fn to_string(&self) -> String { format!("{self:#?}") } + fn 
compile(&self, compiler: &Compiler, _document: &Document) -> Result { + match compiler.target() + { + Target::HTML => { + Ok(format!("{}", + compiler.sanitize(self.url.as_str()), + compiler.sanitize(self.name.as_str()), + )) + }, + Target::LATEX => { + Ok(format!("\\href{{{}}}{{{}}}", + compiler.sanitize(self.url.as_str()), + compiler.sanitize(self.name.as_str()), + )) + }, + } + } +} + +pub struct LinkRule { + re: [Regex; 1], +} + +impl LinkRule { + pub fn new() -> Self { + Self { re: [Regex::new(r"(?:^|\n)```(.*?)(?:,(.*))?\n((?:\\.|[^\[\]\\])*?)```").unwrap()] } + } +} + +impl RegexRule for LinkRule { + fn name(&self) -> &'static str { "Link" } + + fn regexes(&self) -> &[Regex] { &self.re } + + fn on_regex_match(&self, _: usize, parser: &dyn Parser, document: &Document, token: Token, matches: regex::Captures) -> Vec, Range)>> + { + let mut result = vec![]; + let link_name = match matches.get(1) + { + Some(name) => { + if name.as_str().is_empty() + { + result.push( + Report::build(ReportKind::Error, token.source(), name.start()) + .with_message("Empty link name") + .with_label( + Label::new((token.source().clone(), name.range())) + .with_message("Link name is empty") + .with_color(parser.colors().error)) + .finish()); + return result; + } + // TODO: process into separate document... + let text_content = util::process_text(document, name.as_str()); + + if text_content.as_str().is_empty() + { + result.push( + Report::build(ReportKind::Error, token.source(), name.start()) + .with_message("Empty link name") + .with_label( + Label::new((token.source(), name.range())) + .with_message(format!("Link name is empty. 
Once processed, `{}` yields `{}`", + name.as_str().fg(parser.colors().highlight), + text_content.as_str().fg(parser.colors().highlight), + )) + .with_color(parser.colors().error)) + .finish()); + return result; + } + text_content + }, + _ => panic!("Empty link name"), + }; + + let link_url = match matches.get(2) + { + Some(url) => { + if url.as_str().is_empty() + { + result.push( + Report::build(ReportKind::Error, token.source(), url.start()) + .with_message("Empty link url") + .with_label( + Label::new((token.source(), url.range())) + .with_message("Link url is empty") + .with_color(parser.colors().error)) + .finish()); + return result; + } + let text_content = util::process_text(document, url.as_str()); + + if text_content.as_str().is_empty() + { + result.push( + Report::build(ReportKind::Error, token.source(), url.start()) + .with_message("Empty link url") + .with_label( + Label::new((token.source(), url.range())) + .with_message(format!("Link url is empty. Once processed, `{}` yields `{}`", + url.as_str().fg(parser.colors().highlight), + text_content.as_str().fg(parser.colors().highlight), + )) + .with_color(parser.colors().error)) + .finish()); + return result; + } + text_content + }, + _ => panic!("Empty link url"), + }; + + parser.push(document, Box::new( + Link::new( + token.clone(), + link_name, + link_url + ) + )); + + return result; + } +} diff --git a/src/elements/list.rs b/src/elements/list.rs new file mode 100644 index 0000000..0b44ddd --- /dev/null +++ b/src/elements/list.rs @@ -0,0 +1,335 @@ +use std::{any::Any, cell::Ref, ops::Range, rc::Rc}; + +use crate::{compiler::compiler::{Compiler, Target}, document::{document::Document, element::{ElemKind, Element}}, parser::{parser::Parser, rule::Rule, source::{Cursor, Source, Token, VirtualSource}}}; +use ariadne::{Label, Report, ReportKind}; +use regex::Regex; + +use super::paragraph::Paragraph; + +#[derive(Debug)] +pub struct ListEntry { + location: Token, + numbering: Vec<(bool, usize)>, + content: 
Vec>, + + // TODO bullet_maker : FnMut<...> +} + +impl ListEntry { + pub fn new(location: Token, numbering: Vec<(bool, usize)>, content: Vec>) -> Self { + Self { location, numbering, content } + } +} + +#[derive(Debug)] +pub struct List +{ + location: Token, + entries: Vec +} + +impl List +{ + pub fn new(location: Token) -> Self + { + Self + { + location, + entries: Vec::new() + } + } + + pub fn push(&mut self, entry: ListEntry) + { + self.location.range = self.location.start()..entry.location.end(); + self.entries.push(entry); + } +} + +impl Element for List +{ + fn location(&self) -> &Token { &self.location } + + fn kind(&self) -> ElemKind { ElemKind::Block } + + fn element_name(&self) -> &'static str { "List" } + + fn to_string(&self) -> String { format!("{self:#?}") } + + fn compile(&self, compiler: &Compiler, document: &Document) -> Result { + match compiler.target() + { + Target::HTML => { + let mut result = String::new(); + + //TODO: Do something about indexing + let mut current_list: Vec = vec![]; + let mut match_stack = |result: &mut String, target: &Vec<(bool, usize)>| { + + // Find index after which current_list and target differ + let mut match_idx = 0usize; + for i in 0..current_list.len() + { + if i >= target.len() || current_list[i] != target[i].0 { break } + else { match_idx = i+1; } + } + + // Close until same match + for _ in match_idx..current_list.len() + { + result.push_str(["", ""][current_list.pop().unwrap() as usize]); + } + + // Open + for i in match_idx..target.len() + { + result.push_str(["
    ", "
      "][target[i].0 as usize]); + current_list.push(target[i].0); + } + }; + + match self.entries.iter() + .try_for_each(|ent| + { + match_stack(&mut result, &ent.numbering); + result.push_str("
    1. "); + match ent.content.iter().enumerate() + .try_for_each(|(idx, elem)| { + match elem.compile(compiler, document) { + Err(e) => Err(e), + Ok(s) => { result.push_str(s.as_str()); Ok(()) } + } + }) + { + Err(e) => Err(e), + _ => { + result.push_str("
    2. "); + Ok(()) + } + } + }) + { + Err(e) => return Err(e), + _ => {} + } + match_stack(&mut result, &Vec::<(bool, usize)>::new()); + + Ok(result) + } + Target::LATEX => Err("Unimplemented compiler".to_string()) + } + } +} + +/* +impl Element for ListEntry +{ + fn location(&self) -> &Token { &self.location } + fn kind(&self) -> ElemKind { ElemKind::Inline } + fn element_name(&self) -> &'static str { "List" } + fn to_string(&self) -> String { format!("{self:#?}") } + fn compile(&self, compiler: &Compiler) -> Result { + lazy_static! { + static ref STATE_NAME : &'static str = "list.state"; + static ref LIST_OPEN : [&'static str; 2] = ["
        ", "
          "]; + static ref LIST_CLOSE : [&'static str; 2] = ["
      ", "
    "]; + } + + // TODO: State.shouldpreserve? + // Called upon every element + //let state = compiler.get_state_mut::(*STATE_NAME) + //.or_else(|| { + // compiler.insert_state(STATE_NAME.to_string(), Box::new(ListState(Vec::new())) as Box); + // compiler.get_state_mut::(*STATE_NAME) + //}).unwrap(); + + match compiler.target() + { + Target::HTML => { + let mut result = String::new(); + + //TODO: Do something about indexing + //&self.numbering.iter() + // .zip(&state.0) + // .for_each(|((wants_numbered, _), is_numbered)| + // { + // + // }); + + result.push_str("
  • "); + match self.content.iter() + .try_for_each(|ent| match ent.compile(compiler) { + Err(e) => Err(e), + Ok(s) => Ok(result.push_str(s.as_str())), + }) + { + Err(e) => return Err(e), + _ => {} + } + result.push_str("
  • "); + //result.push_str(LIST_OPEN[self.numbered as usize]); + //self.entries.iter() + // .for_each(|(_index, entry)| + // result.push_str(format!("
  • {}
  • ", compiler.compile(entry)).as_str())); + //result.push_str(LIST_CLOSE[self.numbered as usize]); + Ok(result) + } + Target::LATEX => Err("Unimplemented compiler".to_string()) + } + } +} +*/ + +pub struct ListRule +{ + start_re: Regex, + continue_re: Regex +} + +impl ListRule { + pub fn new() -> Self { + Self { + start_re: Regex::new(r"(?:^|\n)(?:[^\S\r\n]+)([*-]+).*").unwrap(), + continue_re: Regex::new(r"(?:^|\n)([^\S\r\n]+).*").unwrap(), + } + + } + + fn parse_depth(depth: &str, document: &Document) -> Vec<(bool, usize)> + { + let mut parsed = vec![]; + let prev_entry = document.last_element::(true) + .and_then(|list| Ref::filter_map(list, |m| m.entries.last() ).ok() ) + .and_then(|entry| Ref::filter_map(entry, |e| Some(&e.numbering)).ok() ); + + let mut continue_match = true; + depth.chars().enumerate().for_each(|(idx, c)| + { + let number = prev_entry.as_ref() + .and_then(|v| { + if !continue_match { return None } + let numbered = c == '-'; + + match v.get(idx) + { + None => None, + Some((prev_numbered, prev_idx)) => { + if *prev_numbered != numbered { continue_match = false; None } // New depth + else if idx+1 == v.len() { Some(prev_idx+1) } // Increase from previous + else { Some(*prev_idx) } // Do nothing + } + } + }) + .or(Some(0usize)) + .unwrap(); + + match c + { + '*' => parsed.push((false, number)), + '-' => parsed.push((true, number)), + _ => panic!("Unimplemented") + } + }); + + return parsed; + } +} + +impl Rule for ListRule +{ + fn name(&self) -> &'static str { "List" } + + fn next_match(&self, cursor: &Cursor) -> Option<(usize, Box)> { + self.start_re.find_at(cursor.source.content(), cursor.pos) + .map_or(None, + |m| Some((m.start(), Box::new([false;0]) as Box)) ) + } + + fn on_match<'a>(&self, parser: &dyn Parser, document: &'a Document<'a>, cursor: Cursor, _match_data: Option>) -> (Cursor, Vec, Range)>>) { + let mut reports = vec![]; + let content = cursor.source.content(); + let (end_cursor, numbering, source) = match 
self.start_re.captures_at(content, cursor.pos) { + None => panic!("Unknown error"), + Some(caps) => { + let mut end_pos = caps.get(0).unwrap().end(); + + let mut spacing = None; // Spacing used to continue list entry + loop { + // If another entry starts on the next line, don't continue matching + match self.next_match(&cursor.at(end_pos)) + { + Some((pos, _)) => { + if pos == end_pos { break } + } + None => {}, + } + + // Continue matching as current entry + match self.continue_re.captures_at(content, end_pos) { + None => break, + Some(continue_caps) => { + if continue_caps.get(0).unwrap().start() != end_pos { break } + + // Get the spacing + let cap_spacing = continue_caps.get(1).unwrap(); + match &spacing { + None => spacing = Some(cap_spacing.range()), + Some(spacing) => 'some: { + if content[cap_spacing.range()] == content[spacing.clone()] { break 'some } + + reports.push( + Report::build(ReportKind::Warning, cursor.source.clone(), continue_caps.get(1).unwrap().start()) + .with_message("Invalid list entry spacing") + .with_label( + Label::new((cursor.source.clone(), cap_spacing.range())) + .with_message("Spacing for list entries must match") + .with_color(parser.colors().warning)) + .with_label( + Label::new((cursor.source.clone(), spacing.clone())) + .with_message("Previous spacing") + .with_color(parser.colors().warning)) + .finish()); + }, + } + end_pos = continue_caps.get(0).unwrap().end(); + } + } + } + + let start_pos = caps.get(1).unwrap().end(); + let source = VirtualSource::new( + Token::new(start_pos..end_pos, cursor.source.clone()), + "List Entry".to_string(), + content.as_str()[start_pos..end_pos].to_string(), + ); + + (cursor.at(end_pos), + ListRule::parse_depth(caps.get(1).unwrap().as_str(), document), + source) + }, + }; + + let parsed_entry = parser.parse(Rc::new(source), Some(&document)); + let mut parsed_paragraph = parsed_entry.last_element_mut::(false).unwrap(); // Extract content from paragraph + let entry = ListEntry::new( + 
Token::new(cursor.pos..end_cursor.pos, cursor.source.clone()), + numbering, + std::mem::replace(&mut parsed_paragraph.content, Vec::new()) + ); + + // Ger previous list, if none insert a new list + let mut list = match document.last_element_mut::(false) + { + Some(last) => last, + None => { + parser.push(document, + Box::new(List::new( + Token::new(cursor.pos..end_cursor.pos, cursor.source.clone())))); + document.last_element_mut::(false).unwrap() + } + }; + list.push(entry); + + (end_cursor, reports) + } +} diff --git a/src/elements/mod.rs b/src/elements/mod.rs new file mode 100644 index 0000000..af33474 --- /dev/null +++ b/src/elements/mod.rs @@ -0,0 +1,13 @@ +pub mod registrar; +pub mod comment; +pub mod paragraph; +pub mod variable; +pub mod import; +pub mod script; +pub mod list; +pub mod style; +pub mod section; +pub mod link; +pub mod code; +pub mod tex; +pub mod raw; diff --git a/src/elements/paragraph.rs b/src/elements/paragraph.rs new file mode 100644 index 0000000..449a452 --- /dev/null +++ b/src/elements/paragraph.rs @@ -0,0 +1,127 @@ +use std::{any::Any, ops::Range, rc::Rc}; + +use ariadne::Report; +use regex::Regex; + +use crate::{compiler::compiler::{Compiler, Target}, document::{document::Document, element::{ElemKind, Element}}, parser::{parser::Parser, rule::Rule, source::{Cursor, Source, Token}}}; + +// TODO: Full refactor +// Problem is that document parsed from other sources i.e by variables +// are not merged correctly into existing paragraph +// A solution would be to use the "(\n){2,}" regex to split paragraph, which would reduce the work needed for process_text +// Another fix would be to keep parsing (recursively) into the same document (like previous version) +// The issue is that this would break the current `Token` implementation +// Which would need to be reworked +#[derive(Debug)] +pub struct Paragraph +{ + location: Token, + pub content: Vec> +} + +impl Paragraph +{ + pub fn new(location: Token) -> Self { + Self { location, content: 
Vec::new() } + } + + pub fn is_empty(&self) -> bool { self.content.is_empty() } + + pub fn push(&mut self, elem: Box) + { + if elem.location().source() == self.location().source() + { + self.location.range = self.location.start() .. elem.location().end(); + } + self.content.push(elem); + } + + pub fn find_back) -> bool>(&self, mut predicate: P) + -> Option<&Box> { + self.content.iter().rev() + .find(predicate) + } +} + +impl Element for Paragraph +{ + fn location(&self) -> &Token { &self.location } + + fn kind(&self) -> ElemKind { ElemKind::Special } + + fn element_name(&self) -> &'static str { "Paragraph" } + + fn to_string(&self) -> String { format!("{:#?}", self) } + + fn compile(&self, compiler: &Compiler, document: &Document) -> Result { + if self.content.is_empty() { return Ok(String::new()) } + + match compiler.target() + { + Target::HTML => { + let mut result = String::new(); + //if prev.is_none() || prev.unwrap().downcast_ref::().is_none() + { result.push_str("

    "); } + //else + //{ result.push_str(" "); } + + let err = self.content.iter().try_for_each(|elem| { + match elem.compile(compiler, document) + { + Err(e) => return Err(e), + Ok(content) => { result.push_str(content.as_str()); Ok(()) }, + } + }); + //if next.is_none() || next.unwrap().downcast_ref::().is_none() + { result.push_str("

    "); } + + match err + { + Err(e) => Err(e), + Ok(()) => Ok(result), + } + } + Target::LATEX => todo!("Unimplemented compiler") + } + } +} + +pub struct ParagraphRule +{ + re: Regex, +} + +impl ParagraphRule { + pub fn new() -> Self { + Self { + re: Regex::new(r"\n{2,}").unwrap() + } + } +} + +impl Rule for ParagraphRule +{ + fn name(&self) -> &'static str { "Paragraphing" } + + fn next_match(&self, cursor: &Cursor) -> Option<(usize, Box)> { + self.re.find_at(cursor.source.content(), cursor.pos) + .and_then(|m| Some((m.start(), Box::new([false;0]) as Box)) ) + } + + fn on_match(&self, parser: &dyn Parser, document: &Document, cursor: Cursor, _match_data: Option>) + -> (Cursor, Vec, Range)>>) { + + let end_cursor = match self.re.captures_at(cursor.source.content(), cursor.pos) + { + None => panic!("Unknown error"), + Some(capture) => + cursor.at(capture.get(0).unwrap().end()-1) + }; + + parser.push(document, Box::new(Paragraph::new( + Token::new(cursor.pos..end_cursor.pos, cursor.source.clone()) + ))); + + (end_cursor, Vec::new()) + } +} diff --git a/src/elements/raw.rs b/src/elements/raw.rs new file mode 100644 index 0000000..ed53894 --- /dev/null +++ b/src/elements/raw.rs @@ -0,0 +1,164 @@ +use regex::{Captures, Regex}; +use crate::{compiler::compiler::Compiler, document::element::{ElemKind, Element}, parser::{parser::Parser, rule::RegexRule, source::{Source, Token}, util::{self, Property, PropertyParser}}}; +use ariadne::{Fmt, Label, Report, ReportKind}; +use crate::document::document::Document; +use std::{collections::HashMap, ops::Range, rc::Rc, str::FromStr}; + +#[derive(Debug)] +struct Raw { + location: Token, + kind: ElemKind, + content: String, +} + +impl Raw { + fn new(location: Token, kind: ElemKind, content: String) -> Self { + Self { location, kind, content } + } +} + +impl Element for Raw { + fn location(&self) -> &Token { &self.location } + fn kind(&self) -> ElemKind { self.kind.clone() } + + fn element_name(&self) -> &'static str { "Raw" } + + fn 
to_string(&self) -> String { format!("{self:#?}") } + + fn compile(&self, compiler: &Compiler, _document: &Document) -> Result { + Ok(self.content.clone()) + } +} + +pub struct RawRule { + re: [Regex; 1], + properties: PropertyParser, +} + +impl RawRule { + pub fn new() -> Self { + let mut props = HashMap::new(); + props.insert("kind".to_string(), + Property::new( + true, + "Element display kind".to_string(), + Some("inline".to_string()))); + Self { + re: [ + Regex::new(r"\{\?(?:\[((?:\\.|[^\[\]\\])*?)\])?(?:((?:\\.|[^\\\\])*?)(\?\}))?").unwrap() + ], + properties: PropertyParser::new(props) + } + } +} + +impl RegexRule for RawRule +{ + fn name(&self) -> &'static str { "Raw" } + + fn regexes(&self) -> &[regex::Regex] { &self.re } + + fn on_regex_match(&self, _index: usize, parser: &dyn Parser, document: &Document, token: Token, matches: Captures) + -> Vec, Range)>> { + let mut reports = vec![]; + + let raw_content = match matches.get(2) + { + // Unterminated + None => { + reports.push( + Report::build(ReportKind::Error, token.source(), token.start()) + .with_message("Unterminated Raw Code") + .with_label( + Label::new((token.source().clone(), token.range.clone())) + .with_message(format!("Missing terminating `{}` after first `{}`", + "?}".fg(parser.colors().info), + "{?".fg(parser.colors().info))) + .with_color(parser.colors().error)) + .finish()); + return reports; + } + Some(content) => { + let processed = util::process_escaped('\\', "?}", + content.as_str().trim_start().trim_end()); + + if processed.is_empty() + { + reports.push( + Report::build(ReportKind::Warning, token.source(), content.start()) + .with_message("Empty Raw Code") + .with_label( + Label::new((token.source().clone(), content.range())) + .with_message("Raw code is empty") + .with_color(parser.colors().warning)) + .finish()); + } + processed + } + }; + + let properties = match matches.get(1) + { + None => match self.properties.default() { + Ok(properties) => properties, + Err(e) => { + 
reports.push( + Report::build(ReportKind::Error, token.source(), token.start()) + .with_message("Invalid Raw Code") + .with_label( + Label::new((token.source().clone(), token.range.clone())) + .with_message(format!("Raw code is missing properties: {e}")) + .with_color(parser.colors().error)) + .finish()); + return reports; + }, + } + Some(props) => { + let processed = util::process_escaped('\\', "]", + props.as_str().trim_start().trim_end()); + match self.properties.parse(processed.as_str()) + { + Err(e) => { + reports.push( + Report::build(ReportKind::Error, token.source(), props.start()) + .with_message("Invalid Raw Code Properties") + .with_label( + Label::new((token.source().clone(), props.range())) + .with_message(e) + .with_color(parser.colors().error)) + .finish()); + return reports; + } + Ok(properties) => properties + } + } + }; + + let raw_kind : ElemKind = match properties.get("kind", + |prop, value| ElemKind::from_str(value.as_str()).map_err(|e| (prop, e))) + { + Ok((_prop, kind)) => kind, + Err((prop, e)) => { + reports.push( + Report::build(ReportKind::Error, token.source(), token.start()) + .with_message("Invalid Raw Code Property") + .with_label( + Label::new((token.source().clone(), token.range.clone())) + .with_message(format!("Property `kind: {}` cannot be converted: {}", + prop.fg(parser.colors().info), + e.fg(parser.colors().error))) + .with_color(parser.colors().warning)) + .finish()); + return reports; + } + }; + + parser.push(document, Box::new(Raw::new( + token.clone(), + raw_kind, + raw_content + ))); + + reports + } +} diff --git a/src/elements/registrar.rs b/src/elements/registrar.rs new file mode 100644 index 0000000..e3b7efc --- /dev/null +++ b/src/elements/registrar.rs @@ -0,0 +1,22 @@ +use crate::parser::parser::Parser; + +use super::{code::CodeRule, comment::CommentRule, import::ImportRule, link::LinkRule, list::ListRule, paragraph::ParagraphRule, raw::RawRule, script::ScriptRule, section::SectionRule, style::StyleRule, 
tex::TexRule, variable::{VariableRule, VariableSubstitutionRule}}; + + +pub fn register(parser: &mut P) +{ + parser.add_rule(Box::new(CommentRule::new()), None); + parser.add_rule(Box::new(ParagraphRule::new()), None); + parser.add_rule(Box::new(ImportRule::new()), None); + parser.add_rule(Box::new(ScriptRule::new()), None); + parser.add_rule(Box::new(VariableRule::new()), None); + parser.add_rule(Box::new(VariableSubstitutionRule::new()), None); + parser.add_rule(Box::new(RawRule::new()), None); + parser.add_rule(Box::new(ListRule::new()), None); + parser.add_rule(Box::new(CodeRule::new()), None); + parser.add_rule(Box::new(TexRule::new()), None); + + parser.add_rule(Box::new(StyleRule::new()), None); + parser.add_rule(Box::new(SectionRule::new()), None); + parser.add_rule(Box::new(LinkRule::new()), None); +} diff --git a/src/elements/script.rs b/src/elements/script.rs new file mode 100644 index 0000000..01cb39f --- /dev/null +++ b/src/elements/script.rs @@ -0,0 +1,201 @@ +use regex::{Captures, Regex}; +use crate::{document::element::Text, lua::kernel::{Kernel, KernelHolder}, parser::{parser::{Parser, ReportColors}, rule::RegexRule, source::{Source, Token, VirtualSource}, util}}; +use ariadne::{Fmt, Label, Report, ReportKind}; +use crate::document::document::Document; +use std::{ops::Range, rc::Rc}; + +pub struct ScriptRule +{ + re: [Regex; 2], + eval_kinds: [(&'static str, &'static str); 2] +} + +impl ScriptRule { + pub fn new() -> Self { + Self { + re: [ + Regex::new(r"(?:^|\n)@<(?:(.*)\n?)((?:\\.|[^\[\]\\])*?)(?:\n?)>@").unwrap(), + Regex::new(r"%<([^\s[:alpha:]])?(?:\[(.*?)\])?((?:\\.|[^\[\]\\])*?)(?:\n?)>%").unwrap() + ], + eval_kinds: [ + ("", "Eval to text"), + ("!", "Eval and parse"), + ] + } + } + + fn validate_kernel_name(colors: &ReportColors, name: &str) + -> Result { + let trimmed = name.trim_end().trim_start(); + if trimmed.is_empty() { return Ok("main".to_string()) } + else if trimmed.find(|c: char| c.is_whitespace()).is_some() { + return 
Err(format!("Kernel name `{}` contains whitespaces", + trimmed.fg(colors.highlight))) + } + + Ok(trimmed.to_string()) + } + + fn validate_kind(&self, colors: &ReportColors, kind: &str) + -> Result { + match self.eval_kinds.iter().position(|(kind_symbol, _)| kind == *kind_symbol) + { + Some(id) => Ok(id), + None => Err(format!("Unable to find eval kind `{}`. Available kinds:{}", + kind.fg(colors.highlight), + self.eval_kinds.iter().fold(String::new(), |out, (symbol, name)| { + out + format!("\n - '{symbol}' => {name}").as_str() + }))) + } + } +} + +impl RegexRule for ScriptRule +{ + fn name(&self) -> &'static str { "Script" } + + fn regexes(&self) -> &[regex::Regex] { &self.re } + + fn on_regex_match(&self, index: usize, parser: &dyn Parser, document: &Document, token: Token, matches: Captures) + -> Vec, Range)>> { + let mut reports = vec![]; + + let kernel_name = match matches.get(if index == 0 {1} else {2}) { + None => "main".to_string(), + Some(name) => { + match ScriptRule::validate_kernel_name(parser.colors(), name.as_str()) + { + Ok(name) => name, + Err(e) => { + reports.push( + Report::build(ReportKind::Error, token.source(), name.start()) + .with_message("Invalid kernel name") + .with_label( + Label::new((token.source(), name.range())) + .with_message(e) + .with_color(parser.colors().error)) + .finish()); + return reports; + } + } + } + }; + let kernel_name = matches.get(if index == 0 {1} else {2}) + .and_then(|name| { + let trimmed = name.as_str().trim_start().trim_end(); + (!trimmed.is_empty()).then_some(trimmed) + }) + .unwrap_or("main"); + let kernel = parser.get_kernel(kernel_name).unwrap_or_else(|| { + parser.insert_kernel(kernel_name.to_string(), Kernel::new()) + }); + + let kernel_data = matches.get(if index == 0 {2} else {3}) + .and_then(|code| { + let trimmed = code.as_str().trim_start().trim_end(); + (!trimmed.is_empty()).then_some((trimmed, code.range())) + }).or_else(|| { + reports.push( + Report::build(ReportKind::Warning, token.source(), 
token.start()) + .with_message("Invalid kernel code") + .with_label( + Label::new((token.source(), token.start()+1..token.end())) + .with_message("Kernel code is empty") + .with_color(parser.colors().warning)) + .finish()); + + None + }); + + if kernel_data.is_none() { return reports; } + + let (kernel_content, kernel_range) = kernel_data.unwrap(); + let source = Rc::new(VirtualSource::new( + Token::new(kernel_range, token.source()), + format!("{}#{}:lua_kernel@{kernel_name}", token.source().name(), matches.get(0).unwrap().start()), + util::process_escaped('\\', ">@", kernel_content) + )) as Rc; + + let chunk = kernel.lua.load(source.content()) + .set_name(kernel_name); + if index == 0 // @< ... >@ -> Exec + { + match chunk.exec() + { + Ok(_) => {}, + Err(e) => { + reports.push( + Report::build(ReportKind::Error, source.clone(), 0) + .with_message("Invalid kernel code") + .with_label( + Label::new((source.clone(), 0..source.content().len())) + .with_message(format!("Kernel execution failed:\n{}", e.to_string())) + .with_color(parser.colors().error)) + .finish()); + } + } + } + else if index == 1 // %< ... 
>% -> Eval + { + let kind = match matches.get(1) { + None => 0, + Some(kind) => { + match self.validate_kind(parser.colors(), kind.as_str()) + { + Ok(kind) => kind, + Err(msg) => { + reports.push( + Report::build(ReportKind::Error, token.source(), kind.start()) + .with_message("Invalid kernel code kind") + .with_label( + Label::new((token.source(), kind.range())) + .with_message(msg) + .with_color(parser.colors().error)) + .finish()); + return reports; + } + } + } + }; + + match chunk.eval::() + { + Ok(result) => { + if kind == 0 // Eval to text + { + if !result.is_empty() + { + parser.push(document, Box::new(Text::new( + Token::new(1..source.content().len(), source.clone()), + util::process_text(document, result.as_str()), + ))); + } + } + else if kind == 1 // Eval and Parse + { + let parse_source = Rc::new(VirtualSource::new( + Token::new(0..source.content().len(), source.clone()), + format!("parse({})", source.name()), + result + )) as Rc; + //println!("SRC={parse_source:#?}, {}", parse_source.content()); + + parser.parse_into(parse_source, document); + } + }, + Err(e) => { + reports.push( + Report::build(ReportKind::Error, source.clone(), 0) + .with_message("Invalid kernel code") + .with_label( + Label::new((source.clone(), 0..source.content().len())) + .with_message(format!("Kernel evaluation failed:\n{}", e.to_string())) + .with_color(parser.colors().error)) + .finish()); + } + } + } + + reports + } +} diff --git a/src/elements/section.rs b/src/elements/section.rs new file mode 100644 index 0000000..dfa794f --- /dev/null +++ b/src/elements/section.rs @@ -0,0 +1,208 @@ +use regex::Regex; +use crate::{compiler::compiler::Target, parser::{parser::Parser, rule::RegexRule, source::{Source, Token}}}; +use ariadne::{Report, Fmt, Label, ReportKind}; +use crate::{compiler::compiler::Compiler, document::{document::Document, element::{ElemKind, Element, ReferenceableElement}}}; +use std::{ops::Range, rc::Rc}; + +#[derive(Debug)] +pub struct Section { + location: Token, 
+ title: String, // Section title + depth: usize, // Section depth + kind: u8, // Section kind, e.g numbered, unnumbred, ... + reference: Option, // Section reference name +} + +impl Section +{ + pub fn new(location: Token, title: String, depth: usize, kind: u8, reference: Option) -> Self { + Self { location: location, title, depth, kind, reference } + } +} + +impl Element for Section +{ + fn location(&self) -> &Token { &self.location } + fn kind(&self) -> ElemKind { ElemKind::Block } + fn element_name(&self) -> &'static str { "Section" } + fn to_string(&self) -> String { format!("{self:#?}") } + fn as_referenceable(&self) -> Option<&dyn ReferenceableElement> { Some(self) } + fn compile(&self, compiler: &Compiler, _document: &Document) -> Result { + match compiler.target() + { + Target::HTML => { + Ok(format!("{1}", + self.depth, + compiler.sanitize(self.title.as_str()))) + } + Target::LATEX => Err("Unimplemented compiler".to_string()) + } + } +} + +impl ReferenceableElement for Section +{ + fn reference_name(&self) -> Option<&String> { self.reference.as_ref() } +} + +pub struct SectionRule { + re: [Regex; 1], +} + +impl SectionRule { + pub fn new() -> Self { + Self { re: [Regex::new(r"(?:^|\n)(#{1,})(?:\{(.*)\})?((\*|\+){1,})?(.*)").unwrap()] } + } +} + +pub mod section_kind +{ + pub const NONE : u8 = 0x00; + pub const NO_TOC : u8 = 0x01; + pub const NO_NUMBER : u8 = 0x02; +} + +impl RegexRule for SectionRule { + fn name(&self) -> &'static str { "Section" } + + fn regexes(&self) -> &[Regex] { &self.re } + + fn on_regex_match(&self, _: usize, parser: &dyn Parser, document: &Document, token: Token, matches: regex::Captures) -> Vec, Range)>> + { + let mut result = vec![]; + let section_depth = match matches.get(1) + { + Some(depth) => { + if depth.len() > 6 { + result.push( + Report::build(ReportKind::Error, token.source(), depth.start()) + .with_message("Invalid section depth") + .with_label( + Label::new((token.source(), depth.range())) + 
.with_message(format!("Section is of depth {}, which is greather than {} (maximum depth allowed)", + depth.len().fg(parser.colors().info), + 6.fg(parser.colors().info))) + .with_color(parser.colors().error)) + .finish()); + return result; + } + + depth.len() + }, + _ => panic!("Empty section depth"), + }; + + // [Optional] Reference name + let section_refname = matches.get(2).map_or_else(|| None, + |refname| { + // Check for duplicate reference + if let Some((ref_doc, reference)) = document.get_reference(refname.as_str()) + { + result.push( + Report::build(ReportKind::Warning, token.source(), refname.start()) + .with_message("Duplicate reference name") + .with_label( + Label::new((token.source(), refname.range())) + .with_message(format!("Reference with name `{}` is already defined in `{}`", + refname.as_str().fg(parser.colors().highlight), + ref_doc.source().name().as_str().fg(parser.colors().highlight))) + .with_message(format!("`{}` conflicts with previously defined reference to {}", + refname.as_str().fg(parser.colors().highlight), + reference.element_name().fg(parser.colors().highlight))) + .with_color(parser.colors().warning)) + .with_label( + Label::new((ref_doc.source(), reference.location().start()+1..reference.location().end() )) + .with_message(format!("`{}` previously defined here", + refname.as_str().fg(parser.colors().highlight))) + .with_color(parser.colors().warning)) + .with_note(format!("Previous reference was overwritten")) + .finish()); + } + Some(refname.as_str().to_string()) + }); + + // Section kind + let section_kind = match matches.get(3) + { + Some(kind) => { + match kind.as_str() { + "*+" | "+*" => section_kind::NO_NUMBER | section_kind::NO_TOC, + "*" => section_kind::NO_NUMBER, + "+" => section_kind::NO_TOC, + "" => section_kind::NONE, + _ => { + result.push( + Report::build(ReportKind::Error, token.source(), kind.start()) + .with_message("Invalid section numbering kind") + .with_label( + Label::new((token.source(), kind.range())) + 
.with_message(format!("Section numbering kind must be a combination of `{}` for unnumbered, and `{}` for non-listing; got `{}`", + "*".fg(parser.colors().info), + "+".fg(parser.colors().info), + kind.as_str().fg(parser.colors().highlight))) + .with_color(parser.colors().error)) + .with_help(format!("Leave empty for a numbered listed section")) + .finish()); + return result; + } + } + } + _ => section_kind::NONE, + }; + + // Spacing + Section name + let section_name = match matches.get(5) + { + Some(name) => { + let split = name.as_str().chars() + .position(|c| !c.is_whitespace()) + .unwrap_or(0); + + let section_name = &name.as_str()[split..]; + if section_name.is_empty() // No name + { + result.push( + Report::build(ReportKind::Error, token.source(), name.start()) + .with_message("Missing section name") + .with_label( + Label::new((token.source(), name.range())) + .with_message("Sections require a name before line end") + .with_color(parser.colors().error)) + .finish()); + return result; + } + + // No spacing + if split == 0 + { + result.push( + Report::build(ReportKind::Warning, token.source(), name.start()) + .with_message("Missing section spacing") + .with_label( + Label::new((token.source(), name.range())) + .with_message("Sections require at least one whitespace before the section's name") + .with_color(parser.colors().warning)) + .with_help(format!("Add a space before `{}`", section_name.fg(parser.colors().highlight))) + .finish()); + return result; + } + + + section_name.to_string() + }, + _ => panic!("Empty section name") + }; + + parser.push(document, Box::new( + Section::new( + token.clone(), + section_name, + section_depth, + section_kind, + section_refname + ) + )); + + return result; + } +} diff --git a/src/elements/style.rs b/src/elements/style.rs new file mode 100644 index 0000000..631c147 --- /dev/null +++ b/src/elements/style.rs @@ -0,0 +1,185 @@ +use regex::{Captures, Regex}; +use crate::{compiler::compiler::{Compiler, Target}, 
document::element::{ElemKind, Element}, parser::{parser::Parser, rule::RegexRule, source::{Source, Token}, state::State}}; +use ariadne::{Fmt, Label, Report, ReportKind}; +use crate::document::document::Document; +use crate::parser::state::Scope; +use std::{cell::RefCell, ops::Range, rc::Rc}; +use lazy_static::lazy_static; + +use super::paragraph::Paragraph; + +#[derive(Debug)] +pub struct Style { + location: Token, + kind: usize, + close: bool, +} + +impl Style +{ + pub fn new(location: Token, kind: usize, close: bool) -> Self { + Self { location, kind, close } + } +} + +impl Element for Style +{ + fn location(&self) -> &Token { &self.location } + fn kind(&self) -> ElemKind { ElemKind::Inline } + fn element_name(&self) -> &'static str { "Section" } + fn to_string(&self) -> String { format!("{self:#?}") } + fn compile(&self, compiler: &Compiler, _document: &Document) -> Result { + match compiler.target() + { + Target::HTML => { + Ok([ + // Bold + "", "", + // Italic + "", "", + // Underline + "", "", + // Code + "", "", + ][self.kind*2 + self.close as usize].to_string()) + } + Target::LATEX => Err("Unimplemented compiler".to_string()) + } + } +} + +struct StyleState +{ + toggled: [Option; 4] +} + +impl StyleState { + const NAMES : [&'static str; 4] = ["Bold", "Italic", "Underline", "Code"]; + + fn new() -> Self { + Self { toggled: [None, None, None, None] } + } +} + +impl State for StyleState +{ + fn scope(&self) -> Scope { Scope::PARAGRAPH } + + fn on_remove<'a>(&self, parser: &dyn Parser, document: &Document) -> Vec, Range)>> { + let mut result = Vec::new(); + self.toggled + .iter() + .zip(StyleState::NAMES) + .for_each(|(token, name)| + { + if token.is_none() { return } // Style not enabled + let token = token.as_ref().unwrap(); + + //let range = range.as_ref().unwrap(); + + //let active_range = range.start .. 
paragraph.location().end()-1; + + let paragraph = document.last_element::(false).unwrap(); + let paragraph_end = paragraph.content.last() + .and_then(|last| Some((last.location().source(), last.location().end()-1 .. last.location().end()))) + .unwrap(); + + // TODO: Allow style to span multiple documents if they don't break paragraph. + result.push( + Report::build(ReportKind::Error, token.source(), token.start()) + .with_message("Unterminated style") + //.with_label( + // Label::new((document.source(), active_range.clone())) + // .with_order(0) + // .with_message(format!("Style {} is not terminated before the end of paragraph", + // name.fg(parser.colors().info))) + // .with_color(parser.colors().error)) + .with_label( + Label::new((token.source(), token.range.clone())) + .with_order(1) + .with_message(format!("Style {} starts here", + name.fg(parser.colors().info))) + .with_color(parser.colors().info)) + .with_label( + Label::new(paragraph_end) + .with_order(1) + .with_message(format!("Paragraph ends here")) + .with_color(parser.colors().info)) + .with_note("Styles cannot span multiple documents (i.e @import)") + .finish()); + }); + + return result; + } +} + +pub struct StyleRule { + re: [Regex; 4], +} + +impl StyleRule { + pub fn new() -> Self { + Self { + re: [ + // Bold + Regex::new(r"\*\*").unwrap(), + // Italic + Regex::new(r"\*").unwrap(), + // Underline + Regex::new(r"__").unwrap(), + // Code + Regex::new(r"`").unwrap() + ] + } + } +} + +lazy_static! 
{ + static ref STATE_NAME : String = "elements.style".to_string(); +} + +impl RegexRule for StyleRule +{ + fn name(&self) -> &'static str { "Style" } + + fn regexes(&self) -> &[regex::Regex] { &self.re } + + fn on_regex_match(&self, index: usize, parser: &dyn Parser, document: &Document, token: Token, _matches: Captures) -> Vec, Range)>> { + let result = vec![]; + + let query = parser.state().query(&STATE_NAME); + let state = match query + { + Some(state) => state, + None => { // Insert as a new state + match parser.state_mut().insert(STATE_NAME.clone(), Rc::new(RefCell::new(StyleState::new()))) + { + Err(_) => panic!("Unknown error"), + Ok(state) => state, + } + } + }; + + if let Some(style_state) = state + .borrow_mut() + .as_any_mut() + .downcast_mut::() + { + style_state.toggled[index] = style_state.toggled[index].clone().map_or(Some(token.clone()), |_| None); + parser.push(document, Box::new( + Style::new( + token.clone(), + index, + !style_state.toggled[index].is_some() + ) + )); + } + else + { + panic!("Invalid state at `{}`", STATE_NAME.as_str()); + } + + return result; + } + +} diff --git a/src/elements/tex.rs b/src/elements/tex.rs new file mode 100644 index 0000000..6e05370 --- /dev/null +++ b/src/elements/tex.rs @@ -0,0 +1,263 @@ +use std::{io::{Read, Write}, ops::Range, process::{Command, Stdio}, rc::Rc, sync::Once}; + +use ariadne::{Fmt, Label, Report, ReportKind}; +use crypto::{digest::Digest, sha2::Sha512}; +use regex::{Captures, Regex}; + +use crate::{cache::cache::{Cached, CachedError}, compiler::compiler::{Compiler, Target}, document::{document::Document, element::{ElemKind, Element}}, parser::{parser::Parser, rule::RegexRule, source::{Source, Token}, util}}; + +#[derive(Debug, PartialEq, Eq)] +enum TexKind +{ + Block, + Inline, +} + +impl From<&TexKind> for ElemKind +{ + fn from(value: &TexKind) -> Self { + match value { + TexKind::Inline => ElemKind::Inline, + _ => ElemKind::Block + } + } +} + +#[derive(Debug)] +struct Tex +{ + location: Token, 
+ block: TexKind, + env: String, + tex: String, + caption: Option, +} + +impl Tex { + fn new(location: Token, block: TexKind, env: String, tex: String, caption: Option) -> Self { + Self { location, block, env, tex, caption } + } + + fn format_latex(fontsize: &String, preamble: &String, tex: &String) -> FormattedTex + { + FormattedTex(format!(r"\documentclass[{}pt,preview]{{standalone}} +{} +\begin{{document}} +\begin{{preview}} +{} +\end{{preview}} +\end{{document}}", + fontsize, preamble, tex)) + } +} + +struct FormattedTex(String); + +impl FormattedTex +{ + /// Renders latex to svg + fn latex_to_svg(&self, exec: &String, fontsize: &String) -> Result + { + print!("Rendering LaTex `{}`... ", self.0); + let process = match Command::new(exec) + .arg("--fontsize").arg(fontsize) + .stdout(Stdio::piped()) + .stdin(Stdio::piped()) + .spawn() + { + Err(e) => return Err(format!("Could not spawn `{exec}`: {}", e)), + Ok(process) => process + }; + + if let Err(e) = process.stdin.unwrap().write_all(self.0.as_bytes()) + { + panic!("Unable to write to `latex2svg`'s stdin: {}", e); + } + + let mut result = String::new(); + match process.stdout.unwrap().read_to_string(&mut result) + { + Err(e) => panic!("Unable to read `latex2svg` stdout: {}", e), + Ok(_) => {} + } + println!("Done!"); + + Ok(result) + } +} + +impl Cached for FormattedTex +{ + type Key = String; + type Value = String; + + fn sql_table() -> &'static str { + "CREATE TABLE IF NOT EXISTS cached_tex ( + digest TEXT PRIMARY KEY, + svg BLOB NOT NULL);" + } + + fn sql_get_query() -> &'static str { + "SELECT svg FROM cached_tex WHERE digest = (?1)" + } + + fn sql_insert_query() -> &'static str { + "INSERT INTO cached_tex (digest, svg) VALUES (?1, ?2)" + } + + fn key(&self) -> ::Key { + let mut hasher = Sha512::new(); + hasher.input(self.0.as_bytes()); + + hasher.result_str() + } +} + +impl Element for Tex { + fn location(&self) -> &Token { &self.location } + + fn kind(&self) -> ElemKind { (&self.block).into() } + + fn 
element_name(&self) -> &'static str { "LaTeX" } + + fn to_string(&self) -> String { format!("{self:#?}") } + + fn compile(&self, compiler: &Compiler, document: &Document) + -> Result { + + match compiler.target() { + Target::HTML => { + static CACHE_INIT : Once = Once::new(); + CACHE_INIT.call_once(|| if let Some(mut con) = compiler.cache() { + if let Err(e) = FormattedTex::init(&mut con) + { + eprintln!("Unable to create cache table: {e}"); + } + }); + + let exec = document.get_variable(format!("tex.{}.exec", self.env)) + .map_or("latex2svg".to_string(), |(_, var)| var.to_string()); + // FIXME: Because fontsize is passed as an arg, verify that it cannot be used to execute python/shell code + let fontsize = document.get_variable(format!("tex.{}.fontsize", self.env)) + .map_or("12".to_string(), |(_, var)| var.to_string()); + let preamble = document.get_variable(format!("tex.{}.preamble", self.env)) + .map_or("".to_string(), |(_, var)| var.to_string()); + let prepend = if self.block == TexKind::Inline { "".to_string() } + else + { + document.get_variable(format!("tex.{}.block_prepend", self.env)) + .map_or("".to_string(), |(_, var)| var.to_string()+"\n") + }; + + let latex = match self.block + { + TexKind::Inline => Tex::format_latex( + &fontsize, + &preamble, + &format!("${{{}}}$", self.tex)), + _ => Tex::format_latex( + &fontsize, + &preamble, + &format!("{prepend}{}", self.tex)) + }; + + if let Some(mut con) = compiler.cache() + { + match latex.cached(&mut con, |s| s.latex_to_svg(&exec, &fontsize)) + { + Ok(s) => Ok(s), + Err(e) => match e + { + CachedError::SqlErr(e) => Err(format!("Querying the cache failed: {e}")), + CachedError::GenErr(e) => Err(e) + } + } + } + else + { + latex.latex_to_svg(&exec, &fontsize) + } + } + _ => todo!("Unimplemented") + } + } +} + +pub struct TexRule { + re: [Regex; 2], +} + +impl TexRule { + pub fn new() -> Self { + Self { + re: [ + Regex::new(r"\$\|(?:\[(.*)\])?(?:((?:\\.|[^\\\\])*?)\|\$)?").unwrap(), + 
Regex::new(r"\$(?:\[(.*)\])?(?:((?:\\.|[^\\\\])*?)\$)?").unwrap(), + ], + } + } +} + +impl RegexRule for TexRule +{ + fn name(&self) -> &'static str { "Tex" } + + fn regexes(&self) -> &[regex::Regex] { &self.re } + + fn on_regex_match(&self, index: usize, parser: &dyn Parser, document: &Document, token: Token, matches: Captures) + -> Vec, Range)>> { + let mut reports = vec![]; + + let tex_env = matches.get(1) + .and_then(|env| Some(env.as_str().trim_start().trim_end())) + .and_then(|env| (!env.is_empty()).then_some(env)) + .unwrap_or("main"); + + let tex_content = match matches.get(2) + { + // Unterminated `$` + None => { + reports.push( + Report::build(ReportKind::Error, token.source(), token.start()) + .with_message("Unterminated Tex Code") + .with_label( + Label::new((token.source().clone(), token.range.clone())) + .with_message(format!("Missing terminating `{}` after first `{}`", + ["|$", "$"][index].fg(parser.colors().info), + ["$|", "$"][index].fg(parser.colors().info))) + .with_color(parser.colors().error)) + .finish()); + return reports; + } + Some(content) => { + let processed = util::process_escaped('\\', ["|$", "$"][index], + content.as_str().trim_start().trim_end()); + + if processed.is_empty() + { + reports.push( + Report::build(ReportKind::Warning, token.source(), content.start()) + .with_message("Empty Tex Code") + .with_label( + Label::new((token.source().clone(), content.range())) + .with_message("Tex code is empty") + .with_color(parser.colors().warning)) + .finish()); + } + processed + } + }; + + // TODO: Caption + + parser.push(document, Box::new(Tex::new( + token, + if index == 1 { TexKind::Inline } else { TexKind::Block }, + tex_env.to_string(), + tex_content, + None, + ))); + + reports + } +} diff --git a/src/elements/variable.rs b/src/elements/variable.rs new file mode 100644 index 0000000..afb7f02 --- /dev/null +++ b/src/elements/variable.rs @@ -0,0 +1,329 @@ +use regex::Regex; +use crate::parser::{parser::{Parser, ReportColors}, 
rule::RegexRule, source::{Source, Token}}; +use ariadne::{Report, Fmt, Label, ReportKind}; +use crate::document::{document::Document, variable::{BaseVariable, PathVariable, Variable}}; +use std::{ops::Range, rc::Rc}; + +pub struct VariableRule { + re: [Regex; 1], + kinds: Vec<(String, String)>, +} + +impl VariableRule { + pub fn new() -> Self { + Self { + re: [Regex::new(r"(?:^|\n)@([^[:alpha:]])?(.*)=((?:\\\n|.)*)").unwrap()], + kinds: vec![ + ("".into(), "Regular".into()), + ("'".into(), "Path".into()) + ] + } + } + + + pub fn make_variable(&self, colors: &ReportColors, location: Token, kind: usize, name: String, value: String) -> Result, String> + { + match self.kinds[kind].0.as_str() + { + "" => { + Ok(Rc::new(BaseVariable::new(location, name, value))) + } + "'" => { + match std::fs::canonicalize(value.as_str()) // TODO: not canonicalize + { + Ok(path) => Ok(Rc::new(PathVariable::new(location, name, path))), + Err(e) => Err(format!("Unable to canonicalize path `{}`: {}", + value.fg(colors.highlight), + e.to_string())) + } + } + _ => panic!("Unhandled variable kind") + } + } + + // Trim and check variable name for validity + pub fn validate_name<'a>(colors: &ReportColors, original_name: &'a str) -> Result<&'a str, String> + { + let name = original_name.trim_start().trim_end(); + if name.contains("%") + { + return Err(format!("Name cannot contain '{}'", + "%".fg(colors.info))); + } + return Ok(name); + } + + pub fn validate_value(_colors: &ReportColors, original_value: &str) -> Result + { + let mut escaped = 0usize; + let mut result = String::new(); + for c in original_value.trim_start().trim_end().chars() { + if c == '\\' { escaped += 1 } + else if c == '\n' { + match escaped { + 0 => return Err("Unknown error wile capturing variable".to_string()), + // Remove '\n' + 1 => {}, + // Insert '\n' + _ => { + result.push(c); + (0..escaped-2).for_each(|_| result.push('\\')); + } + } + escaped = 0; + } + else { + (0..escaped).for_each(|_| result.push('\\')); + escaped = 
0; + result.push(c); + } + } + (0..escaped).for_each(|_| result.push('\\')); + + Ok(result) + } +} + +impl RegexRule for VariableRule { + fn name(&self) -> &'static str { "Variable" } + + fn regexes(&self) -> &[Regex] { &self.re } + + fn on_regex_match(&self, _: usize, parser: &dyn Parser, document: &Document, token: Token, matches: regex::Captures) -> Vec, Range)>> + { + let mut result = vec![]; + // [Optional] variable kind + let var_kind = match matches.get(1) + { + Some(kind) => { + // Find kind + let r = self.kinds.iter().enumerate().find(|(_i, (ref char, ref _name))| { + char == kind.as_str() }); + + // Unknown kind specified + if r.is_none() + { + result.push( + Report::build(ReportKind::Error, token.source(), kind.start()) + .with_message("Unknown variable kind") + .with_label( + Label::new((token.source(), kind.range())) + .with_message(format!("Variable kind `{}` is unknown", + kind.as_str().fg(parser.colors().highlight))) + .with_color(parser.colors().error)) + .with_help(format!("Leave empty for regular variables. Available variable kinds:{}", + self.kinds.iter().skip(1).fold("".to_string(), |acc, (char, name)| { + acc + format!("\n - `{}` : {}", + char.fg(parser.colors().highlight), + name.fg(parser.colors().info)).as_str() + }))) + .finish()); + + return result; + } + + r.unwrap().0 + } + None => 0, + }; + + let var_name = match matches.get(2) + { + Some(name) => { + match VariableRule::validate_name(&parser.colors(), name.as_str()) + { + Ok(var_name) => var_name, + Err(msg) => { + result.push( + Report::build(ReportKind::Error, token.source(), name.start()) + .with_message("Invalid variable name") + .with_label( + Label::new((token.source(), name.range())) + .with_message(format!("Variable name `{}` is not allowed. 
{msg}", + name.as_str().fg(parser.colors().highlight))) + .with_color(parser.colors().error)) + .finish()); + + return result; + }, + } + }, + _ => panic!("Unknown variable name") + }; + + let var_value = match matches.get(3) + { + Some(value) => { + match VariableRule::validate_value(&parser.colors(), value.as_str()) + { + Ok(var_value) => var_value, + Err(msg ) => { + result.push( + Report::build(ReportKind::Error, token.source(), value.start()) + .with_message("Invalid variable value") + .with_label( + Label::new((token.source(), value.range())) + .with_message(format!("Variable value `{}` is not allowed. {msg}", + value.as_str().fg(parser.colors().highlight))) + .with_color(parser.colors().error)) + .finish()); + + return result; + } + } + } + _ => panic!("Invalid variable value") + }; + + match self.make_variable(&parser.colors(), token.clone(), var_kind, var_name.to_string(), var_value) + { + Ok(variable) => document.add_variable(variable), + Err(msg) => { + let m = matches.get(0).unwrap(); + result.push( + Report::build(ReportKind::Error, token.source(), m.start()) + .with_message("Unable to create variable") + .with_label( + Label::new((token.source(), m.start()+1 .. m.end() )) + .with_message(format!("Unable to create variable `{}`. 
{}", + var_name.fg(parser.colors().highlight), + msg)) + .with_color(parser.colors().error)) + .finish()); + + return result; + } + } + + return result; + } +} + +pub struct VariableSubstitutionRule +{ + re: [Regex; 1], +} + +impl VariableSubstitutionRule { + pub fn new() -> Self { + Self { + re: [Regex::new(r"%(.*?)%").unwrap()], + } + } +} + +impl RegexRule for VariableSubstitutionRule +{ + fn name(&self) -> &'static str { "Variable Substitution" } + + fn regexes(&self) -> &[regex::Regex] { &self.re } + + fn on_regex_match(&self, _index: usize, parser: &dyn Parser, document: &Document, token: Token, matches: regex::Captures) -> Vec, Range)>> { + let mut result = vec![]; + + let variable = match matches.get(1) + { + Some(name) => { + // Empty name + if name.as_str().is_empty() + { + result.push( + Report::build(ReportKind::Error, token.source(), name.start()) + .with_message("Empty variable name") + .with_label( + Label::new((token.source(), matches.get(0).unwrap().range())) + .with_message(format!("Missing variable name for substitution")) + .with_color(parser.colors().error)) + .finish()); + + return result; + } + // Leading spaces + else if name.as_str().trim_start() != name.as_str() + { + result.push( + Report::build(ReportKind::Error, token.source(), name.start()) + .with_message("Invalid variable name") + .with_label( + Label::new((token.source(), name.range())) + .with_message(format!("Variable names contains leading spaces")) + .with_color(parser.colors().error)) + .with_help("Remove leading spaces") + .finish()); + + return result; + } + // Trailing spaces + else if name.as_str().trim_end() != name.as_str() + { + result.push( + Report::build(ReportKind::Error, token.source(), name.start()) + .with_message("Invalid variable name") + .with_label( + Label::new((token.source(), name.range())) + .with_message(format!("Variable names contains trailing spaces")) + .with_color(parser.colors().error)) + .with_help("Remove trailing spaces") + .finish()); + + return 
result; + } + // Invalid name + match VariableRule::validate_name(&parser.colors(), name.as_str()) + { + Err(msg) => + { + result.push( + Report::build(ReportKind::Error, token.source(), name.start()) + .with_message("Invalid variable name") + .with_label( + Label::new((token.source(), name.range())) + .with_message(msg) + .with_color(parser.colors().error)) + .finish()); + + return result; + } + _ => {}, + } + + // Get variable + match document.get_variable(name.as_str()) + { + None => { + result.push( + Report::build(ReportKind::Error, token.source(), name.start()) + .with_message("Unknown variable name") + .with_label( + Label::new((token.source(), name.range())) + .with_message(format!("Unable to find variable with name: `{}`", + name.as_str().fg(parser.colors().highlight))) + .with_color(parser.colors().error)) + .finish()); + return result; + } + Some((_, var)) => var, + } + }, + _ => panic!("Unknown error") + }; + + variable.parse(token, parser, document); + //let parsed = variable.parse( + // token, + // parser, + // document + //); + ////document.merge(parsed, None); + //parsed.content.borrow_mut() + // .drain(..) 
+ // .for_each(|elem| parser.push(document, elem)); + //parser.push(document, ) + + // TODO: Full rework of document + // parser shound parse into previous document, and not into a new document + // This should prevent having to sue `recurse: bool` in the last_element getters + + return result; + } +} diff --git a/src/files.rs b/src/files.rs deleted file mode 100644 index 05ed3eb..0000000 --- a/src/files.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub mod file; -pub mod cursor; -pub mod token; diff --git a/src/files/cursor.rs b/src/files/cursor.rs deleted file mode 100644 index bd098bc..0000000 --- a/src/files/cursor.rs +++ /dev/null @@ -1,28 +0,0 @@ -use super::file::File; - -#[derive(Debug)] -pub struct Cursor<'a> -{ - pub file: &'a File, - pub content: String, - pub position: usize, -} - -impl<'a> Cursor<'a> -{ - pub fn new(_file: &'a File) -> Result, std::io::Error> - { - let _content = match std::fs::read_to_string(&_file.path) - { - Ok(content) => content, - Err(error) => return Err(error), - }; - - Ok(Cursor - { - file: _file, - content: _content, - position: 0usize, - }) - } -} diff --git a/src/files/file.rs b/src/files/file.rs deleted file mode 100644 index 2099e3c..0000000 --- a/src/files/file.rs +++ /dev/null @@ -1,17 +0,0 @@ -use std::path::Path; - -#[derive(Debug)] -pub struct File -{ - pub path: String, -} - -impl File -{ - pub fn new(_path: String) -> File - { - File { - path: _path, - } - } -} diff --git a/src/files/token.rs b/src/files/token.rs deleted file mode 100644 index 2f5c9c4..0000000 --- a/src/files/token.rs +++ /dev/null @@ -1,30 +0,0 @@ -use super::file::File; -use super::cursor::Cursor; - -pub struct Token<'a> -{ - file: &'a File, - start: usize, - len: usize, -} - -impl<'a> Token<'a> -{ - pub fn new(_file: &'a File, _start: usize, _len: usize) -> Token<'a> - { - Token { - file: _file, - start: _start, - len: _len, - } - } - - pub fn from(cursor: &'a Cursor, mat: regex::Match<'a>) -> Token<'a> - { - Token { - file: cursor.file, - start: 
cursor.position, - len: mat.len(), - } - } -} diff --git a/src/lsp/mod.rs b/src/lsp/mod.rs new file mode 100644 index 0000000..67c567f --- /dev/null +++ b/src/lsp/mod.rs @@ -0,0 +1 @@ +pub mod parser; diff --git a/src/lsp/parser.rs b/src/lsp/parser.rs new file mode 100644 index 0000000..09a4146 --- /dev/null +++ b/src/lsp/parser.rs @@ -0,0 +1,30 @@ +use std::{cell::RefCell, collections::HashMap}; + +use crate::{elements::registrar::register, lua::kernel::Kernel, parser::{rule::Rule, state::StateHolder}}; + +struct LSParser +{ + rules: Vec>, + + // Parser state + pub state: RefCell, + //pub kernels: RefCell>, +} + +impl LSParser { + pub fn default() -> Self + { + let mut parser = LSParser { + rules: vec![], + state: RefCell::new(StateHolder::new()), + //kernels: RefCell::new(HashMap::new()), + }; + + // TODO: Main kernel + //register(&mut parser); + + parser + } +} + + diff --git a/src/lua/kernel.rs b/src/lua/kernel.rs new file mode 100644 index 0000000..18070bb --- /dev/null +++ b/src/lua/kernel.rs @@ -0,0 +1,21 @@ +use std::cell::RefMut; + +use mlua::Lua; + +pub struct Kernel +{ + pub lua: Lua, +} + +impl Kernel { + pub fn new() -> Self { + Self { lua: Lua::new() } + } +} + +pub trait KernelHolder +{ + fn get_kernel(&self, name: &str) -> Option>; + + fn insert_kernel(&self, name: String, kernel: Kernel) -> RefMut<'_, Kernel>; +} diff --git a/src/lua/mod.rs b/src/lua/mod.rs new file mode 100644 index 0000000..c3af9f7 --- /dev/null +++ b/src/lua/mod.rs @@ -0,0 +1 @@ +pub mod kernel; diff --git a/src/main.rs b/src/main.rs index 3967246..089dcf3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,110 +1,110 @@ +#![feature(char_indices_offset)] +mod document; +mod compiler; mod parser; -use self::parser::rule::SyntaxRule; -use self::parser::section::SectionRule; -mod files; -use self::files::file::File; -use self::files::cursor::Cursor; -mod syntax; -use syntax::element::Element; -use syntax::element::Text; +mod elements; +mod lua; +mod cache; -fn main() { - let file = 
File::new(String::from("./test.nml")); - let mut cursor = Cursor::new(&file).unwrap(); - cursor.position = 5; +use std::{env, rc::Rc}; - let rule_se = SectionRule::new(); - let (token, res) = rule_se.on_match(&cursor).unwrap(); - println!("{}", res.elements.len()); +use compiler::compiler::Compiler; +use getopts::Options; +use parser::{langparser::LangParser, parser::Parser}; +use crate::parser::source::SourceFile; +extern crate getopts; - /* - let re_sections = regex::Regex::new(r"(?:^|\n)(#{1,})(\*|\+)((?:\t| ){0,})(.*)").unwrap(); - - //let mut validators = Vec::>::new(); - let f = File::new(Box::new(std::path::Path::new("./test.nml"))); - let content = std::fs::read_to_string(*f.path).unwrap(); - - let grammar = vec![re_sections]; - let mut positions = [0usize; 1]; - - let mut i = 0; - while i < content.len() - { - // Update every positions - for k in 0..grammar.len() - { - let rule = &grammar[k]; - let position = &mut positions[k]; - if *position == std::usize::MAX { continue }; - - match rule.find_at(&content, i) - { - Some(mat) => *position = mat.start(), - None => *position = std::usize::MAX, - } - println!("{position}"); - } - - // Gets closest match - let mut next_position = std::usize::MAX; - let mut closest_match = std::usize::MAX; - for k in 0..grammar.len() - { - if positions[k] >= next_position { continue; } - - next_position = positions[k]; - closest_match = k; - } - - println!("Unmatched: {}", &content[i..next_position]); - - // No matches left - if closest_match == std::usize::MAX - { - println!("Done"); - break; - } - - // Extract matches from rule - i = next_position; // Set to begining of match - let mat = &grammar[closest_match].captures_at(&content, i).unwrap(); // Capture match - for m in 0..mat.len() - { - match mat.get(m) - { - Some(s) => { - println!("Group {m}: `{}`", s.as_str()); - }, - None => println!("Group {m}: None"), - } - } - - i += mat.get(0).unwrap().len(); // Add match length - println!("Left={}", &content[i..]); - 
println!("pos={i}"); - - let mut s = String::new(); - std::io::stdin().read_line(&mut s).expect("Did not enter a correct string"); - } - */ - - - - /* - validators.push(Box::new(StringValidator::new("Depth".to_string(), |_group| -> ValidationStatus { - ValidationStatus::Ok() - }))); - validators.push(Box::new(StringValidator::new("Index Type".to_string(), |group| -> ValidationStatus { - match group - { - "" => ValidationStatus::Ok(), - "*" => ValidationStatus::Ok(), - _ => ValidationStatus::Error("") - } - ValidationStatus::Ok() - }))); - */ - //let _sec_rule = SyntaxRule::new("Section".to_string(), r"(?m)(?:^|\n)(#{1,})(\\*|\\+)((?:\t| ){0,})(.*)", validators).unwrap(); +fn print_usage(program: &str, opts: Options) { + let brief = format!("Usage: {} -i FILE [options]", program); + print!("{}", opts.usage(&brief)); +} + +fn print_version() +{ + print!("NML -- Not a Markup Language +Copyright (c) 2024 +NML is licensed under the GNU Affero General Public License version 3 (AGPLv3), +under the terms of the Free Software Foundation . + +This program is free software; you may modify and redistribute it. +There is NO WARRANTY, to the extent permitted by law. 
+ +NML version: 0.4\n"); +} + +fn main() { + let args: Vec = env::args().collect(); + let program = args[0].clone(); + + let mut opts = Options::new(); + opts.optopt("i", "", "Input file", "FILE"); + opts.optopt("d", "database", "Cache database location", "PATH"); + opts.optmulti("z", "debug", "Debug options", "OPTS"); + opts.optflag("h", "help", "Print this help menu"); + opts.optflag("v", "version", "Print program version and licenses"); + + let matches = match opts.parse(&args[1..]) { + Ok(m) => { m } + Err(f) => { panic!("{}", f.to_string()) } + }; + if matches.opt_present("v") + { + print_version(); + return; + } + if matches.opt_present("h") { + print_usage(&program, opts); + return; + } + if !matches.opt_present("i") { + print_usage(&program, opts); + return; + } + + let input = matches.opt_str("i").unwrap(); + let debug_opts = matches.opt_strs("z"); + let db_path = matches.opt_str("d"); + + let parser = LangParser::default(); + + // Parse + let source = SourceFile::new(input.to_string(), None).unwrap(); + let doc = parser.parse(Rc::new(source), None); + + if debug_opts.contains(&"ast".to_string()) + { + println!("-- BEGIN AST DEBUGGING --"); + doc.content.borrow().iter().for_each(|elem| { + println!("{}", (elem).to_string()) + }); + println!("-- END AST DEBUGGING --"); + } + + + if debug_opts.contains(&"ref".to_string()) + { + println!("-- BEGIN REFERENCES DEBUGGING --"); + let sc = doc.scope.borrow(); + sc.referenceable.iter().for_each(|(name, pos)| { + println!(" - {name}: `{:#?}`", doc.content.borrow()[*pos]); + }); + println!("-- END REFERENCES DEBUGGING --"); + } + if debug_opts.contains(&"var".to_string()) + { + println!("-- BEGIN VARIABLES DEBUGGING --"); + let sc = doc.scope.borrow(); + sc.variables.iter().for_each(|(_name, var)| { + println!(" - `{:#?}`", var); + }); + println!("-- END VARIABLES DEBUGGING --"); + } + + + let compiler = Compiler::new(compiler::compiler::Target::HTML, db_path); + let out = compiler.compile(&doc); + + 
std::fs::write("a.html", out).unwrap(); } diff --git a/src/parser.rs b/src/parser.rs deleted file mode 100644 index 0d83205..0000000 --- a/src/parser.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub mod rule; -pub mod section; - diff --git a/src/parser/langparser.rs b/src/parser/langparser.rs new file mode 100644 index 0000000..57f6e55 --- /dev/null +++ b/src/parser/langparser.rs @@ -0,0 +1,277 @@ +use std::{cell::{RefCell, RefMut}, collections::{HashMap, HashSet}, ops::Range, rc::Rc}; + +use ariadne::{Label, Report}; + +use crate::{document::{document::Document, element::{ElemKind, Element, Text}}, elements::{paragraph::Paragraph, registrar::register}, lua::kernel::{Kernel, KernelHolder}, parser::source::{SourceFile, VirtualSource}}; + +use super::{parser::{Parser, ReportColors}, rule::Rule, source::{Cursor, Source, Token}, state::StateHolder, util}; + +/// Parser for the language +pub struct LangParser +{ + rules: Vec>, + colors: ReportColors, + + // Parser state + pub err_flag: RefCell, + pub state: RefCell, + pub kernels: RefCell>, +} + +impl LangParser +{ + pub fn default() -> Self + { + let mut s = Self { + rules: vec![], + colors: ReportColors::with_colors(), + err_flag: RefCell::new(false), + state: RefCell::new(StateHolder::new()), + kernels: RefCell::new(HashMap::new()), + }; + register(&mut s); + s.kernels.borrow_mut() + .insert("main".to_string(), Kernel::new()); + s + } + + fn handle_reports<'a>(&self, _source: Rc, reports: Vec, Range)>>) + { + for mut report in reports + { + let mut sources: HashSet> = HashSet::new(); + fn recurse_source(sources: &mut HashSet>, source: Rc) { + sources.insert(source.clone()); + match source.location() + { + Some(parent) => { + let parent_source = parent.source(); + if sources.get(&parent_source).is_none() + { + recurse_source(sources, parent_source); + } + } + None => {} + } + } + + report.labels.iter().for_each(|label| { + recurse_source(&mut sources, label.span.0.clone()); + }); + + let cache = sources.iter() + .map(|source| 
(source.clone(), source.content().clone())) + .collect::, String)>>(); + + cache.iter() + .for_each(|(source, _)| { + if let Some (location) = source.location() + { + if let Some(_s) = source.downcast_ref::() + { + report.labels.push( + Label::new((location.source(), location.start()+1 .. location.end())) + .with_message("In file included from here") + .with_order(-1) + ); + }; + + if let Some(_s) = source.downcast_ref::() + { + let start = location.start() + (location.source().content().as_bytes()[location.start()] == '\n' as u8) + .then_some(1) + .unwrap_or(0); + report.labels.push( + Label::new((location.source(), start .. location.end())) + .with_message("In evaluation of") + .with_order(-1) + ); + }; + } + }); + report.eprint(ariadne::sources(cache)).unwrap() + } + } +} + +impl Parser for LangParser +{ + fn colors(&self) -> &ReportColors { &self.colors } + + fn rules(&self) -> &Vec> { &self.rules } + fn add_rule(&mut self, rule: Box, after: Option<&'static str>) + { + // Error on duplicate rule + let rule_name = (*rule).name(); + self.rules.iter().for_each(|rule| { + if (*rule).name() != rule_name { return; } + + panic!("Attempted to introduce duplicate rule: `{rule_name}`"); + }); + + match after + { + Some(name) => { + let before = self.rules.iter() + .enumerate() + .find(|(_pos, r)| (r).name() == name); + + match before + { + Some((pos, _)) => self.rules.insert(pos+1, rule), + _ => panic!("Unable to find rule named `{name}`, to insert rule `{}` after it", rule.name()) + } + } + _ => self.rules.push(rule) + } + } + + fn state(&self) -> std::cell::Ref<'_, StateHolder> { self.state.borrow() } + fn state_mut(&self) -> std::cell::RefMut<'_, StateHolder> { self.state.borrow_mut() } + + /// Add an [`Element`] to the [`Document`] + fn push<'a>(&self, doc: &'a Document<'a>, elem: Box) + { + if elem.kind() == ElemKind::Inline || elem.kind() == ElemKind::Invisible + { + let mut paragraph = doc.last_element_mut::(false) + .or_else(|| { + 
doc.push(Box::new(Paragraph::new(elem.location().clone()))); + doc.last_element_mut::(false) + }).unwrap(); + + paragraph.push(elem); + } + else + { + // Process paragraph events + if doc.last_element_mut::(false) + .is_some_and(|_| true) + { + self.handle_reports(doc.source(), + self.state_mut().on_scope_end(self, &doc, super::state::Scope::PARAGRAPH)); + } + + doc.push(elem); + } + } + + fn parse<'a>(&self, source: Rc, parent: Option<&'a Document<'a>>) -> Document<'a> + { + let doc = Document::new(source.clone(), parent); + let mut matches = Vec::new(); + for _ in 0..self.rules.len() { + matches.push((0usize, None)); + } + + let content = source.content(); + let mut cursor = Cursor::new(0usize, doc.source()); // Cursor in file + + if parent.is_some() // Terminate parent's paragraph state + { + self.handle_reports(parent.as_ref().unwrap().source(), + self.state_mut().on_scope_end(self, parent.as_ref().unwrap(), super::state::Scope::PARAGRAPH)); + } + + loop + { + let (rule_pos, rule, match_data) = self.update_matches(&cursor, &mut matches); + + // Unmatched content + let text_content = util::process_text(&doc, &content.as_str()[cursor.pos..rule_pos.pos]); + if !text_content.is_empty() + { + self.push(&doc, Box::new(Text::new( + Token::new(cursor.pos..rule_pos.pos, source.clone()), + text_content + ))); + } + + if let Some(rule) = rule + { + + // Rule callback + let (new_cursor, reports) = (*rule).on_match(self, &doc, rule_pos, match_data); + + self.handle_reports(doc.source(), reports); + + // Advance + cursor = new_cursor; + } + else // No rules left + { + break; + } + } + + // State + self.handle_reports(doc.source(), + self.state_mut().on_scope_end(self, &doc, super::state::Scope::DOCUMENT)); + + return doc; + } + + fn parse_into<'a>(&self, source: Rc, document: &'a Document<'a>) + { + let mut matches = Vec::new(); + for _ in 0..self.rules.len() { + matches.push((0usize, None)); + } + + let content = source.content(); + let mut cursor = Cursor::new(0usize, 
source.clone()); + + loop + { + let (rule_pos, rule, match_data) = self.update_matches(&cursor, &mut matches); + + // Unmatched content + let text_content = util::process_text(&document, &content.as_str()[cursor.pos..rule_pos.pos]); + if !text_content.is_empty() + { + self.push(&document, Box::new(Text::new( + Token::new(cursor.pos..rule_pos.pos, source.clone()), + text_content + ))); + } + + if let Some(rule) = rule + { + // Rule callback + let (new_cursor, reports) = (*rule).on_match(self, &document, rule_pos, match_data); + + self.handle_reports(document.source(), reports); + + // Advance + cursor = new_cursor; + } + else // No rules left + { + break; + } + } + + // State + //self.handle_reports(source.clone(), + // self.state_mut().on_scope_end(&self, &document, super::state::Scope::DOCUMENT)); + + //return doc; + } +} + +impl KernelHolder for LangParser +{ + fn get_kernel(&self, name: &str) + -> Option> { + RefMut::filter_map(self.kernels.borrow_mut(), + |map| map.get_mut(name)).ok() + } + + fn insert_kernel(&self, name: String, kernel: Kernel) + -> RefMut<'_, Kernel> { + //TODO do not get + self.kernels.borrow_mut() + .insert(name.clone(), kernel); + self.get_kernel(name.as_str()).unwrap() + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs new file mode 100644 index 0000000..7e8cfdc --- /dev/null +++ b/src/parser/mod.rs @@ -0,0 +1,6 @@ +pub mod source; +pub mod parser; +pub mod langparser; +pub mod rule; +pub mod state; +pub mod util; diff --git a/src/parser/parser.rs b/src/parser/parser.rs new file mode 100644 index 0000000..05ac33f --- /dev/null +++ b/src/parser/parser.rs @@ -0,0 +1,126 @@ +use std::any::Any; +use std::cell::{Ref, RefCell, RefMut}; +use std::collections::{HashMap, HashSet}; +use std::ops::Range; +use std::rc::Rc; +use unicode_segmentation::UnicodeSegmentation; + +use super::rule::Rule; +use super::source::{Cursor, Source}; +use super::state::StateHolder; +use crate::document::document::Document; +use 
crate::document::element::Element; +use ariadne::Color; +use crate::lua::kernel::{Kernel, KernelHolder}; + +pub struct ReportColors +{ + pub error: Color, + pub warning: Color, + pub info: Color, + pub highlight: Color, +} + +impl ReportColors { + pub fn with_colors() -> Self { + Self { + error: Color::Red, + warning: Color::Yellow, + info: Color::BrightBlue, + highlight: Color::BrightMagenta, + } + } + + pub fn without_colors() -> Self { + Self { + error: Color::Primary, + warning: Color::Primary, + info: Color::Primary, + highlight: Color::Primary, + } + } +} + +pub trait Parser: KernelHolder +{ + /// Gets the colors for formatting errors + /// + /// When colors are disabled, all colors should resolve to empty string + fn colors(&self) -> &ReportColors; + + fn rules(&self) -> &Vec>; + fn add_rule(&mut self, rule: Box, after: Option<&'static str>); + + fn state(&self) -> Ref<'_, StateHolder>; + fn state_mut(&self) -> RefMut<'_, StateHolder>; + + // Update [`matches`] and returns the position of the next matched rule. + // If rule is empty, it means that there are no rules left to parse (i.e + // end of document). + fn update_matches(&self, cursor: &Cursor, matches: &mut Vec<(usize, Option>)>) + -> (Cursor, Option<&Box>, Option>) + { + // Update matches + // TODO: Trivially parellalizable + self.rules().iter().zip(matches.iter_mut()).for_each( + |(rule, (matched_at, match_data))| { + // Don't upate if not stepped over yet + if *matched_at > cursor.pos { return } + + (*matched_at, *match_data) = match rule.next_match(cursor) + { + None => (usize::MAX, None), + Some((mut pos, mut data)) => + { + // Check if escaped + while pos != usize::MAX + { + let content = cursor.source.content().as_str(); + let mut graphemes = content[0 .. 
pos].graphemes(true); + let mut escaped = false; + 'inner: loop + { + let g = graphemes.next_back(); + if !g.is_some() || g.unwrap() != "\\" { break 'inner; } + + escaped = !escaped; + } + if !escaped { break; } + + // Find next potential match + (pos, data) = match rule.next_match(&cursor.at(pos+1)) { + Some((new_pos, new_data)) => (new_pos, new_data), + None => (usize::MAX, data) // Stop iterating + } + } + + (pos, (pos != usize::MAX).then_some(data)) + } + } + }); + + // Get winning match + let (winner, (next_pos, _match_data)) = matches.iter() + .enumerate() + .min_by_key(|(_, (pos, _match_data))| pos).unwrap(); + if *next_pos == usize::MAX // No rule has matched + { + let content = cursor.source.content(); + // No winners, i.e no matches left + return (cursor.at(content.len()), None, None); + } + + (cursor.at(*next_pos), + Some(&self.rules()[winner]), + std::mem::replace(&mut matches[winner].1, None)) + } + + /// Add an [`Element`] to the [`Document`] + fn push<'a>(&self, doc: &'a Document<'a>, elem: Box); + + /// Parse [`Source`] into a new [`Document`] + fn parse<'a>(&self, source: Rc, parent: Option<&'a Document<'a>>) -> Document<'a>; + + /// Parse [`Source`] into an already existing [`Document`] + fn parse_into<'a>(&self, source: Rc, document: &'a Document<'a>); +} diff --git a/src/parser/rule.rs b/src/parser/rule.rs index 4b48bd2..682f98f 100644 --- a/src/parser/rule.rs +++ b/src/parser/rule.rs @@ -1,51 +1,103 @@ -use regex::Captures; -use super::super::syntax::element::Element; -use super::super::files::cursor::Cursor; -use super::super::files::token::Token; +use super::parser::Parser; +use super::source::{Cursor, Source, Token}; +use ariadne::Report; +use crate::document::document::Document; -pub struct RuleResult -{ - length: usize, - pub elements: Vec>, +use std::any::Any; +use std::ops::Range; +use std::rc::Rc; + +pub trait Rule { + /// Returns rule's name + fn name(&self) -> &'static str; + /// Finds the next match starting from [`cursor`] + fn 
next_match(&self, cursor: &Cursor) -> Option<(usize, Box)>; + /// Callback when rule matches + fn on_match(&self, parser: &dyn Parser, document: &Document, cursor: Cursor, match_data: Option>) -> (Cursor, Vec, Range)>>); } -impl RuleResult -{ - pub fn new(_length: usize, elem: Box) -> RuleResult - { - RuleResult - { - length: _length, - elements: vec![elem], - } - } -} - -#[derive(Debug)] -pub struct RuleError<'a> -{ - // where: token - cursor: &'a Cursor<'a>, - mat: Option>, - message: String, -} - -impl<'a> RuleError<'a> -{ - pub fn new(_cursor: &'a Cursor<'a>, _match: Option>, _message: String) -> RuleError<'a> - { - RuleError - { - cursor: _cursor, - mat: _match, - message: _message, - } - } -} - -pub trait SyntaxRule +/* +pub trait RegexRule: Rule { fn name(&self) -> &'static str; - fn next_match<'a>(&self, cursor: &'a Cursor) -> Option; - fn on_match<'a>(&self, cursor: &'a Cursor) -> Result<(Token<'a>, RuleResult), RuleError<'a>>; + + /// Returns the rule's regex + fn regex(&self) -> ®ex::Regex; + /// Callback on regex rule match + fn on_regex_match<'a>(&self, parser: &Parser, document: &Document, token: Token<'a>, matches: regex::Captures) -> Vec)>>; +} + +impl Rule for T { + fn name(&self) -> &'static str { RegexRule::name(self) } + + /// Finds the next match starting from [`cursor`] + fn next_match<'a>(&self, cursor: &'a Cursor) -> Option + { + let re = self.regex(); + + let content = cursor.file.content.as_ref().unwrap(); + match re.find_at(content.as_str(), cursor.pos) + { + Some(m) => Some(m.start()), + None => None, + } + } + + fn on_match<'a>(&self, parser: &Parser, document: &Document, cursor: Cursor<'a>) -> (Cursor<'a>, Vec)>>) + { + let content = cursor.file.content.as_ref().unwrap(); + let matches = self.regex().captures_at(content.as_str(), cursor.pos).unwrap(); + let token = Token::new(cursor.pos, matches.get(0).unwrap().len(), cursor.file); + + let token_end = token.end(); + (cursor.at(token_end), self.on_regex_match(parser, document, token, 
matches)) + } +} +*/ + +pub trait RegexRule +{ + fn name(&self) -> &'static str; + + /// Returns the rule's regexes + fn regexes(&self) -> &[regex::Regex]; + + /// Callback on regex rule match + fn on_regex_match(&self, index: usize, parser: &dyn Parser, document: &Document, token: Token, matches: regex::Captures) -> Vec, Range)>>; +} + +impl Rule for T { + fn name(&self) -> &'static str { RegexRule::name(self) } + + /// Finds the next match starting from [`cursor`] + fn next_match(&self, cursor: &Cursor) + -> Option<(usize, Box)> { + let content = cursor.source.content(); + let mut found: Option<(usize, usize)> = None; + self.regexes().iter().enumerate().for_each(|(id, re)| { + if let Some(m) = re.find_at(content.as_str(), cursor.pos) + { + found = found + .and_then(|(f_pos, f_id)| + if f_pos > m.start() { Some((m.start(), id)) } else { Some((f_pos, f_id)) } ) + .or(Some((m.start(), id))); + } + }); + + return found.map(|(pos, id)| + (pos, Box::new(id) as Box)); + } + + fn on_match(&self, parser: &dyn Parser, document: &Document, cursor: Cursor, match_data: Option>) + -> (Cursor, Vec, Range)>>) { + let content = cursor.source.content(); + let index = unsafe { match_data.unwrap_unchecked().downcast::().unwrap_unchecked() }; + let re = &self.regexes()[*index]; + + let captures = re.captures_at(content.as_str(), cursor.pos).unwrap(); + let token = Token::new(captures.get(0).unwrap().range(), cursor.source.clone()); + + let token_end = token.end(); + return (cursor.at(token_end), self.on_regex_match(*index, parser, document, token, captures)); + } } diff --git a/src/parser/section.rs b/src/parser/section.rs deleted file mode 100644 index 822fca4..0000000 --- a/src/parser/section.rs +++ /dev/null @@ -1,151 +0,0 @@ -use regex::Regex; -use super::rule::{RuleResult, RuleError, SyntaxRule}; -use super::super::files::cursor::Cursor; -use super::super::files::token::Token; -use super::super::syntax::element::{Element, ReferenceableElement}; - -pub mod SectionKind -{ - pub 
const NONE : u8 = 0x00; - pub const NO_TOC : u8 = 0x01; - pub const NO_NUMBER : u8 = 0x02; -} - -pub struct Section -{ - title: String, - reference: Option, - section_kind: u8, - depth: usize, -} - -impl Section -{ - pub fn new<'h>(_title: String, _reference: Option, kind: u8, _depth: usize) -> Section - { - Section { - title: _title, - reference: _reference, - section_kind: kind, - depth: _depth, - } - } -} - -impl Element for Section -{ - fn element_name(&self) -> &'static str { "Section" } -} - -impl ReferenceableElement for Section -{ - fn reference_name(&self) -> Option<&String> { self.reference.as_ref() } -} - - -// TODO: Single file for grammar + element, and add `Rule` suffix for rules -pub struct SectionRule -{ - regex: Regex, -} - -impl SectionRule -{ - pub fn new() -> SectionRule - { - SectionRule - { - regex: regex::Regex::new(r"(?:^|\n)(#{1,})(\{.*\})?((?:\*|\+){0,})?((?:\t| ){0,})(.*)").unwrap() - } - } -} - -impl SyntaxRule for SectionRule -{ - fn name(&self) -> &'static str { "Section" } - - fn next_match<'a>(&self, cursor: &'a Cursor) -> Option - { - match self.regex.find_at(&cursor.content, cursor.position) - { - Some(m) => Some(m.start()), - None => None - } - } - - fn on_match<'a>(&self, cursor: &'a Cursor) -> Result<(Token<'a>, RuleResult), RuleError<'a>> - { - let m = self.regex.captures_at(&cursor.content, cursor.position).unwrap(); // Capture match - - let section_depth = match m.get(1) - { - Some(depth) => { - if depth.len() > 6 - { - return Err(RuleError::new(&cursor, m.get(1), - format!("Section depth must not be greater than 6, got `{}` (depth: {})", depth.as_str(), depth.len()))) - } - - depth.len() - } - _ => return Err(RuleError::new(&cursor, m.get(1), String::from("Empty section depth"))) - }; - - // Spacing - match m.get(4) - { - Some(spacing) => { - if spacing.as_str().is_empty() || !spacing.as_str().chars().all(|c| c == ' ' || c == '\t') - { - return Err(RuleError::new(&cursor, m.get(4), - format!("Sections require spacing made of 
spaces or tab before the section's title, got: `{}`", spacing.as_str()))) - } - } - _ => return Err(RuleError::new(&cursor, m.get(4), - String::from("Sections require spacing made of spaces or tab before the section's title"))) - } - - let section_refname = match m.get(2) - { - Some(reference) => { - // TODO: Validate reference name - // TODO: After parsing, check for duplicate references - Some(String::from(reference.as_str())) - } - _ => None - }; - - let section_kind = match m.get(3) - { - Some(kind) => { - match kind.as_str() { - "*+" => SectionKind::NO_NUMBER | SectionKind::NO_TOC, - "*" => SectionKind::NO_NUMBER, - "+" => SectionKind::NO_TOC, - "" => SectionKind::NONE, - _ => return Err(RuleError::new(&cursor, m.get(3), - format!("Section kind must be either `*` for unnumbered, `+` to hide from TOC or `*+`, got `{}`. Leave empty for normal sections", kind.as_str()))) - } - } - _ => SectionKind::NONE, - }; - - let section_title = match m.get(5) { - Some(title) => match title.as_str() { - "" => return Err(RuleError::new(&cursor, m.get(5), - String::from("Sections require a non-empty title"))), - _ => String::from(title.as_str()) - } - _ => return Err(RuleError::new(&cursor, m.get(5), - String::from("Sections require a non-empty title"))) - }; - - let section = Box::new(Section::new( - section_title, - section_refname, - section_kind, - section_depth)); - - Ok((Token::from(cursor, m.get(0).unwrap()), RuleResult::new(m.get(0).unwrap().len(), section))) - } -} diff --git a/src/parser/source.rs b/src/parser/source.rs new file mode 100644 index 0000000..5ed6104 --- /dev/null +++ b/src/parser/source.rs @@ -0,0 +1,177 @@ +use std::{fs, ops::Range, rc::Rc}; +use core::fmt::Debug; + +use downcast_rs::{impl_downcast, Downcast}; + +/// Trait for source content +pub trait Source: Downcast +{ + /// Gets the source's location + fn location(&self) -> Option<&Token>; + /// Gets the source's name + fn name(&self) -> &String; + /// Gets the source's content + fn content(&self) 
-> &String; +} +impl_downcast!(Source); + +impl core::fmt::Display for dyn Source +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.name()) + } +} + +impl core::fmt::Debug for dyn Source +{ + // TODO + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Source{{{}}}", self.name()) + } +} + +impl std::cmp::PartialEq for dyn Source +{ + fn eq(&self, other: &Self) -> bool { + self.name() == other.name() + } +} + +impl std::cmp::Eq for dyn Source {} + +impl std::hash::Hash for dyn Source +{ + fn hash(&self, state: &mut H) { + self.name().hash(state) + } +} + +pub struct SourceFile +{ + location: Option, + path: String, + content: String, +} + + +impl SourceFile +{ + // TODO: Create a SourceFileRegistry holding already loaded files to avoid reloading them + pub fn new(path: String, location: Option) -> Result + { + match fs::read_to_string(&path) + { + Err(_) => return Err(String::from(format!("Unable to read file content: `{}`", path))), + Ok(content) => Ok(Self{ + location, + path, + content, + }), + } + } +} + +impl Source for SourceFile +{ + fn location(&self) -> Option<&Token> { self.location.as_ref() } + fn name(&self) -> &String { &self.path } + fn content(&self) -> &String { &self.content } +} + +pub struct VirtualSource +{ + location: Token, + name: String, + content: String, +} + +impl VirtualSource +{ + pub fn new(location: Token, name: String, content: String) -> Self + { + Self { location, name, content } + } +} + +impl Source for VirtualSource +{ + fn location(&self) -> Option<&Token> { Some(&self.location) } + fn name(&self) -> &String { &self.name } + fn content(&self) -> &String { &self.content } +} + +#[derive(Debug)] +pub struct Cursor +{ + pub pos: usize, + pub source: Rc, +} + +impl Cursor { + pub fn new(pos: usize, source: Rc) -> Self { + Self { pos, source } + } + + /// Creates [`cursor`] at [`new_pos`] in the same [`file`] + pub fn at(&self, new_pos: usize) -> Self + { + 
Self { + pos: new_pos, + source: self.source.clone(), + } + } +} + +impl Clone for Cursor +{ + fn clone(&self) -> Self { + Self { + pos: self.pos, + source: self.source.clone(), + } + } + + fn clone_from(&mut self, source: &Self) { + *self = source.clone() + } +} + +#[derive(Debug, Clone)] +pub struct Token +{ + pub range: Range, + source: Rc, +} + +impl Token +{ + pub fn new(range: Range, source: Rc) -> Self { + Self { range, source } + } + + pub fn source(&self) -> Rc + { + return self.source.clone() + } + + /// Construct Token from a range + pub fn from(start: &Cursor, end: &Cursor) -> Self + { + assert!(Rc::ptr_eq(&start.source, &end.source)); + + Self { + range: start.pos .. end.pos, + source: start.source.clone() + } + } + + pub fn start(&self) -> usize + { + return self.range.start; + } + + pub fn end(&self) -> usize + { + return self.range.end; + } +} diff --git a/src/parser/state.rs b/src/parser/state.rs new file mode 100644 index 0000000..975500e --- /dev/null +++ b/src/parser/state.rs @@ -0,0 +1,87 @@ +use std::{cell::RefCell, collections::HashMap, ops::Range, rc::Rc}; + +use ariadne::Report; +use downcast_rs::{impl_downcast, Downcast}; + +use crate::document::document::Document; + +use super::{parser::Parser, source::Source}; + +/// Scope for state objects +#[derive(PartialEq, PartialOrd)] +pub enum Scope +{ + /// Global state + GLOBAL = 0, + /// Document-local state + DOCUMENT = 1, + /// Paragraph-local state + /// NOTE: Even though paragraph may span across multiple documents, + /// a paragraph-local state should be removed when importing a new document + PARAGRAPH = 2, +} + +pub trait State: Downcast +{ + /// Returns the state's [`Scope`] + fn scope(&self) -> Scope; + + /// Callback called when state goes out of scope + fn on_remove<'a>(&self, parser: &dyn Parser, document: &Document) -> Vec, Range)>>; +} +impl_downcast!(State); + +/// Object owning all the states +pub struct StateHolder +{ + data: HashMap>> +} + +impl StateHolder +{ + pub fn new() 
-> Self { + Self { + data: HashMap::new(), + } + } + + // Attempts to push [`state`]. On collision, returns an error with the already present state + pub fn insert(&mut self, name: String, state: Rc>) -> Result>, Rc>> + { + match self.data.insert(name, state.clone()) + { + Some(state) => Err(state), + _ => Ok(state) + } + } + + pub fn query(&self, name: &String) -> Option>> + { + self.data + .get(name) + .map_or(None, |st| Some(st.clone())) + } + + pub fn on_scope_end(&mut self, parser: &dyn Parser, document: &Document, scope: Scope) -> Vec, Range)>> + { + let mut result = vec![]; + + self.data + .retain(|_name, state| + { + if state.borrow().scope() >= scope + { + state.borrow().on_remove(parser, document) + .drain(..) + .for_each(|report| result.push(report)); + false + } + else + { + true + } + }); + + return result; + } +} diff --git a/src/parser/util.rs b/src/parser/util.rs new file mode 100644 index 0000000..6e5c09f --- /dev/null +++ b/src/parser/util.rs @@ -0,0 +1,343 @@ +use std::collections::HashMap; + +use unicode_segmentation::UnicodeSegmentation; + +use crate::{document::{document::Document, element::ElemKind}, elements::paragraph::Paragraph}; + +/// Processes text for escape characters and paragraphing +pub fn process_text(document: &Document, content: &str) -> String +{ + let mut escaped = false; + let mut newlines = 0usize; // Consecutive newlines + //println!("Processing: [{content}]"); + let processed = content + .grapheme_indices(true) + .fold((String::new(), None), + |(mut out, prev), (_pos, g)| { + if newlines != 0 && g != "\n" + { + newlines = 0; + + // Add a whitespace if necessary + match out.chars().last() + { + Some(c) => { + // NOTE: \n is considered whitespace, so previous codepoint can be \n + // (Which can only be done by escaping it) + if !c.is_whitespace() || c == '\n' + { + out += " "; + } + } + None => { + if document.last_element::(false) + .and_then(|par| par.find_back(|e| e.kind() != ElemKind::Invisible) + .and_then(|e| 
Some(e.kind() == ElemKind::Inline))) + .unwrap_or(false) + { + out += " "; + } + } // Don't output anything + } + } + + // Output grapheme literally when escaped + if escaped + { + escaped = false; + return (out + g, Some(g)); + } + // Increment newlines counter + else if g == "\n" + { + newlines += 1; + return (out, Some(g)); + } + // Determine if escaped + else if g == "\\" + { + escaped = !escaped; + return (out, Some(g)); + } + // Whitespaces + else if g.chars().count() == 1 && g.chars().last().unwrap().is_whitespace() + { + // Content begins with whitespace + if prev.is_none() + { + if document.last_element::(false).is_some() + { + return (out+g, Some(g)); + } + else + { + return (out, Some(g)); + } + } + // Consecutive whitespaces are converted to a single whitespace + else if prev.unwrap().chars().count() == 1 && + prev.unwrap().chars().last().unwrap().is_whitespace() + { + return (out, Some(g)); + } + } + + return (out + g, Some(g)); + }).0.to_string(); + + return processed; +} + +/// Processed a string and escapes a single token out of it +/// Escaped characters other than the [`token`] will be not be treated as escaped +/// +/// # Example +/// ``` +/// assert_eq!(process_escaped('\\', "%", "escaped: \\%, also escaped: \\\\\\%, untouched: \\a"), +/// "escaped: %, also escaped: \\%, untouched \\a"); +/// ``` +pub fn process_escaped>(escape: char, token: &'static str, content: S) -> String +{ + let mut processed = String::new(); + let mut escaped = 0; + let mut token_it = token.chars().peekable(); + for c in content.as_ref().chars() + .as_str() + .trim_start() + .trim_end() + .chars() + { + if c == escape + { + escaped += 1; + } + else if escaped % 2 == 1 && token_it.peek().map_or(false, |p| *p == c) + { + let _ = token_it.next(); + if token_it.peek() == None + { + (0..((escaped-1)/2)) + .for_each(|_| processed.push(escape)); + escaped = 0; + token_it = token.chars().peekable(); + processed.push_str(token); + } + } + else + { + if escaped != 0 + { + // Add 
untouched escapes + (0..escaped).for_each(|_| processed.push('\\')); + token_it = token.chars().peekable(); + escaped = 0; + } + processed.push(c); + } + } + // Add trailing escapes + (0..escaped).for_each(|_| processed.push('\\')); + + processed +} + +#[derive(Debug)] +pub struct Property +{ + required: bool, + description: String, + default: Option, +} + +impl Property { + pub fn new(required: bool, description: String, default: Option) -> Self { + Self { required, description, default } + } +} + +impl core::fmt::Display for Property +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.default.as_ref() + { + None => write!(f, "{} {}", + ["[Opt]", "[Req]"][self.required as usize], + self.description), + Some(default) => write!(f, "{} {} (Deafult: {})", + ["[Opt]", "[Req]"][self.required as usize], + self.description, + default) + } + } +} + +#[derive(Debug)] +pub struct PropertyMap<'a> +{ + pub(crate) properties: HashMap +} + +impl<'a> PropertyMap<'a> { + pub fn new() -> Self { + Self { properties: HashMap::new() } + } + + pub fn get Result>(&self, name: &str, f: F) + -> Result<(&'a Property, T), Error> { + let (prop, value) = self.properties.get(name).unwrap(); + + f(prop, value).and_then(|value| Ok((*prop, value))) + } +} + +pub struct PropertyParser { + properties: HashMap, +} + +impl PropertyParser { + pub fn new(properties: HashMap) -> Self { + Self { properties } + } + + /// Attempts to build a default propertymap + /// + /// Returns an error if at least one [`Property`] is required and doesn't provide a default + pub fn default(&self) -> Result, String> { + let mut properties = PropertyMap::new(); + + for (name, prop) in &self.properties + { + match (prop.required, prop.default.as_ref()) + { + (true, None) => return Err(format!("Missing property `{name}` {prop}")), + (false, None) => {}, + (_, Some(default)) => { + properties.properties.insert( + name.clone(), + (prop, default.clone()) + ); + } + } + } + + Ok(properties) + 
} + + /// Parses properties string "prop1=value1, prop2 = val\,2" -> {prop1: value1, prop2: val,2} + /// + /// # Key-value pair + /// + /// Property names/values are separated by a single '=' that cannot be escaped. + /// Therefore names cannot contain the '=' character. + /// + /// # Example + /// + /// ``` + /// let properties = HashMap::new(); + /// properties.insert("width", Property::new(true, "Width of the element in em", None)); + /// + /// let parser = PropertyParser::new(properties); + /// let pm = parser.parse("width=15").unwrap(); + /// + /// assert!(pm.get("width", |_, val| val.parse::()) == Ok(15)); + /// ``` + /// # Return value + /// + /// Returns the parsed property map, or an error if either: + /// * A required property is missing + /// * An unknown property is present + /// * A duplicate property is present + /// + /// Note: Only ',' inside values can be escaped, other '\' are treated literally + pub fn parse(&self, content: &str) -> Result, String> { + let mut properties = PropertyMap::new(); + let mut try_insert = |name: &String, value: &String| + -> Result<(), String> { + let trimmed_name = name.trim_end().trim_start(); + let trimmed_value = value.trim_end().trim_start(); + let prop = match self.properties.get(trimmed_name) + { + None => return Err(format!("Unknown property name: `{trimmed_name}` (with value: `{trimmed_value}`). 
Valid properties are:\n{}", + self.properties.iter().fold(String::new(), + |out, (name, prop)| out + format!(" - {name}: {prop}\n").as_str()))), + Some(prop) => prop + }; + + if let Some((_, previous)) = properties.properties.insert( + trimmed_name.to_string(), + (prop, trimmed_value.to_string())) + { + return Err(format!("Duplicate property `{trimmed_name}`, previous value: `{previous}` current value: `{trimmed_value}`")) + } + + Ok(()) + }; + + let mut in_name = true; + let mut name = String::new(); + let mut value = String::new(); + let mut escaped = 0usize; + for c in content.chars() + { + if c == '\\' + { + escaped += 1; + } + else if c == '=' && in_name + { + in_name = false; + (0..escaped).for_each(|_| name.push('\\')); + escaped = 0; + } + else if c == ',' && !in_name + { + if escaped % 2 == 0 // Not escaped + { + (0..escaped/2).for_each(|_| value.push('\\')); + escaped = 0; + in_name = true; + + if let Err(e) = try_insert(&name, &value) { + return Err(e) + } + name.clear(); + value.clear(); + } + else + { + (0..(escaped-1)/2).for_each(|_| value.push('\\')); + value.push(','); + escaped = 0; + } + } + else + { + if in_name { + (0..escaped).for_each(|_| name.push('\\')); + name.push(c) + } + else { + (0..escaped).for_each(|_| value.push('\\')); + value.push(c) + } + escaped = 0; + } + } + if !in_name && value.trim_end().trim_start().is_empty() + { + return Err("Expected a value after last `=`".to_string()) + } + else if name.is_empty() || value.is_empty() + { + return Err("Expected non empty property list.".to_string()); + } + + if let Err(e) = try_insert(&name, &value) { + return Err(e) + } + + // TODO: Missing properties + + Ok(properties) + } +} diff --git a/src/server.rs b/src/server.rs new file mode 100644 index 0000000..abe0004 --- /dev/null +++ b/src/server.rs @@ -0,0 +1,236 @@ +#![feature(char_indices_offset)] +mod document; +mod compiler; +mod parser; +mod elements; +mod lua; +mod cache; +mod lsp; + +use std::collections::HashMap; +use std::rc::Rc; 
+use std::sync::Arc; + +use dashmap::DashMap; +use document::variable::Variable; +use tower_lsp::jsonrpc::Result; +use tower_lsp::lsp_types::*; +use tower_lsp::{Client, LanguageServer, LspService, Server}; + +#[derive(Debug)] +struct Backend { + client: Client, + document_map: DashMap, + //variables: DashMap>>, +} + +#[derive(Debug)] +struct TextDocumentItem { + uri: Url, + text: String, + version: i32, +} + +impl Backend { + async fn on_change(&self, params: TextDocumentItem) { + self.document_map + .insert(params.uri.to_string(), params.text.clone()); + let ParserResult { + ast, + parse_errors, + semantic_tokens, + } = parse(¶ms.text); + let diagnostics = parse_errors + .into_iter() + .filter_map(|item| { + let (message, span) = match item.reason() { + chumsky::error::SimpleReason::Unclosed { span, delimiter } => { + (format!("Unclosed delimiter {}", delimiter), span.clone()) + } + chumsky::error::SimpleReason::Unexpected => ( + format!( + "{}, expected {}", + if item.found().is_some() { + "Unexpected token in input" + } else { + "Unexpected end of input" + }, + if item.expected().len() == 0 { + "something else".to_string() + } else { + item.expected() + .map(|expected| match expected { + Some(expected) => expected.to_string(), + None => "end of input".to_string(), + }) + .collect::>() + .join(", ") + } + ), + item.span(), + ), + chumsky::error::SimpleReason::Custom(msg) => (msg.to_string(), item.span()), + }; + + || -> Option { + // let start_line = rope.try_char_to_line(span.start)?; + // let first_char = rope.try_line_to_char(start_line)?; + // let start_column = span.start - first_char; + let start_position = offset_to_position(span.start, &rope)?; + let end_position = offset_to_position(span.end, &rope)?; + // let end_line = rope.try_char_to_line(span.end)?; + // let first_char = rope.try_line_to_char(end_line)?; + // let end_column = span.end - first_char; + Some(Diagnostic::new_simple( + Range::new(start_position, end_position), + message, + )) + }() + }) 
+ .collect::>(); + + self.client + .publish_diagnostics(params.uri.clone(), diagnostics, Some(params.version)) + .await; + + if let Some(ast) = ast { + self.ast_map.insert(params.uri.to_string(), ast); + } + // self.client + // .log_message(MessageType::INFO, &format!("{:?}", semantic_tokens)) + // .await; + self.semantic_token_map + .insert(params.uri.to_string(), semantic_tokens); + } +} + +#[tower_lsp::async_trait] +impl LanguageServer for Backend { + async fn initialize(&self, _: InitializeParams) -> Result { + Ok(InitializeResult { + server_info: None, + capabilities: ServerCapabilities { + text_document_sync: Some(TextDocumentSyncCapability::Kind( + TextDocumentSyncKind::FULL, + )), + completion_provider: Some(CompletionOptions { + resolve_provider: Some(false), + trigger_characters: Some(vec!["%".to_string()]), + work_done_progress_options: Default::default(), + all_commit_characters: None, + completion_item: None, + }), + semantic_tokens_provider: Some( + SemanticTokensServerCapabilities::SemanticTokensRegistrationOptions( + SemanticTokensRegistrationOptions { + text_document_registration_options: { + TextDocumentRegistrationOptions { + document_selector: Some(vec![DocumentFilter { + language: Some("nml".to_string()), + scheme: Some("file".to_string()), + pattern: None, + }]), + } + }, + semantic_tokens_options: SemanticTokensOptions { + work_done_progress_options: WorkDoneProgressOptions::default(), + legend: SemanticTokensLegend { + token_types: vec![SemanticTokenType::COMMENT, SemanticTokenType::MACRO], + token_modifiers: vec![], + }, + range: None, //Some(true), + full: Some(SemanticTokensFullOptions::Bool(true)), + }, + static_registration_options: StaticRegistrationOptions::default(), + }, + ), + ), + ..ServerCapabilities::default() + }, + }) + } + + async fn initialized(&self, _: InitializedParams) { + self.client + .log_message(MessageType::INFO, "server initialized!") + .await; + } + + async fn shutdown(&self) -> Result<()> { + Ok(()) + } + + async 
fn did_open(&self, params: DidOpenTextDocumentParams) { + self.client + .log_message(MessageType::INFO, "file opened!") + .await; + self.on_change(TextDocumentItem { + uri: params.text_document.uri, + text: params.text_document.text, + version: params.text_document.version, + }) + .await + } + + async fn did_change(&self, mut params: DidChangeTextDocumentParams) { + self.on_change(TextDocumentItem { + uri: params.text_document.uri, + text: std::mem::take(&mut params.content_changes[0].text), + version: params.text_document.version, + }) + .await + } + + async fn completion(&self, params: CompletionParams) -> Result> { + let uri = params.text_document_position.text_document.uri; + let position = params.text_document_position.position; + let completions = || -> Option> { + let mut ret = Vec::with_capacity(0); + + Some(ret) + }(); + Ok(completions.map(CompletionResponse::Array)) + } + + async fn semantic_tokens_full( + &self, + params: SemanticTokensParams, + ) -> Result> { + let uri = params.text_document.uri.to_string(); + self.client + .log_message(MessageType::LOG, "semantic_token_full") + .await; + let semantic_tokens = || -> Option> { + let semantic_tokens = vec![ + SemanticToken { + delta_line: 1, + delta_start: 2, + length: 5, + token_type: 1, + token_modifiers_bitset: 0, + } + ]; + Some(semantic_tokens) + }(); + if let Some(semantic_token) = semantic_tokens { + return Ok(Some(SemanticTokensResult::Tokens(SemanticTokens { + result_id: None, + data: semantic_token, + }))); + } + Ok(None) + } +} + +#[tokio::main] +async fn main() { + let stdin = tokio::io::stdin(); + let stdout = tokio::io::stdout(); + + let (service, socket) = LspService::new( + |client| + Backend { + client + }); + Server::new(stdin, stdout, socket).serve(service).await; +} diff --git a/src/syntax.rs b/src/syntax.rs deleted file mode 100644 index a1b2323..0000000 --- a/src/syntax.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod element; diff --git a/src/syntax/document.rs b/src/syntax/document.rs 
deleted file mode 100644 index 02a6d2f..0000000 --- a/src/syntax/document.rs +++ /dev/null @@ -1,4 +0,0 @@ -struct Document -{ - -} diff --git a/src/syntax/element.rs b/src/syntax/element.rs deleted file mode 100644 index 0e34526..0000000 --- a/src/syntax/element.rs +++ /dev/null @@ -1,30 +0,0 @@ -pub trait Element -{ - fn element_name(&self) -> &'static str; - fn token(&'a self) -> Token<'a> -} - -pub trait ReferenceableElement : Element -{ - fn reference_name(&self) -> Option<&String>; -} - -pub struct Text -{ - content: String, -} - -impl Text -{ - pub fn new<'h>(_content: &'h str) -> Text - { - Text { - content: String::from(_content) - } - } -} - -impl Element for Text -{ - fn element_name(&self) -> &'static str { "Text" } -}