From 527fafcc8b9a39f2d155e4bd5fa4855b12835ae8 Mon Sep 17 00:00:00 2001 From: ef3d0c3e Date: Sun, 20 Oct 2024 10:06:31 +0200 Subject: [PATCH] Semantic progress & fix multiline tokens bug --- src/elements/code.rs | 146 ++++++++++++++++++++++++++++----------- src/elements/import.rs | 21 +++--- src/elements/variable.rs | 43 +++++++----- src/lsp/semantic.rs | 23 +++++- 4 files changed, 162 insertions(+), 71 deletions(-) diff --git a/src/elements/code.rs b/src/elements/code.rs index 87a223e..8cee7f0 100644 --- a/src/elements/code.rs +++ b/src/elements/code.rs @@ -24,6 +24,7 @@ use crate::compiler::compiler::Target; use crate::document::document::Document; use crate::document::element::ElemKind; use crate::document::element::Element; +use crate::lsp::semantic::Semantics; use crate::lua::kernel::CTX; use crate::parser::parser::ParserState; use crate::parser::rule::RegexRule; @@ -122,8 +123,9 @@ impl Code { .as_str(); } - result += - "
".to_string().as_str(); + result += "
" + .to_string() + .as_str(); for (line_id, line) in self.code.split('\n').enumerate() { result += "
"; @@ -134,20 +136,13 @@ impl Code { // Code result += "
";
 				match h.highlight_line(line, Code::get_syntaxes()) {
-					Err(e) => {
-						return Err(format!(
-							"Error highlighting line `{line}`: {}",
-							e
-						))
-					}
+					Err(e) => return Err(format!("Error highlighting line `{line}`: {}", e)),
 					Ok(regions) => {
 						match syntect::html::styled_line_to_highlighted_html(
 							&regions[..],
 							syntect::html::IncludeBackground::No,
 						) {
-							Err(e) => {
-								return Err(format!("Error highlighting code: {}", e))
-							}
+							Err(e) => return Err(format!("Error highlighting code: {}", e)),
 							Ok(highlighted) => {
 								result += if highlighted.is_empty() {
 									"
" @@ -169,20 +164,13 @@ impl Code { result += "
";
 				// Code
 				match h.highlight_line(line, Code::get_syntaxes()) {
-					Err(e) => {
-						return Err(format!(
-							"Error highlighting line `{line}`: {}",
-							e
-						))
-					}
+					Err(e) => return Err(format!("Error highlighting line `{line}`: {}", e)),
 					Ok(regions) => {
 						match syntect::html::styled_line_to_highlighted_html(
 							&regions[..],
 							syntect::html::IncludeBackground::No,
 						) {
-							Err(e) => {
-								return Err(format!("Error highlighting code: {}", e))
-							}
+							Err(e) => return Err(format!("Error highlighting code: {}", e)),
 							Ok(highlighted) => {
 								result += if highlighted.is_empty() {
 									"
" @@ -199,21 +187,13 @@ impl Code { } else if self.block == CodeKind::Inline { result += ""; match h.highlight_line(self.code.as_str(), Code::get_syntaxes()) { - Err(e) => { - return Err(format!( - "Error highlighting line `{}`: {}", - self.code, - e - )) - } + Err(e) => return Err(format!("Error highlighting line `{}`: {}", self.code, e)), Ok(regions) => { match syntect::html::styled_line_to_highlighted_html( ®ions[..], syntect::html::IncludeBackground::No, ) { - Err(e) => { - return Err(format!("Error highlighting code: {}", e)) - } + Err(e) => return Err(format!("Error highlighting code: {}", e)), Ok(highlighted) => result += highlighted.as_str(), } } @@ -245,9 +225,12 @@ impl Cached for Code { let mut hasher = Sha512::new(); hasher.input((self.block as usize).to_be_bytes().as_slice()); hasher.input(self.line_offset.to_be_bytes().as_slice()); - if let Some(theme) = self.theme - .as_ref() { hasher.input(theme.as_bytes()) } - if let Some(name) = self.name.as_ref() { hasher.input(name.as_bytes()) } + if let Some(theme) = self.theme.as_ref() { + hasher.input(theme.as_bytes()) + } + if let Some(name) = self.name.as_ref() { + hasher.input(name.as_bytes()) + } hasher.input(self.language.as_bytes()); hasher.input(self.code.as_bytes()); @@ -262,7 +245,12 @@ impl Element for Code { fn element_name(&self) -> &'static str { "Code Block" } - fn compile(&self, compiler: &Compiler, _document: &dyn Document, _cursor: usize) -> Result { + fn compile( + &self, + compiler: &Compiler, + _document: &dyn Document, + _cursor: usize, + ) -> Result { match compiler.target() { Target::HTML => { static CACHE_INIT: Once = Once::new(); @@ -319,7 +307,7 @@ impl CodeRule { ) .unwrap(), Regex::new( - r"``(?:\[((?:\\.|[^\\\\])*?)\])?(?:(.*?),)?((?:\\(?:.|\n)|[^\\\\])*?)``", + r"``(?:\[((?:\\.|[^\\\\])*?)\])?(?:(.*?)(?:,|\n))?((?:\\(?:.|\n)|[^\\\\])*?)``", ) .unwrap(), ], @@ -452,7 +440,8 @@ impl RegexRule for CodeRule { } let theme = document - .get_variable("code.theme").map(|var| 
var.to_string()); + .get_variable("code.theme") + .map(|var| var.to_string()); if index == 0 // Block @@ -539,6 +528,45 @@ impl RegexRule for CodeRule { ); } + if let Some((sems, tokens)) = + Semantics::from_source(token.source(), &state.shared.semantics) + { + let range = matches + .get(0) + .map(|m| { + if token.source().content().as_bytes()[m.start()] == b'\n' { + m.start() + 1..m.end() + } else { + m.range() + } + }) + .unwrap(); + sems.add( + range.start..range.start + if index == 0 { 3 } else { 2 }, + tokens.code_sep, + ); + if let Some(props) = matches.get(1).map(|m| m.range()) { + sems.add(props.start - 1..props.start, tokens.code_props_sep); + sems.add(props.clone(), tokens.code_props); + sems.add(props.end..props.end + 1, tokens.code_props_sep); + } + if let Some(lang) = matches.get(2).map(|m| m.range()) { + sems.add(lang.clone(), tokens.code_lang); + } + if index == 0 { + if let Some(title) = matches.get(3).map(|m| m.range()) { + sems.add(title.clone(), tokens.code_title); + } + sems.add(matches.get(4).unwrap().range(), tokens.code_content); + } else { + sems.add(matches.get(3).unwrap().range(), tokens.code_content); + } + sems.add( + range.end - if index == 0 { 3 } else { 2 }..range.end, + tokens.code_sep, + ); + } + reports } @@ -551,7 +579,8 @@ impl RegexRule for CodeRule { ctx.as_ref().map(|ctx| { let theme = ctx .document - .get_variable("code.theme").map(|var| var.to_string()); + .get_variable("code.theme") + .map(|var| var.to_string()); ctx.state.push( ctx.document, @@ -581,7 +610,8 @@ impl RegexRule for CodeRule { ctx.as_ref().map(|ctx| { let theme = ctx .document - .get_variable("code.theme").map(|var| var.to_string()); + .get_variable("code.theme") + .map(|var| var.to_string()); ctx.state.push( ctx.document, @@ -618,7 +648,8 @@ impl RegexRule for CodeRule { ctx.as_ref().map(|ctx| { let theme = ctx .document - .get_variable("code.theme").map(|var| var.to_string()); + .get_variable("code.theme") + .map(|var| var.to_string()); ctx.state.push( 
ctx.document, @@ -651,6 +682,7 @@ mod tests { use crate::parser::langparser::LangParser; use crate::parser::parser::Parser; use crate::parser::source::SourceFile; + use crate::validate_semantics; #[test] fn code_block() { @@ -754,4 +786,40 @@ fn fact(n: usize) -> usize assert_eq!(found[2].code, "std::vector> u;"); assert_eq!(found[2].line_offset, 1); } + + #[test] + fn semantic() { + let source = Rc::new(SourceFile::with_content( + "".to_string(), + r#" +```[line_offset=15] C, Title +test code +``` +``C, Single Line`` + "# + .to_string(), + None, + )); + let parser = LangParser::default(); + let (_, state) = parser.parse( + ParserState::new_with_semantics(&parser, None), + source.clone(), + None, + ); + validate_semantics!(state, source.clone(), 0, + code_sep { delta_line == 1, delta_start == 0, length == 3 }; + code_props_sep { delta_line == 0, delta_start == 3, length == 1 }; + code_props { delta_line == 0, delta_start == 1, length == 14 }; + code_props_sep { delta_line == 0, delta_start == 14, length == 1 }; + code_lang { delta_line == 0, delta_start == 1, length == 2 }; + code_title { delta_line == 0, delta_start == 3, length == 6 }; + code_content { delta_line == 1, delta_start == 0, length == 10 }; + code_sep { delta_line == 1, delta_start == 0, length == 3 }; + + code_sep { delta_line == 1, delta_start == 0, length == 2 }; + code_lang { delta_line == 0, delta_start == 2, length == 1 }; + code_content { delta_line == 0, delta_start == 2, length == 12 }; + code_sep { delta_line == 0, delta_start == 12, length == 2 }; + ); + } } diff --git a/src/elements/import.rs b/src/elements/import.rs index fe14d72..80b224a 100644 --- a/src/elements/import.rs +++ b/src/elements/import.rs @@ -1,5 +1,6 @@ use crate::document::document::Document; use crate::document::document::DocumentAccessors; +use crate::lsp::semantic::Semantics; use crate::parser::parser::ParserState; use crate::parser::parser::ReportColors; use crate::parser::rule::RegexRule; @@ -180,12 +181,9 @@ impl 
RegexRule for ImportRule { ); } - /* - if let Some(sems) = state.shared.semantics.as_ref().map(|sems| { - RefMut::filter_map(sems.borrow_mut(), |sems| sems.get_mut(&token.source())) - .ok() - .unwrap() - }) { + + if let Some((sems, tokens)) = Semantics::from_source(token.source(), &state.shared.semantics) + { // @import let import = if token.source().content().as_bytes()[matches.get(0).unwrap().start()] == b'\n' { @@ -195,19 +193,18 @@ impl RegexRule for ImportRule { { matches.get(0).unwrap().start() }; - sems.add(token.source(), import..import + 7, sems.token.import_import); + sems.add(import..import + 7, tokens.import_import); if let Some(import_as) = matches.get(1) { - sems.add(token.source(), import_as.start()-1..import_as.start(), sems.token.import_as_sep); - sems.add(token.source(), import_as.range(), sems.token.import_as); - sems.add(token.source(), import_as.end()..import_as.end()+1, sems.token.import_as_sep); + sems.add(import_as.start()-1..import_as.start(), tokens.import_as_sep); + sems.add(import_as.range(), tokens.import_as); + sems.add(import_as.end()..import_as.end()+1, tokens.import_as_sep); } let path = matches.get(2).unwrap().range(); - sems.add(token.source(), path, sems.token.import_path); + sems.add(path, tokens.import_path); } - */ result } diff --git a/src/elements/variable.rs b/src/elements/variable.rs index 161d19b..74625e0 100644 --- a/src/elements/variable.rs +++ b/src/elements/variable.rs @@ -2,6 +2,7 @@ use crate::document::document::Document; use crate::document::variable::BaseVariable; use crate::document::variable::PathVariable; use crate::document::variable::Variable; +use crate::lsp::semantic::Semantics; use crate::lua::kernel::CTX; use crate::parser::parser::ParserState; use crate::parser::parser::ReportColors; @@ -15,7 +16,6 @@ use ariadne::ReportKind; use mlua::Function; use mlua::Lua; use regex::Regex; -use std::cell::RefMut; use std::ops::Range; use std::rc::Rc; use std::str::FromStr; @@ -257,23 +257,20 @@ impl RegexRule for 
VariableRule { } } - //if let Some(sems) = state.shared.semantics.as_ref().map(|sems| { - // RefMut::filter_map(sems.borrow_mut(), |sems| sems.get_mut(&token.source())) - // .ok() - // .unwrap() - //}) { - // let name = matches.get(2).unwrap().range(); - // if let Some(kind) = matches.get(1).map(|m| m.range()) { - // sems.add(token.source(), kind.start-1..kind.start, sems.token.variable_operator); - // sems.add(token.source(), kind, sems.token.variable_kind); - // } else { - // sems.add(token.source(), name.start-1..name.start, sems.token.variable_operator); - // } - // sems.add(token.source(), name.clone(), sems.token.variable_name); - // sems.add(token.source(), name.end..name.end+1, sems.token.variable_sep); - // let value = matches.get(3).unwrap().range(); - // sems.add(token.source(), value.clone(), sems.token.variable_value); - //} + if let Some((sems, tokens)) = Semantics::from_source(token.source(), &state.shared.semantics) + { + let name = matches.get(2).unwrap().range(); + if let Some(kind) = matches.get(1).map(|m| m.range()) { + sems.add(kind.start-1..kind.start, tokens.variable_operator); + sems.add(kind, tokens.variable_kind); + } else { + sems.add(name.start-1..name.start, tokens.variable_operator); + } + sems.add(name.clone(), tokens.variable_name); + sems.add(name.end..name.end+1, tokens.variable_sep); + let value = matches.get(3).unwrap().range(); + sems.add(value.clone(), tokens.variable_value); + } result } @@ -434,7 +431,15 @@ impl RegexRule for VariableSubstitutionRule { _ => panic!("Unknown error"), }; - variable.parse(state, token, document); + variable.parse(state, token.clone(), document); + + if let Some((sems, tokens)) = Semantics::from_source(token.source(), &state.shared.semantics) + { + let name = matches.get(1).unwrap().range(); + sems.add(name.start-1..name.start, tokens.variable_sub_sep); + sems.add(name.clone(), tokens.variable_sub_name); + sems.add(name.end..name.end+1, tokens.variable_sub_sep); + } result } diff --git 
a/src/lsp/semantic.rs b/src/lsp/semantic.rs index 880dc6c..a1e3bca 100644 --- a/src/lsp/semantic.rs +++ b/src/lsp/semantic.rs @@ -85,6 +85,7 @@ macro_rules! token { }; } +/// Predefined list of tokens #[derive(Debug)] pub struct Tokens { pub section_heading: (u32, u32), @@ -118,6 +119,16 @@ pub struct Tokens { pub variable_name: (u32, u32), pub variable_sep: (u32, u32), pub variable_value: (u32, u32), + + pub variable_sub_sep: (u32, u32), + pub variable_sub_name: (u32, u32), + + pub code_sep: (u32, u32), + pub code_props_sep: (u32, u32), + pub code_props: (u32, u32), + pub code_lang: (u32, u32), + pub code_title: (u32, u32), + pub code_content: (u32, u32), } impl Tokens { @@ -154,6 +165,16 @@ impl Tokens { variable_name: token!("macro"), variable_sep: token!("operator"), variable_value: token!("function"), + + variable_sub_sep: token!("operator"), + variable_sub_name: token!("macro"), + + code_sep: token!("operator"), + code_props_sep: token!("operator"), + code_props: token!("enum"), + code_lang: token!("function"), + code_title: token!("number"), + code_content: token!("string"), } } } @@ -248,7 +269,7 @@ impl<'a> Semantics<'a> { while cursor.pos != range.end { let end = self.source.content()[cursor.pos..range.end] .find('\n') - .unwrap_or(self.source.content().len() - cursor.pos); + .unwrap_or(self.source.content().len() - 1) + 1; let len = usize::min(range.end - cursor.pos, end); let clen = self.source.content()[cursor.pos..cursor.pos + len] .chars()