Semantic progress & fix multiline tokens bug

ef3d0c3e 2024-10-20 10:06:31 +02:00
parent f1de61409a
commit 527fafcc8b
4 changed files with 162 additions and 71 deletions

View file

@@ -24,6 +24,7 @@ use crate::compiler::compiler::Target;
 use crate::document::document::Document;
 use crate::document::element::ElemKind;
 use crate::document::element::Element;
+use crate::lsp::semantic::Semantics;
 use crate::lua::kernel::CTX;
 use crate::parser::parser::ParserState;
 use crate::parser::rule::RegexRule;
@@ -122,8 +123,9 @@ impl Code {
             .as_str();
         }
-        result +=
-            "<div class=\"code-block-content\"><table cellspacing=\"0\">".to_string().as_str();
+        result += "<div class=\"code-block-content\"><table cellspacing=\"0\">"
+            .to_string()
+            .as_str();
 
         for (line_id, line) in self.code.split('\n').enumerate() {
             result += "<tr><td class=\"code-block-gutter\">";
@@ -134,20 +136,13 @@ impl Code {
                 // Code
                 result += "</td><td class=\"code-block-line\"><pre>";
                 match h.highlight_line(line, Code::get_syntaxes()) {
-                    Err(e) => {
-                        return Err(format!(
-                            "Error highlighting line `{line}`: {}",
-                            e
-                        ))
-                    }
+                    Err(e) => return Err(format!("Error highlighting line `{line}`: {}", e)),
                     Ok(regions) => {
                         match syntect::html::styled_line_to_highlighted_html(
                             &regions[..],
                             syntect::html::IncludeBackground::No,
                         ) {
-                            Err(e) => {
-                                return Err(format!("Error highlighting code: {}", e))
-                            }
+                            Err(e) => return Err(format!("Error highlighting code: {}", e)),
                             Ok(highlighted) => {
                                 result += if highlighted.is_empty() {
                                     "<br>"
@@ -169,20 +164,13 @@ impl Code {
                 result += "<tr><td class=\"code-block-line\"><pre>";
                 // Code
                 match h.highlight_line(line, Code::get_syntaxes()) {
-                    Err(e) => {
-                        return Err(format!(
-                            "Error highlighting line `{line}`: {}",
-                            e
-                        ))
-                    }
+                    Err(e) => return Err(format!("Error highlighting line `{line}`: {}", e)),
                     Ok(regions) => {
                         match syntect::html::styled_line_to_highlighted_html(
                             &regions[..],
                             syntect::html::IncludeBackground::No,
                         ) {
-                            Err(e) => {
-                                return Err(format!("Error highlighting code: {}", e))
-                            }
+                            Err(e) => return Err(format!("Error highlighting code: {}", e)),
                             Ok(highlighted) => {
                                 result += if highlighted.is_empty() {
                                     "<br>"
@@ -199,21 +187,13 @@ impl Code {
         } else if self.block == CodeKind::Inline {
             result += "<a class=\"inline-code\"><code>";
             match h.highlight_line(self.code.as_str(), Code::get_syntaxes()) {
-                Err(e) => {
-                    return Err(format!(
-                        "Error highlighting line `{}`: {}",
-                        self.code,
-                        e
-                    ))
-                }
+                Err(e) => return Err(format!("Error highlighting line `{}`: {}", self.code, e)),
                 Ok(regions) => {
                     match syntect::html::styled_line_to_highlighted_html(
                         &regions[..],
                         syntect::html::IncludeBackground::No,
                     ) {
-                        Err(e) => {
-                            return Err(format!("Error highlighting code: {}", e))
-                        }
+                        Err(e) => return Err(format!("Error highlighting code: {}", e)),
                         Ok(highlighted) => result += highlighted.as_str(),
                     }
                 }
@@ -245,9 +225,12 @@ impl Cached for Code {
         let mut hasher = Sha512::new();
         hasher.input((self.block as usize).to_be_bytes().as_slice());
         hasher.input(self.line_offset.to_be_bytes().as_slice());
-        if let Some(theme) = self.theme
-            .as_ref() { hasher.input(theme.as_bytes()) }
-        if let Some(name) = self.name.as_ref() { hasher.input(name.as_bytes()) }
+        if let Some(theme) = self.theme.as_ref() {
+            hasher.input(theme.as_bytes())
+        }
+        if let Some(name) = self.name.as_ref() {
+            hasher.input(name.as_bytes())
+        }
         hasher.input(self.language.as_bytes());
         hasher.input(self.code.as_bytes());
@@ -262,7 +245,12 @@ impl Element for Code {
     fn element_name(&self) -> &'static str { "Code Block" }
 
-    fn compile(&self, compiler: &Compiler, _document: &dyn Document, _cursor: usize) -> Result<String, String> {
+    fn compile(
+        &self,
+        compiler: &Compiler,
+        _document: &dyn Document,
+        _cursor: usize,
+    ) -> Result<String, String> {
         match compiler.target() {
             Target::HTML => {
                 static CACHE_INIT: Once = Once::new();
@@ -319,7 +307,7 @@ impl CodeRule {
                 )
                 .unwrap(),
                 Regex::new(
-                    r"``(?:\[((?:\\.|[^\\\\])*?)\])?(?:(.*?),)?((?:\\(?:.|\n)|[^\\\\])*?)``",
+                    r"``(?:\[((?:\\.|[^\\\\])*?)\])?(?:(.*?)(?:,|\n))?((?:\\(?:.|\n)|[^\\\\])*?)``",
                )
                .unwrap(),
            ],
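
The changed pattern is the double-backtick (inline) rule: the optional language capture can now be terminated by a newline as well as a comma. A minimal sketch of the new behaviour, assuming only the regex crate (which the rule already uses) and the inline snippet from the semantic test below:

use regex::Regex;

fn main() {
    // Pattern copied from the hunk above: group 2 (language) now ends at ',' or '\n'.
    let re = Regex::new(
        r"``(?:\[((?:\\.|[^\\\\])*?)\])?(?:(.*?)(?:,|\n))?((?:\\(?:.|\n)|[^\\\\])*?)``",
    )
    .unwrap();

    let caps = re.captures("``C, Single Line``").unwrap();
    assert_eq!(caps.get(2).unwrap().as_str(), "C");             // language
    assert_eq!(caps.get(3).unwrap().as_str(), " Single Line");  // code content
}
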
@@ -452,7 +440,8 @@ impl RegexRule for CodeRule {
         }
         let theme = document
-            .get_variable("code.theme").map(|var| var.to_string());
+            .get_variable("code.theme")
+            .map(|var| var.to_string());
 
         if index == 0
         // Block
@@ -539,6 +528,45 @@ impl RegexRule for CodeRule {
             );
         }
 
+        if let Some((sems, tokens)) =
+            Semantics::from_source(token.source(), &state.shared.semantics)
+        {
+            let range = matches
+                .get(0)
+                .map(|m| {
+                    if token.source().content().as_bytes()[m.start()] == b'\n' {
+                        m.start() + 1..m.end()
+                    } else {
+                        m.range()
+                    }
+                })
+                .unwrap();
+            sems.add(
+                range.start..range.start + if index == 0 { 3 } else { 2 },
+                tokens.code_sep,
+            );
+            if let Some(props) = matches.get(1).map(|m| m.range()) {
+                sems.add(props.start - 1..props.start, tokens.code_props_sep);
+                sems.add(props.clone(), tokens.code_props);
+                sems.add(props.end..props.end + 1, tokens.code_props_sep);
+            }
+            if let Some(lang) = matches.get(2).map(|m| m.range()) {
+                sems.add(lang.clone(), tokens.code_lang);
+            }
+            if index == 0 {
+                if let Some(title) = matches.get(3).map(|m| m.range()) {
+                    sems.add(title.clone(), tokens.code_title);
+                }
+                sems.add(matches.get(4).unwrap().range(), tokens.code_content);
+            } else {
+                sems.add(matches.get(3).unwrap().range(), tokens.code_content);
+            }
+            sems.add(
+                range.end - if index == 0 { 3 } else { 2 }..range.end,
+                tokens.code_sep,
+            );
+        }
+
         reports
     }
@@ -551,7 +579,8 @@ impl RegexRule for CodeRule {
             ctx.as_ref().map(|ctx| {
                 let theme = ctx
                     .document
-                    .get_variable("code.theme").map(|var| var.to_string());
+                    .get_variable("code.theme")
+                    .map(|var| var.to_string());
 
                 ctx.state.push(
                     ctx.document,
@@ -581,7 +610,8 @@ impl RegexRule for CodeRule {
             ctx.as_ref().map(|ctx| {
                 let theme = ctx
                     .document
-                    .get_variable("code.theme").map(|var| var.to_string());
+                    .get_variable("code.theme")
+                    .map(|var| var.to_string());
 
                 ctx.state.push(
                     ctx.document,
@@ -618,7 +648,8 @@ impl RegexRule for CodeRule {
             ctx.as_ref().map(|ctx| {
                 let theme = ctx
                     .document
-                    .get_variable("code.theme").map(|var| var.to_string());
+                    .get_variable("code.theme")
+                    .map(|var| var.to_string());
 
                 ctx.state.push(
                     ctx.document,
@@ -651,6 +682,7 @@ mod tests {
     use crate::parser::langparser::LangParser;
     use crate::parser::parser::Parser;
     use crate::parser::source::SourceFile;
+    use crate::validate_semantics;
 
     #[test]
     fn code_block() {
@@ -754,4 +786,40 @@ fn fact(n: usize) -> usize
         assert_eq!(found[2].code, "std::vector<std::vector<int>> u;");
         assert_eq!(found[2].line_offset, 1);
     }
+
+    #[test]
+    fn semantic() {
+        let source = Rc::new(SourceFile::with_content(
+            "".to_string(),
+            r#"
+```[line_offset=15] C, Title
+test code
+```
+``C, Single Line``
+"#
+            .to_string(),
+            None,
+        ));
+        let parser = LangParser::default();
+        let (_, state) = parser.parse(
+            ParserState::new_with_semantics(&parser, None),
+            source.clone(),
+            None,
+        );
+
+        validate_semantics!(state, source.clone(), 0,
+            code_sep { delta_line == 1, delta_start == 0, length == 3 };
+            code_props_sep { delta_line == 0, delta_start == 3, length == 1 };
+            code_props { delta_line == 0, delta_start == 1, length == 14 };
+            code_props_sep { delta_line == 0, delta_start == 14, length == 1 };
+            code_lang { delta_line == 0, delta_start == 1, length == 2 };
+            code_title { delta_line == 0, delta_start == 3, length == 6 };
+            code_content { delta_line == 1, delta_start == 0, length == 10 };
+            code_sep { delta_line == 1, delta_start == 0, length == 3 };
+            code_sep { delta_line == 1, delta_start == 0, length == 2 };
+            code_lang { delta_line == 0, delta_start == 2, length == 1 };
+            code_content { delta_line == 0, delta_start == 2, length == 12 };
+            code_sep { delta_line == 0, delta_start == 12, length == 2 };
+        );
+    }
 }
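
The delta_line / delta_start / length triples in validate_semantics! follow the LSP semantic-token relative encoding: each token's line is given relative to the previous token, and its start column relative to the previous token's start when both share a line, otherwise it is the absolute column. A small free-standing sketch of that arithmetic (not this crate's API, just the encoding the expected values above rely on):

/// Convert absolute (line, start, length) triples into LSP-style deltas.
fn to_deltas(tokens: &[(u32, u32, u32)]) -> Vec<(u32, u32, u32)> {
    let (mut prev_line, mut prev_start) = (0u32, 0u32);
    tokens
        .iter()
        .map(|&(line, start, len)| {
            let delta_line = line - prev_line;
            // Start is relative to the previous token only when on the same line.
            let delta_start = if delta_line == 0 { start - prev_start } else { start };
            prev_line = line;
            prev_start = start;
            (delta_line, delta_start, len)
        })
        .collect()
}

fn main() {
    // First two tokens of the block-code case: "```" at line 1 column 0, then "[" at column 3.
    let absolute = [(1, 0, 3), (1, 3, 1)];
    let expected: Vec<(u32, u32, u32)> = vec![(1, 0, 3), (0, 3, 1)];
    assert_eq!(to_deltas(&absolute), expected);
}
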

View file

@@ -1,5 +1,6 @@
 use crate::document::document::Document;
 use crate::document::document::DocumentAccessors;
+use crate::lsp::semantic::Semantics;
 use crate::parser::parser::ParserState;
 use crate::parser::parser::ReportColors;
 use crate::parser::rule::RegexRule;
@@ -180,12 +181,9 @@ impl RegexRule for ImportRule {
             );
         }
 
-        /*
-        if let Some(sems) = state.shared.semantics.as_ref().map(|sems| {
-            RefMut::filter_map(sems.borrow_mut(), |sems| sems.get_mut(&token.source()))
-                .ok()
-                .unwrap()
-        }) {
+        if let Some((sems, tokens)) = Semantics::from_source(token.source(), &state.shared.semantics)
+        {
             // @import
             let import = if token.source().content().as_bytes()[matches.get(0).unwrap().start()] == b'\n'
             {
@@ -195,19 +193,18 @@ impl RegexRule for ImportRule {
             {
                 matches.get(0).unwrap().start()
             };
-            sems.add(token.source(), import..import + 7, sems.token.import_import);
+            sems.add(import..import + 7, tokens.import_import);
             if let Some(import_as) = matches.get(1)
             {
-                sems.add(token.source(), import_as.start()-1..import_as.start(), sems.token.import_as_sep);
-                sems.add(token.source(), import_as.range(), sems.token.import_as);
-                sems.add(token.source(), import_as.end()..import_as.end()+1, sems.token.import_as_sep);
+                sems.add(import_as.start()-1..import_as.start(), tokens.import_as_sep);
+                sems.add(import_as.range(), tokens.import_as);
+                sems.add(import_as.end()..import_as.end()+1, tokens.import_as_sep);
             }
 
             let path = matches.get(2).unwrap().range();
-            sems.add(token.source(), path, sems.token.import_path);
+            sems.add(path, tokens.import_path);
         }
-        */
 
         result
     }

View file

@@ -2,6 +2,7 @@ use crate::document::document::Document;
 use crate::document::variable::BaseVariable;
 use crate::document::variable::PathVariable;
 use crate::document::variable::Variable;
+use crate::lsp::semantic::Semantics;
 use crate::lua::kernel::CTX;
 use crate::parser::parser::ParserState;
 use crate::parser::parser::ReportColors;
@@ -15,7 +16,6 @@ use ariadne::ReportKind;
 use mlua::Function;
 use mlua::Lua;
 use regex::Regex;
-use std::cell::RefMut;
 use std::ops::Range;
 use std::rc::Rc;
 use std::str::FromStr;
@@ -257,23 +257,20 @@ impl RegexRule for VariableRule {
             }
         }
 
-        //if let Some(sems) = state.shared.semantics.as_ref().map(|sems| {
-        //    RefMut::filter_map(sems.borrow_mut(), |sems| sems.get_mut(&token.source()))
-        //        .ok()
-        //        .unwrap()
-        //}) {
-        //    let name = matches.get(2).unwrap().range();
-        //    if let Some(kind) = matches.get(1).map(|m| m.range()) {
-        //        sems.add(token.source(), kind.start-1..kind.start, sems.token.variable_operator);
-        //        sems.add(token.source(), kind, sems.token.variable_kind);
-        //    } else {
-        //        sems.add(token.source(), name.start-1..name.start, sems.token.variable_operator);
-        //    }
-        //    sems.add(token.source(), name.clone(), sems.token.variable_name);
-        //    sems.add(token.source(), name.end..name.end+1, sems.token.variable_sep);
-        //    let value = matches.get(3).unwrap().range();
-        //    sems.add(token.source(), value.clone(), sems.token.variable_value);
-        //}
+        if let Some((sems, tokens)) = Semantics::from_source(token.source(), &state.shared.semantics)
+        {
+            let name = matches.get(2).unwrap().range();
+            if let Some(kind) = matches.get(1).map(|m| m.range()) {
+                sems.add(kind.start-1..kind.start, tokens.variable_operator);
+                sems.add(kind, tokens.variable_kind);
+            } else {
+                sems.add(name.start-1..name.start, tokens.variable_operator);
+            }
+            sems.add(name.clone(), tokens.variable_name);
+            sems.add(name.end..name.end+1, tokens.variable_sep);
+            let value = matches.get(3).unwrap().range();
+            sems.add(value.clone(), tokens.variable_value);
+        }
 
         result
     }
@@ -434,7 +431,15 @@ impl RegexRule for VariableSubstitutionRule {
             _ => panic!("Unknown error"),
         };
-        variable.parse(state, token, document);
+        variable.parse(state, token.clone(), document);
+
+        if let Some((sems, tokens)) = Semantics::from_source(token.source(), &state.shared.semantics)
+        {
+            let name = matches.get(1).unwrap().range();
+            sems.add(name.start-1..name.start, tokens.variable_sub_sep);
+            sems.add(name.clone(), tokens.variable_sub_name);
+            sems.add(name.end..name.end+1, tokens.variable_sub_sep);
+        }
 
         result
     }

View file

@@ -85,6 +85,7 @@ macro_rules! token {
     };
 }
 
+/// Predefined list of tokens
 #[derive(Debug)]
 pub struct Tokens {
     pub section_heading: (u32, u32),
@@ -118,6 +119,16 @@ pub struct Tokens {
     pub variable_name: (u32, u32),
     pub variable_sep: (u32, u32),
     pub variable_value: (u32, u32),
+
+    pub variable_sub_sep: (u32, u32),
+    pub variable_sub_name: (u32, u32),
+
+    pub code_sep: (u32, u32),
+    pub code_props_sep: (u32, u32),
+    pub code_props: (u32, u32),
+    pub code_lang: (u32, u32),
+    pub code_title: (u32, u32),
+    pub code_content: (u32, u32),
 }
 
 impl Tokens {
@@ -154,6 +165,16 @@ impl Tokens {
             variable_name: token!("macro"),
             variable_sep: token!("operator"),
             variable_value: token!("function"),
+
+            variable_sub_sep: token!("operator"),
+            variable_sub_name: token!("macro"),
+
+            code_sep: token!("operator"),
+            code_props_sep: token!("operator"),
+            code_props: token!("enum"),
+            code_lang: token!("function"),
+            code_title: token!("number"),
+            code_content: token!("string"),
         }
     }
 }
@@ -248,7 +269,7 @@ impl<'a> Semantics<'a> {
         while cursor.pos != range.end {
             let end = self.source.content()[cursor.pos..range.end]
                 .find('\n')
-                .unwrap_or(self.source.content().len() - cursor.pos);
+                .unwrap_or(self.source.content().len() - 1) + 1;
             let len = usize::min(range.end - cursor.pos, end);
             let clen = self.source.content()[cursor.pos..cursor.pos + len]
                 .chars()
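
This last hunk is the multiline-token fix from the commit title: an LSP semantic token cannot span a line break, so a range passed to add() is cut at every newline and emitted as one piece per line, and the change adjusts where each piece ends relative to the '\n'. A self-contained sketch of that per-line splitting (a hypothetical helper for illustration, not the crate's actual signature; the real loop also tracks character lengths and a cursor):

use std::ops::Range;

/// Sketch only: cut a byte range into per-line sub-ranges so no emitted token crosses '\n'.
fn split_by_line(content: &str, range: Range<usize>) -> Vec<Range<usize>> {
    let mut out = Vec::new();
    let mut pos = range.start;
    while pos < range.end {
        // End of the current piece: just past the next newline, or the end of the range.
        let end = content[pos..range.end]
            .find('\n')
            .map(|off| pos + off + 1)
            .unwrap_or(range.end);
        out.push(pos..end);
        pos = end;
    }
    out
}

fn main() {
    let src = "```\ncode\n```";
    // A token covering the whole block is split into one piece per line.
    let expected: Vec<Range<usize>> = vec![0..4, 4..9, 9..12];
    assert_eq!(split_by_line(src, 0..src.len()), expected);
}
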