From 8e45b001a31ea7140f8eb89d6300e554b997ef47 Mon Sep 17 00:00:00 2001 From: ef3d0c3e Date: Sat, 19 Oct 2024 21:35:18 +0200 Subject: [PATCH] Semantics refactor --- src/elements/comment.rs | 32 ++++- src/elements/import.rs | 30 ++++ src/elements/link.rs | 62 +++++++- src/elements/reference.rs | 61 +++++++- src/elements/section.rs | 26 ++-- src/elements/style.rs | 36 ++++- src/elements/variable.rs | 19 +++ src/lsp/semantic.rs | 293 +++++++++++++++++++++++++++----------- src/parser/langparser.rs | 8 +- src/parser/parser.rs | 8 +- src/parser/source.rs | 48 +++++-- src/server.rs | 30 ++-- 12 files changed, 514 insertions(+), 139 deletions(-) diff --git a/src/elements/comment.rs b/src/elements/comment.rs index ce29620..0ff57e7 100644 --- a/src/elements/comment.rs +++ b/src/elements/comment.rs @@ -2,6 +2,7 @@ use crate::compiler::compiler::Compiler; use crate::document::document::Document; use crate::document::element::ElemKind; use crate::document::element::Element; +use crate::lsp::semantic::Semantics; use crate::parser::parser::ParserState; use crate::parser::rule::RegexRule; use crate::parser::source::Source; @@ -89,6 +90,12 @@ impl RegexRule for CommentRule { }), ); + if let Some((sems, tokens)) = Semantics::from_source(token.source(), &state.shared.semantics) + { + let comment = matches.get(1).unwrap().range(); + sems.add(comment.start-2..comment.end, tokens.comment); + } + reports } } @@ -101,7 +108,7 @@ mod tests { use crate::parser::langparser::LangParser; use crate::parser::parser::Parser; use crate::parser::source::SourceFile; - use crate::validate_document; + use crate::{validate_document, validate_semantics}; use super::*; @@ -128,4 +135,27 @@ COMMENT ::Test }; ); } + + #[test] + fn semantic() + { + let source = Rc::new(SourceFile::with_content( + "".to_string(), + r#" +::Test + ::Another + :: Another + "# + .to_string(), + None, + )); + let parser = LangParser::default(); + let (_, state) = parser.parse(ParserState::new_with_semantics(&parser, None), source.clone(), None); + + validate_semantics!(state, source.clone(), 0, + comment { delta_line == 1, delta_start == 0, length == 6 }; + comment { delta_line == 1, delta_start == 1, length == 9 }; + comment { delta_line == 1, delta_start == 1, length == 10 }; + ); + } } diff --git a/src/elements/import.rs b/src/elements/import.rs index 8504d85..fe14d72 100644 --- a/src/elements/import.rs +++ b/src/elements/import.rs @@ -12,6 +12,7 @@ use ariadne::Report; use ariadne::ReportKind; use regex::Captures; use regex::Regex; +use std::cell::RefMut; use std::ops::Range; use std::rc::Rc; @@ -179,6 +180,35 @@ impl RegexRule for ImportRule { ); } + /* + if let Some(sems) = state.shared.semantics.as_ref().map(|sems| { + RefMut::filter_map(sems.borrow_mut(), |sems| sems.get_mut(&token.source())) + .ok() + .unwrap() + }) { + // @import + let import = if token.source().content().as_bytes()[matches.get(0).unwrap().start()] == b'\n' + { + matches.get(0).unwrap().start() + 1 + } + else + { + matches.get(0).unwrap().start() + }; + sems.add(token.source(), import..import + 7, sems.token.import_import); + + if let Some(import_as) = matches.get(1) + { + sems.add(token.source(), import_as.start()-1..import_as.start(), sems.token.import_as_sep); + sems.add(token.source(), import_as.range(), sems.token.import_as); + sems.add(token.source(), import_as.end()..import_as.end()+1, sems.token.import_as_sep); + } + + let path = matches.get(2).unwrap().range(); + sems.add(token.source(), path, sems.token.import_path); + } + */ + result } } diff --git a/src/elements/link.rs b/src/elements/link.rs index 0152d9e..b2e4ae1 100644 --- a/src/elements/link.rs +++ b/src/elements/link.rs @@ -4,6 +4,7 @@ use crate::document::document::Document; use crate::document::element::ContainerElement; use crate::document::element::ElemKind; use crate::document::element::Element; +use crate::lsp::semantic::Semantics; use crate::lua::kernel::CTX; use crate::parser::parser::ParserState; use crate::parser::rule::RegexRule; @@ -134,6 +135,10 @@ impl RegexRule for LinkRule { return reports; } + if let Some((sems, tokens)) = Semantics::from_source(token.source(), &state.shared.semantics) + { + sems.add(display.range().start-1..display.range().start, tokens.link_display_sep); + } let source = Rc::new(VirtualSource::new( Token::new(display.range(), token.source()), "Link Display".to_string(), @@ -201,12 +206,27 @@ impl RegexRule for LinkRule { state.push( document, Box::new(Link { - location: token, + location: token.clone(), display: link_display, url: link_url, }), ); + //if let Some(sems) = state.shared.semantics.as_ref().map(|sems| { + // RefMut::filter_map(sems.borrow_mut(), |sems| sems.get_mut(&token.source())) + // .ok() + // .unwrap() + //}) { + // let name = matches.get(1).unwrap().range(); + // sems.add(token.source(), name.start-1..name.start, sems.token.link_name_sep); + // sems.add(token.source(), name.clone(), sems.token.link_name); + // sems.add(token.source(), name.end..name.end+1, sems.token.link_name_sep); + // let url = matches.get(2).unwrap().range(); + // sems.add(token.source(), url.start-1..url.start, sems.token.link_url_sep); + // sems.add(token.source(), url.clone(), sems.token.link_url); + // sems.add(token.source(), url.end..url.end+1, sems.token.link_url_sep); + //} + reports } @@ -270,7 +290,7 @@ mod tests { use crate::parser::langparser::LangParser; use crate::parser::parser::Parser; use crate::parser::source::SourceFile; - use crate::validate_document; + use crate::{validate_document, validate_semantics}; use super::*; @@ -331,4 +351,42 @@ nml.link.push("**BOLD link**", "another url") }; ); } + + #[test] + fn semantics() + { + let source = Rc::new(SourceFile::with_content( + "".to_string(), + r#" +[li**n**k](url) + "# + .to_string(), + None, + )); + let parser = LangParser::default(); + let (_, state) = parser.parse(ParserState::new_with_semantics(&parser, None), source.clone(), None); + + println!("{:#?}", state.shared.semantics); + /* + let source = Rc::new(SourceFile::with_content( + "".to_string(), + r#" +[link](url) + "# + .to_string(), + None, + )); + let parser = LangParser::default(); + let (_, state) = parser.parse(ParserState::new_with_semantics(&parser, None), source.clone(), None); + + validate_semantics!(state, source.clone(), 0, + link_name_sep { delta_line == 1, delta_start == 0, length == 1 }; + link_name { delta_line == 0, delta_start == 1, length == 4 }; + link_name_sep { delta_line == 0, delta_start == 4, length == 1 }; + link_url_sep { delta_line == 0, delta_start == 1, length == 1 }; + link_url { delta_line == 0, delta_start == 1, length == 3 }; + link_url_sep { delta_line == 0, delta_start == 3, length == 1 }; + ); + */ + } } diff --git a/src/elements/reference.rs b/src/elements/reference.rs index 332dd11..b73cb3e 100644 --- a/src/elements/reference.rs +++ b/src/elements/reference.rs @@ -1,3 +1,4 @@ +use std::cell::RefMut; use std::collections::HashMap; use std::ops::Range; use std::rc::Rc; @@ -177,7 +178,7 @@ impl ReferenceRule { ), ); Self { - re: [Regex::new(r"§\{(.*?)\}(\[((?:\\.|[^\\\\])*?)\])?").unwrap()], + re: [Regex::new(r"&\{(.*?)\}(?:\[((?:\\.|[^\\\\])*?)\])?").unwrap()], properties: PropertyParser { properties: props }, } } @@ -284,7 +285,7 @@ impl RegexRule for ReferenceRule { }; // Properties - let properties = match self.parse_properties(state.parser.colors(), &token, &matches.get(3)) + let properties = match self.parse_properties(state.parser.colors(), &token, &matches.get(2)) { Ok(pm) => pm, Err(report) => { @@ -315,7 +316,7 @@ impl RegexRule for ReferenceRule { state.push( document, Box::new(ExternalReference { - location: token, + location: token.clone(), reference: CrossReference::Unspecific(refname), caption, style, @@ -326,24 +327,72 @@ impl RegexRule for ReferenceRule { state.push( document, Box::new(ExternalReference { - location: token, - reference: CrossReference::Specific(refdoc, refname), + location: token.clone(), + reference: CrossReference::Specific(refdoc.clone(), refname), caption, style, }), ); } + + /* + if let Some(sems) = state.shared.semantics.as_ref().map(|sems| { + RefMut::filter_map(sems.borrow_mut(), |sems| sems.get_mut(&token.source())) + .ok() + .unwrap() + }) { + let link = matches.get(1).unwrap().range(); + sems.add(token.source(), link.start-2..link.start-1, sems.token.reference_operator); + sems.add(token.source(), link.start-1..link.start, sems.token.reference_link_sep); + + if !refdoc.is_empty() + { + sems.add(token.source(), link.start.. refdoc.len()+link.start, sems.token.reference_doc); + } + sems.add(token.source(), refdoc.len()+link.start.. refdoc.len()+link.start+1, sems.token.reference_doc_sep); + sems.add(token.source(), refdoc.len()+link.start+1..link.end, sems.token.reference_link); + sems.add(token.source(), link.end..link.end+1, sems.token.reference_link_sep); + } + */ } else { state.push( document, Box::new(InternalReference { - location: token, + location: token.clone(), refname, caption, }), ); + /* + if let Some(sems) = state.shared.semantics.as_ref().map(|sems| { + RefMut::filter_map(sems.borrow_mut(), |sems| sems.get_mut(&token.source())) + .ok() + .unwrap() + }) { + let link = matches.get(1).unwrap().range(); + sems.add(token.source(), link.start-2..link.start-1, sems.token.reference_operator); + sems.add(token.source(), link.start-1..link.start, sems.token.reference_link_sep); + sems.add(token.source(), link.clone(), sems.token.reference_link); + sems.add(token.source(), link.end..link.end+1, sems.token.reference_link_sep); + } + */ } + /* + if let Some(sems) = state.shared.semantics.as_ref().map(|sems| { + RefMut::filter_map(sems.borrow_mut(), |sems| sems.get_mut(&token.source())) + .ok() + .unwrap() + }) { + if let Some(props) = matches.get(2).map(|m| m.range()) + { + sems.add(token.source(), props.start-1..props.start, sems.token.reference_props_sep); + sems.add(token.source(), props.clone(), sems.token.reference_props); + sems.add(token.source(), props.end..props.end+1, sems.token.reference_props_sep); + } + } + */ + reports } diff --git a/src/elements/section.rs b/src/elements/section.rs index fc0c28a..e15cd97 100644 --- a/src/elements/section.rs +++ b/src/elements/section.rs @@ -4,6 +4,7 @@ use crate::document::document::Document; use crate::document::element::ElemKind; use crate::document::element::Element; use crate::document::element::ReferenceableElement; +use crate::lsp::semantic::Semantics; use crate::lua::kernel::CTX; use crate::parser::parser::ParserState; use crate::parser::rule::RegexRule; @@ -20,7 +21,6 @@ use mlua::Lua; use regex::Regex; use section_style::SectionLinkPos; use section_style::SectionStyle; -use std::cell::RefMut; use std::ops::Range; use std::rc::Rc; use std::sync::Arc; @@ -327,21 +327,18 @@ impl RegexRule for SectionRule { }), ); - if let Some(mut sems) = state.shared.semantics.as_ref().map(|sems| { - RefMut::filter_map(sems.borrow_mut(), |sems| sems.get_mut(&token.source())) - .ok() - .unwrap() - }) { - sems.add(token.source(), matches.get(1).unwrap().range(), sems.token.section_heading); + if let Some((sems, tokens)) = Semantics::from_source(token.source(), &state.shared.semantics) + { + sems.add(matches.get(1).unwrap().range(), tokens.section_heading); if let Some(reference) = matches.get(2) { - sems.add(token.source(), reference.start()-1..reference.end()+1, sems.token.section_reference); + sems.add(reference.start()-1..reference.end()+1, tokens.section_reference); } if let Some(kind) = matches.get(3) { - sems.add(token.source(), kind.range(), sems.token.section_kind); + sems.add(kind.range(), tokens.section_kind); } - sems.add(token.source(), matches.get(5).unwrap().range(), sems.token.section_name); + sems.add(matches.get(5).unwrap().range(), tokens.section_name); } result @@ -555,9 +552,7 @@ nml.section.push("6", 6, "", "refname") let source = Rc::new(SourceFile::with_content( "".to_string(), r#" -# First section -##{か}+ test -#{refname}*+ Another section +#{📫} test "# .to_string(), None, @@ -565,13 +560,14 @@ nml.section.push("6", 6, "", "refname") let parser = LangParser::default(); let (_, state) = parser.parse(ParserState::new_with_semantics(&parser, None), source.clone(), None); + println!("{:#?}", state.shared.semantics); validate_semantics!(state, source.clone(), 0, section_heading { delta_line == 1, delta_start == 0, length == 1 }; section_name { delta_line == 0, delta_start == 1 }; section_heading { delta_line == 1, delta_start == 0, length == 2 }; - section_reference { delta_line == 0, delta_start == 2, length == 3 }; - section_kind { delta_line == 0, delta_start == 3, length == 1 }; + section_reference { delta_line == 0, delta_start == 2, length == 4 }; + section_kind { delta_line == 0, delta_start == 4, length == 1 }; section_name { delta_line == 0, delta_start == 1 }; section_heading { delta_line == 1, delta_start == 0, length == 1 }; diff --git a/src/elements/style.rs b/src/elements/style.rs index ef81eea..ab82574 100644 --- a/src/elements/style.rs +++ b/src/elements/style.rs @@ -4,6 +4,7 @@ use crate::document::document::Document; use crate::document::document::DocumentAccessors; use crate::document::element::ElemKind; use crate::document::element::Element; +use crate::lsp::semantic::Semantics; use crate::lua::kernel::CTX; use crate::parser::parser::ParserState; use crate::parser::rule::RegexRule; @@ -199,6 +200,11 @@ impl RegexRule for StyleRule { style_state.toggled[index].is_none(), )), ); + + if let Some((sems, tokens)) = Semantics::from_source(token.source(), &state.shared.semantics) + { + sems.add(token.start()..token.end(), tokens.style_marker); + } } else { panic!("Invalid state at `{STATE_NAME}`"); } @@ -279,7 +285,7 @@ mod tests { use crate::parser::langparser::LangParser; use crate::parser::parser::Parser; use crate::parser::source::SourceFile; - use crate::validate_document; + use crate::{validate_document, validate_semantics}; use super::*; @@ -364,4 +370,32 @@ terminated here%% }; ); } + + #[test] + fn semantic() + { + let source = Rc::new(SourceFile::with_content( + "".to_string(), + r#" +**test** `another` +__test__ *another* + "# + .to_string(), + None, + )); + let parser = LangParser::default(); + let (_, state) = parser.parse(ParserState::new_with_semantics(&parser, None), source.clone(), None); + + validate_semantics!(state, source.clone(), 0, + style_marker { delta_line == 1, delta_start == 0, length == 2 }; + style_marker { delta_line == 0, delta_start == 6, length == 2 }; + style_marker { delta_line == 0, delta_start == 3, length == 1 }; + style_marker { delta_line == 0, delta_start == 8, length == 1 }; + + style_marker { delta_line == 1, delta_start == 0, length == 2 }; + style_marker { delta_line == 0, delta_start == 6, length == 2 }; + style_marker { delta_line == 0, delta_start == 3, length == 1 }; + style_marker { delta_line == 0, delta_start == 8, length == 1 }; + ); + } } diff --git a/src/elements/variable.rs b/src/elements/variable.rs index 9e3621b..161d19b 100644 --- a/src/elements/variable.rs +++ b/src/elements/variable.rs @@ -15,6 +15,7 @@ use ariadne::ReportKind; use mlua::Function; use mlua::Lua; use regex::Regex; +use std::cell::RefMut; use std::ops::Range; use std::rc::Rc; use std::str::FromStr; @@ -256,6 +257,24 @@ impl RegexRule for VariableRule { } } + //if let Some(sems) = state.shared.semantics.as_ref().map(|sems| { + // RefMut::filter_map(sems.borrow_mut(), |sems| sems.get_mut(&token.source())) + // .ok() + // .unwrap() + //}) { + // let name = matches.get(2).unwrap().range(); + // if let Some(kind) = matches.get(1).map(|m| m.range()) { + // sems.add(token.source(), kind.start-1..kind.start, sems.token.variable_operator); + // sems.add(token.source(), kind, sems.token.variable_kind); + // } else { + // sems.add(token.source(), name.start-1..name.start, sems.token.variable_operator); + // } + // sems.add(token.source(), name.clone(), sems.token.variable_name); + // sems.add(token.source(), name.end..name.end+1, sems.token.variable_sep); + // let value = matches.get(3).unwrap().range(); + // sems.add(token.source(), value.clone(), sems.token.variable_value); + //} + result } diff --git a/src/lsp/semantic.rs b/src/lsp/semantic.rs index 79c9506..745c855 100644 --- a/src/lsp/semantic.rs +++ b/src/lsp/semantic.rs @@ -1,65 +1,70 @@ +use std::cell::Ref; use std::cell::RefCell; +use std::cell::RefMut; +use std::collections::HashMap; use std::ops::Range; use std::rc::Rc; use tower_lsp::lsp_types::SemanticToken; use tower_lsp::lsp_types::SemanticTokenModifier; use tower_lsp::lsp_types::SemanticTokenType; +use unicode_width::UnicodeWidthStr; use crate::parser::source::LineCursor; use crate::parser::source::Source; - +use crate::parser::source::SourceFile; +use crate::parser::source::VirtualSource; pub const TOKEN_TYPE: &[SemanticTokenType] = &[ - SemanticTokenType::NAMESPACE, - SemanticTokenType::TYPE, - SemanticTokenType::CLASS, - SemanticTokenType::ENUM, - SemanticTokenType::INTERFACE, - SemanticTokenType::STRUCT, - SemanticTokenType::TYPE_PARAMETER, - SemanticTokenType::PARAMETER, - SemanticTokenType::VARIABLE, - SemanticTokenType::PROPERTY, - SemanticTokenType::ENUM_MEMBER, - SemanticTokenType::EVENT, - SemanticTokenType::FUNCTION, - SemanticTokenType::METHOD, - SemanticTokenType::MACRO, - SemanticTokenType::KEYWORD, - SemanticTokenType::MODIFIER, - SemanticTokenType::COMMENT, - SemanticTokenType::STRING, - SemanticTokenType::NUMBER, - SemanticTokenType::REGEXP, - SemanticTokenType::OPERATOR, - SemanticTokenType::DECORATOR, + SemanticTokenType::NAMESPACE, + SemanticTokenType::TYPE, + SemanticTokenType::CLASS, + SemanticTokenType::ENUM, + SemanticTokenType::INTERFACE, + SemanticTokenType::STRUCT, + SemanticTokenType::TYPE_PARAMETER, + SemanticTokenType::PARAMETER, + SemanticTokenType::VARIABLE, + SemanticTokenType::PROPERTY, + SemanticTokenType::ENUM_MEMBER, + SemanticTokenType::EVENT, + SemanticTokenType::FUNCTION, + SemanticTokenType::METHOD, + SemanticTokenType::MACRO, + SemanticTokenType::KEYWORD, + SemanticTokenType::MODIFIER, + SemanticTokenType::COMMENT, + SemanticTokenType::STRING, + SemanticTokenType::NUMBER, + SemanticTokenType::REGEXP, + SemanticTokenType::OPERATOR, + SemanticTokenType::DECORATOR, ]; pub const TOKEN_MODIFIERS: &[SemanticTokenModifier] = &[ - SemanticTokenModifier::DECLARATION, - SemanticTokenModifier::DEFINITION, - SemanticTokenModifier::READONLY, - SemanticTokenModifier::STATIC, - SemanticTokenModifier::DEPRECATED, - SemanticTokenModifier::ABSTRACT, - SemanticTokenModifier::ASYNC, - SemanticTokenModifier::MODIFICATION, - SemanticTokenModifier::DOCUMENTATION, - SemanticTokenModifier::DEFAULT_LIBRARY, + SemanticTokenModifier::DECLARATION, + SemanticTokenModifier::DEFINITION, + SemanticTokenModifier::READONLY, + SemanticTokenModifier::STATIC, + SemanticTokenModifier::DEPRECATED, + SemanticTokenModifier::ABSTRACT, + SemanticTokenModifier::ASYNC, + SemanticTokenModifier::MODIFICATION, + SemanticTokenModifier::DOCUMENTATION, + SemanticTokenModifier::DEFAULT_LIBRARY, ]; -fn token_index(name: &str) -> u32 -{ - TOKEN_TYPE.iter() +fn token_index(name: &str) -> u32 { + TOKEN_TYPE + .iter() .enumerate() .find(|(_, token)| token.as_str() == name) .map(|(index, _)| index as u32) .unwrap_or(0) } -fn modifier_index(name: &str) -> u32 -{ - TOKEN_MODIFIERS.iter() +fn modifier_index(name: &str) -> u32 { + TOKEN_MODIFIERS + .iter() .enumerate() .find(|(_, token)| token.as_str() == name) .map(|(index, _)| index as u32) @@ -71,45 +76,93 @@ macro_rules! token { (token_index($key), 0) } }; - ($key:expr, $($mods:tt)*) => { + ($key:expr, $($mods:tt),*) => { { let mut bitset : u32 = 0; $( bitset |= 1 << modifier_index($mods); )* - (token_index($key), bitset) + (token_index($key), bitset) } }; } #[derive(Debug)] -pub struct Tokens -{ +pub struct Tokens { pub section_heading: (u32, u32), pub section_reference: (u32, u32), pub section_kind: (u32, u32), pub section_name: (u32, u32), + + pub comment: (u32, u32), + + pub link_display_sep: (u32, u32), + pub link_url_sep: (u32, u32), + pub link_url: (u32, u32), + + pub style_marker: (u32, u32), + + pub import_import: (u32, u32), + pub import_as_sep: (u32, u32), + pub import_as: (u32, u32), + pub import_path: (u32, u32), + + pub reference_operator: (u32, u32), + pub reference_link_sep: (u32, u32), + pub reference_doc_sep: (u32, u32), + pub reference_doc: (u32, u32), + pub reference_link: (u32, u32), + pub reference_props_sep: (u32, u32), + pub reference_props: (u32, u32), + + pub variable_operator: (u32, u32), + pub variable_kind: (u32, u32), + pub variable_name: (u32, u32), + pub variable_sep: (u32, u32), + pub variable_value: (u32, u32), } -impl Tokens -{ - pub fn new() -> Self - { +impl Tokens { + pub fn new() -> Self { Self { - section_heading : token!("number"), - section_reference : token!("enum", "async"), - section_kind : token!("enum"), - section_name : token!("string"), + section_heading: token!("number"), + section_reference: token!("enum", "async"), + section_kind: token!("enum"), + section_name: token!("string"), + + comment: token!("comment"), + + link_display_sep: token!("macro"), + link_url_sep: token!("macro"), + link_url: token!("operator", "readonly", "abstract", "abstract"), + + style_marker: token!("operator"), + + import_import: token!("macro"), + import_as_sep: token!("operator"), + import_as: token!("operator"), + import_path: token!("function"), + + reference_operator: token!("operator"), + reference_link_sep: token!("operator"), + reference_doc_sep: token!("function"), + reference_doc: token!("function"), + reference_link: token!("macro"), + reference_props_sep: token!("operator"), + reference_props: token!("enum"), + + variable_operator: token!("operator"), + variable_kind: token!("operator"), + variable_name: token!("macro"), + variable_sep: token!("operator"), + variable_value: token!("function"), } } } -/// Semantics for a buffer +/// Per file semantic tokens #[derive(Debug)] -pub struct Semantics { - /// The tokens - pub token: Tokens, - +pub struct SemanticsData { /// The current cursor cursor: RefCell, @@ -117,54 +170,110 @@ pub struct Semantics { pub tokens: RefCell>, } -impl Semantics { - pub fn new(source: Rc) -> Semantics { +impl SemanticsData +{ + pub fn new(source: Rc) -> Self + { Self { - token: Tokens::new(), cursor: RefCell::new(LineCursor::new(source)), tokens: RefCell::new(vec![]), } } +} - pub fn add( - &self, +#[derive(Debug)] +pub struct Semantics<'a> { + pub(self) sems: Ref<'a, SemanticsData>, + pub(self) source: Rc, + pub(self) range: Range, +} + +impl<'a> Semantics<'a> { + fn from_source_impl( source: Rc, - range: Range, - token: (u32, u32) - ) { - let mut tokens = self.tokens.borrow_mut(); - let mut cursor = self.cursor.borrow_mut(); + semantics: &'a Option>, + range: Range) + -> Option<(Self, Ref<'a, Tokens>)> + { + if let Some(location) = source + .clone() + .downcast_rc::() + .ok() + .as_ref() + .map(|parent| parent.location()) + .unwrap_or(None) + { + return Self::from_source_impl(location.source(), semantics, range); + } else if let Some(source) = source.clone().downcast_rc::().ok() { + return Ref::filter_map( + semantics.as_ref().unwrap().borrow(), + |semantics: &SemanticsHolder| { + semantics.sems.get(&(source.clone() as Rc)) + }, + ) + .ok() + .map(|sems| { + ( + Self { + sems, + source, + range, + }, + Ref::map( + semantics.as_ref().unwrap().borrow(), + |semantics: &SemanticsHolder| &semantics.tokens, + ), + ) + }); + } + return None; + } + + pub fn from_source( + source: Rc, + semantics: &'a Option>, + ) -> Option<(Self, Ref<'a, Tokens>)> { + if semantics.is_none() { + return None; + } + let range = source.location().map_or_else( + || 0..source.content().len(), + |location| location.range.clone()); + return Self::from_source_impl(source, semantics, range); + } + + pub fn add(&self, range: Range, token: (u32, u32)) { + let range = self.range.start+range.start..self.range.start+range.end; + let mut tokens = self.sems.tokens.borrow_mut(); + let mut cursor = self.sems.cursor.borrow_mut(); let mut current = cursor.clone(); cursor.move_to(range.start); while cursor.pos != range.end { - let end = source.content()[cursor.pos..] + let end = self.source.content()[cursor.pos..range.end] .find('\n') - .unwrap_or(source.content().len() - cursor.pos); + .unwrap_or(self.source.content().len() - cursor.pos); let len = usize::min(range.end - cursor.pos, end); - let clen = source.content()[cursor.pos..cursor.pos+len] - .chars() - .fold(0, |clen, _| clen + 1); + let clen = self.source.content()[cursor.pos..cursor.pos + len].width(); // TODO Fix issue with CJK characters let delta_line = cursor.line - current.line; let delta_start = if delta_line == 0 { - if let Some(last) = tokens.last() { - cursor.line_pos - current.line_pos + last.length as usize - } else { - cursor.line_pos - current.line_pos - } + cursor.line_pos - current.line_pos } else { cursor.line_pos }; - //eprintln!("CURRENT={:#?}, CURS={:#?}", current, cursor); tokens.push(SemanticToken { delta_line: delta_line as u32, delta_start: delta_start as u32, length: clen as u32, token_type: token.0, - token_modifiers_bitset: token.1 + token_modifiers_bitset: token.1, }); + if cursor.pos + len == range.end + { + break; + } current = cursor.clone(); let pos = cursor.pos; cursor.move_to(pos + len); @@ -172,6 +281,21 @@ impl Semantics { } } +#[derive(Debug)] +pub struct SemanticsHolder { + pub tokens: Tokens, + pub sems: HashMap, SemanticsData>, +} + +impl SemanticsHolder { + pub fn new() -> Self { + Self { + tokens: Tokens::new(), + sems: HashMap::new(), + } + } +} + #[cfg(test)] pub mod tests { #[macro_export] @@ -182,6 +306,7 @@ pub mod tests { .as_ref() .unwrap() .borrow() + .sems .get(&($source as Rc)) .unwrap() .tokens @@ -191,12 +316,9 @@ pub mod tests { .as_ref() .unwrap() .borrow() - .get(&($source as Rc)) - .unwrap() - .token + .tokens .$token_name; - let found_token = (token.token_type, token.token_modifiers_bitset); assert!(found_token == token_type, "Invalid token at index {}, expected {}{token_type:#?}, got: {found_token:#?}", $idx, stringify!($token_name)); @@ -217,6 +339,7 @@ pub mod tests { .as_ref() .unwrap() .borrow() + .sems .get(&($source as Rc)) .unwrap() .tokens @@ -226,9 +349,7 @@ pub mod tests { .as_ref() .unwrap() .borrow() - .get(&($source as Rc)) - .unwrap() - .token + .tokens .$token_name; diff --git a/src/parser/langparser.rs b/src/parser/langparser.rs index dde7396..e14cc43 100644 --- a/src/parser/langparser.rs +++ b/src/parser/langparser.rs @@ -6,6 +6,7 @@ use crate::document::element::DocumentEnd; use crate::document::langdocument::LangDocument; use crate::elements::text::Text; use crate::lsp::semantic::Semantics; +use crate::lsp::semantic::SemanticsData; use super::parser::Parser; use super::parser::ParserState; @@ -13,6 +14,7 @@ use super::parser::ReportColors; use super::rule::Rule; use super::source::Cursor; use super::source::Source; +use super::source::SourceFile; use super::source::Token; use super::util; @@ -61,12 +63,12 @@ impl Parser for LangParser { let doc = LangDocument::new(source.clone(), parent); // Insert semantics into state - if let Some(semantics) = state.shared.semantics.as_ref() + if let (Some(_), Some(semantics)) = (source.clone().downcast_rc::().ok(), state.shared.semantics.as_ref()) { let mut b = semantics.borrow_mut(); - if !b.contains_key(&source) + if !b.sems.contains_key(&source) { - b.insert(source.clone(), Semantics::new(source.clone())); + b.sems.insert(source.clone(), SemanticsData::new(source.clone())); } } diff --git a/src/parser/parser.rs b/src/parser/parser.rs index 84cd9ed..a1a7d9c 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -21,7 +21,7 @@ use crate::document::element::ContainerElement; use crate::document::element::ElemKind; use crate::document::element::Element; use crate::elements::paragraph::Paragraph; -use crate::lsp::semantic::Semantics; +use crate::lsp::semantic::SemanticsHolder; use crate::lua::kernel::Kernel; use crate::lua::kernel::KernelHolder; use crate::parser::source::SourceFile; @@ -72,8 +72,8 @@ pub struct SharedState { /// The custom styles pub custom_styles: RefCell, - /// The semantic map - pub semantics: Option, Semantics>>>, + /// The semantics + pub semantics: Option>, } impl SharedState { @@ -85,7 +85,7 @@ impl SharedState { styles: RefCell::new(StyleHolder::default()), layouts: RefCell::new(LayoutHolder::default()), custom_styles: RefCell::new(CustomStyleHolder::default()), - semantics: enable_semantics.then_some(RefCell::new(HashMap::new())), + semantics: enable_semantics.then_some(RefCell::new(SemanticsHolder::new())), }; // Register default kernel diff --git a/src/parser/source.rs b/src/parser/source.rs index 449b740..c2edcab 100644 --- a/src/parser/source.rs +++ b/src/parser/source.rs @@ -7,7 +7,7 @@ use downcast_rs::impl_downcast; use downcast_rs::Downcast; /// Trait for source content -pub trait Source: Downcast { +pub trait Source: Downcast + Debug { /// Gets the source's location fn location(&self) -> Option<&Token>; /// Gets the source's name @@ -23,12 +23,6 @@ impl core::fmt::Display for dyn Source { } } -impl core::fmt::Debug for dyn Source { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Source{{{}}}", self.name()) - } -} - impl std::cmp::PartialEq for dyn Source { fn eq(&self, other: &Self) -> bool { self.name() == other.name() @@ -43,6 +37,7 @@ impl std::hash::Hash for dyn Source { } } +#[derive(Debug)] pub struct SourceFile { location: Option, path: String, @@ -74,6 +69,11 @@ impl SourceFile { content, } } + + pub fn path(&self) -> &String + { + &self.path + } } impl Source for SourceFile { @@ -88,6 +88,7 @@ impl Source for SourceFile { } } +#[derive(Debug)] pub struct VirtualSource { location: Token, name: String, @@ -181,7 +182,7 @@ impl LineCursor { pub fn move_to(&mut self, pos: usize) { if self.pos < pos { let start = self.pos; - let mut it = self.source.content().as_str()[start..] // pos+1 + let mut it = self.source.content().as_str()[start..] .chars() .peekable(); @@ -193,7 +194,7 @@ impl LineCursor { let c = it.next().unwrap(); let len = c.len_utf8(); - if self.pos != 0 && prev == Some('\n') { + if self.pos != start && prev == Some('\n') { self.line += 1; self.line_pos = 0; } @@ -201,12 +202,37 @@ impl LineCursor { self.pos += len; prev = Some(c); } - if self.pos != 0 && prev == Some('\n') { + if self.pos != start && prev == Some('\n') { self.line += 1; self.line_pos = 0; } } else if self.pos > pos { - panic!("Going back is not supported"); + panic!(); + let start = self.pos; + let mut it = self.source.content().as_str()[..start] + .chars() + .rev() + .peekable(); + + let mut prev = self.source.content().as_str()[start..] + .chars() + .next(); + while self.pos > pos { + let c = it.next().unwrap(); + let len = c.len_utf8(); + + if self.pos != start && prev == Some('\n') { + self.line -= 1; + self.line_pos = 0; + } + self.line_pos -= 1; + self.pos -= len; + prev = Some(c); + } + if self.pos != start && prev == Some('\n') { + self.line -= 1; + self.line_pos = 0; + } } // May fail if pos is not utf8-aligned diff --git a/src/server.rs b/src/server.rs index 50b067f..b7c332a 100644 --- a/src/server.rs +++ b/src/server.rs @@ -42,16 +42,23 @@ impl Backend { // Which will require a dyn Document to work let source = Rc::new(SourceFile::with_content(params.uri.to_string(), params.text.clone(), None)); let parser = LangParser::default(); - let (doc, state) = parser.parse(ParserState::new_with_semantics(&parser, None), source.clone(), None); + let (_doc, state) = parser.parse(ParserState::new_with_semantics(&parser, None), source.clone(), None); - if let Some(sems) = state.shared.semantics.as_ref().map(|sems| { - std::cell::RefMut::filter_map(sems.borrow_mut(), |sems| sems.get_mut(&(source as Rc))) - .ok() - .unwrap() - }) { - self.semantic_token_map - .insert(params.uri.to_string(), sems.tokens.borrow().to_owned()); - }; + if let Some(sems) = state.shared.semantics.as_ref() + { + let borrow = sems.borrow(); + for (source, sem) in &borrow.sems + { + if let Some(path) = source.clone().downcast_rc::() + .ok() + .map(|source| source.path().to_owned()) + { + self.semantic_token_map + .insert(path, sem.tokens.replace(vec![])); + } + } + + } } } @@ -59,7 +66,6 @@ impl Backend { impl LanguageServer for Backend { async fn initialize(&self, params: InitializeParams) -> Result { Ok(InitializeResult { - server_info: None, capabilities: ServerCapabilities { text_document_sync: Some(TextDocumentSyncCapability::Kind( TextDocumentSyncKind::FULL, @@ -98,6 +104,10 @@ impl LanguageServer for Backend { ), ..ServerCapabilities::default() }, + server_info: Some(ServerInfo { + name: "nmlls".into(), + version: Some("0.1".into()) + }), }) }