// nml/src/lsp/semantic.rs (520 lines, 13 KiB, Rust)

use std::cell::Ref;
use std::cell::RefCell;
use std::collections::VecDeque;
use std::ops::Range;
use std::rc::Rc;
use tower_lsp::lsp_types::SemanticToken;
use tower_lsp::lsp_types::SemanticTokenModifier;
use tower_lsp::lsp_types::SemanticTokenType;
use crate::parser::source::LineCursor;
use crate::parser::source::Source;
use crate::parser::source::SourceFile;
use crate::parser::source::SourcePosition;
use crate::parser::source::VirtualSource;
use super::data::LSPData;
/// Legend of semantic token types advertised to the LSP client.
///
/// The position of an entry in this slice is the numeric `token_type` id
/// sent in [`SemanticToken`]s; [`token_index`] resolves an LSP type name
/// (e.g. `"keyword"`) back to its position here.
pub const TOKEN_TYPE: &[SemanticTokenType] = &[
SemanticTokenType::NAMESPACE,
SemanticTokenType::TYPE,
SemanticTokenType::CLASS,
SemanticTokenType::ENUM,
SemanticTokenType::INTERFACE,
SemanticTokenType::STRUCT,
SemanticTokenType::TYPE_PARAMETER,
SemanticTokenType::PARAMETER,
SemanticTokenType::VARIABLE,
SemanticTokenType::PROPERTY,
SemanticTokenType::ENUM_MEMBER,
SemanticTokenType::EVENT,
SemanticTokenType::FUNCTION,
SemanticTokenType::METHOD,
SemanticTokenType::MACRO,
SemanticTokenType::KEYWORD,
SemanticTokenType::MODIFIER,
SemanticTokenType::COMMENT,
SemanticTokenType::STRING,
SemanticTokenType::NUMBER,
SemanticTokenType::REGEXP,
SemanticTokenType::OPERATOR,
SemanticTokenType::DECORATOR,
];
/// Legend of semantic token modifiers advertised to the LSP client.
///
/// The position of an entry is its bit in the `token_modifiers_bitset` of a
/// [`SemanticToken`]; [`modifier_index`] resolves a modifier name to its
/// position here.
pub const TOKEN_MODIFIERS: &[SemanticTokenModifier] = &[
SemanticTokenModifier::DECLARATION,
SemanticTokenModifier::DEFINITION,
SemanticTokenModifier::READONLY,
SemanticTokenModifier::STATIC,
SemanticTokenModifier::DEPRECATED,
SemanticTokenModifier::ABSTRACT,
SemanticTokenModifier::ASYNC,
SemanticTokenModifier::MODIFICATION,
SemanticTokenModifier::DOCUMENTATION,
SemanticTokenModifier::DEFAULT_LIBRARY,
];
/// Resolves an LSP token type name (e.g. `"keyword"`) to its index in
/// [`TOKEN_TYPE`].
///
/// Unknown names silently fall back to index 0 (`namespace`); the callers in
/// [`Tokens::new`] only pass names present in the legend.
fn token_index(name: &str) -> u32 {
    TOKEN_TYPE
        .iter()
        .position(|token| token.as_str() == name)
        .map_or(0, |index| index as u32)
}
/// Resolves an LSP token modifier name (e.g. `"readonly"`) to its bit
/// position in [`TOKEN_MODIFIERS`].
///
/// Unknown names silently fall back to bit 0 (`declaration`); the callers in
/// [`Tokens::new`] only pass names present in the legend.
fn modifier_index(name: &str) -> u32 {
    TOKEN_MODIFIERS
        .iter()
        .position(|token| token.as_str() == name)
        .map_or(0, |index| index as u32)
}
/// Builds a `(token_type index, modifier bitset)` pair from LSP names.
///
/// - `token!("type")` — type only, empty modifier bitset.
/// - `token!("type", "mod1", "mod2", …)` — type plus any number of modifier
///   names, each OR-ed into the bitset at the position given by
///   [`modifier_index`].
macro_rules! token {
($key:expr) => {
{
(token_index($key), 0)
}
};
($key:expr, $($mods:tt),*) => {
{
// Accumulate one bit per modifier name.
let mut bitset : u32 = 0;
$(
bitset |= 1 << modifier_index($mods);
)*
(token_index($key), bitset)
}
};
}
/// Predefined list of semantic tokens, one `(type index, modifier bitset)`
/// pair per syntactic element of the NML language.
///
/// Resolved once from the string legends by [`Tokens::new`]; looked up by
/// field name when elements emit their highlighting.
#[derive(Debug)]
pub struct Tokens {
// Sections
pub section_heading: (u32, u32),
pub section_reference: (u32, u32),
pub section_kind: (u32, u32),
pub section_name: (u32, u32),
// Property lists (`name=value, …`)
pub prop_equal: (u32, u32),
pub prop_comma: (u32, u32),
pub prop_name: (u32, u32),
pub prop_value: (u32, u32),
pub comment: (u32, u32),
// Links
pub link_display_sep: (u32, u32),
pub link_url_sep: (u32, u32),
pub link_url: (u32, u32),
// Styles
pub style_marker: (u32, u32),
pub customstyle_marker: (u32, u32),
// Imports
pub import_import: (u32, u32),
pub import_as_sep: (u32, u32),
pub import_as: (u32, u32),
pub import_path: (u32, u32),
// References
pub reference_operator: (u32, u32),
pub reference_link_sep: (u32, u32),
pub reference_doc_sep: (u32, u32),
pub reference_doc: (u32, u32),
pub reference_link: (u32, u32),
pub reference_props_sep: (u32, u32),
// Variable definition and substitution
pub variable_operator: (u32, u32),
pub variable_kind: (u32, u32),
pub variable_name: (u32, u32),
pub variable_sep: (u32, u32),
pub variable_value: (u32, u32),
pub variable_sub_sep: (u32, u32),
pub variable_sub_name: (u32, u32),
// Element styles
pub elemstyle_operator: (u32, u32),
pub elemstyle_name: (u32, u32),
pub elemstyle_equal: (u32, u32),
pub elemstyle_value: (u32, u32),
// Code blocks
pub code_sep: (u32, u32),
pub code_props_sep: (u32, u32),
pub code_lang: (u32, u32),
pub code_title: (u32, u32),
pub code_content: (u32, u32),
// Script blocks
pub script_sep: (u32, u32),
pub script_kernel_sep: (u32, u32),
pub script_kernel: (u32, u32),
pub script_kind: (u32, u32),
pub script_content: (u32, u32),
// Lists and blockquotes
pub list_bullet: (u32, u32),
pub list_props_sep: (u32, u32),
pub blockquote_marker: (u32, u32),
pub blockquote_props_sep: (u32, u32),
// Raw / TeX / graph blocks
pub raw_sep: (u32, u32),
pub raw_props_sep: (u32, u32),
pub raw_content: (u32, u32),
pub tex_sep: (u32, u32),
pub tex_props_sep: (u32, u32),
pub tex_content: (u32, u32),
pub graph_sep: (u32, u32),
pub graph_props_sep: (u32, u32),
pub graph_content: (u32, u32),
// Layouts
pub layout_sep: (u32, u32),
pub layout_token: (u32, u32),
pub layout_props_sep: (u32, u32),
pub layout_type: (u32, u32),
// Table of contents
pub toc_sep: (u32, u32),
pub toc_token: (u32, u32),
pub toc_title: (u32, u32),
// Media
pub media_sep: (u32, u32),
pub media_refname_sep: (u32, u32),
pub media_refname: (u32, u32),
pub media_uri_sep: (u32, u32),
pub media_uri: (u32, u32),
pub media_props_sep: (u32, u32),
}
impl Tokens {
    /// Builds the predefined token map, resolving every LSP type/modifier
    /// name into its numeric `(type index, modifier bitset)` pair once.
    ///
    /// Unknown names would silently resolve to index/bit 0 (see
    /// [`token_index`] / [`modifier_index`]), so only legend names are used
    /// here.
    pub fn new() -> Self {
        Self {
            section_heading: token!("number"),
            section_reference: token!("enum", "async"),
            section_kind: token!("enum"),
            section_name: token!("string"),
            prop_equal: token!("operator"),
            prop_comma: token!("operator"),
            prop_name: token!("class"),
            prop_value: token!("enum"),
            comment: token!("comment"),
            link_display_sep: token!("macro"),
            link_url_sep: token!("macro"),
            // The original listed "abstract" twice; the bitset OR makes a
            // duplicate modifier a no-op, so one occurrence is kept.
            link_url: token!("function", "readonly", "abstract"),
            style_marker: token!("operator"),
            customstyle_marker: token!("operator"),
            import_import: token!("macro"),
            import_as_sep: token!("operator"),
            import_as: token!("operator"),
            import_path: token!("parameter"),
            reference_operator: token!("operator"),
            reference_link_sep: token!("operator"),
            reference_doc_sep: token!("function"),
            reference_doc: token!("function"),
            reference_link: token!("macro"),
            reference_props_sep: token!("operator"),
            variable_operator: token!("operator"),
            variable_kind: token!("operator"),
            variable_name: token!("macro"),
            variable_sep: token!("operator"),
            variable_value: token!("parameter"),
            variable_sub_sep: token!("operator"),
            variable_sub_name: token!("macro"),
            elemstyle_operator: token!("operator"),
            elemstyle_name: token!("macro"),
            elemstyle_equal: token!("operator"),
            elemstyle_value: token!("number"),
            code_sep: token!("operator"),
            code_props_sep: token!("operator"),
            code_lang: token!("function"),
            code_title: token!("number"),
            code_content: token!("string"),
            script_sep: token!("operator"),
            script_kernel_sep: token!("operator"),
            script_kernel: token!("function"),
            script_kind: token!("function"),
            script_content: token!("string"),
            list_bullet: token!("macro"),
            list_props_sep: token!("operator"),
            blockquote_marker: token!("macro"),
            blockquote_props_sep: token!("operator"),
            raw_sep: token!("operator"),
            raw_props_sep: token!("operator"),
            raw_content: token!("string"),
            tex_sep: token!("modifier"),
            tex_props_sep: token!("operator"),
            tex_content: token!("string"),
            graph_sep: token!("modifier"),
            graph_props_sep: token!("operator"),
            graph_content: token!("string"),
            layout_sep: token!("number"),
            layout_token: token!("number"),
            layout_props_sep: token!("operator"),
            layout_type: token!("function"),
            toc_sep: token!("number"),
            toc_token: token!("number"),
            toc_title: token!("function"),
            media_sep: token!("macro"),
            media_refname_sep: token!("macro"),
            media_refname: token!("enum"),
            media_uri_sep: token!("macro"),
            media_uri: token!("function"),
            media_props_sep: token!("operator"),
        }
    }
}

impl Default for Tokens {
    fn default() -> Self {
        Self::new()
    }
}
/// Per-file semantic token storage, held behind `RefCell`s so the
/// short-lived [`Semantics`] handles can mutate it while parsing.
#[derive(Debug)]
pub struct SemanticsData {
/// Tracks the last emitted position so each new token can be delta-encoded
/// against the previous one (see `Semantics::add_impl`).
cursor: RefCell<LineCursor>,
/// Tokens that can't be emitted immediately without breaking the required
/// ordering; kept sorted by start offset and drained by
/// `Semantics::process_queue`.
pub semantic_queue: RefCell<VecDeque<(Range<usize>, (u32, u32))>>,
/// Delta-encoded tokens ready to be sent to the client.
pub tokens: RefCell<Vec<SemanticToken>>,
}
impl SemanticsData {
    /// Creates empty token storage for `source`, with the line cursor
    /// positioned at the start of that source.
    pub fn new(source: Rc<dyn Source>) -> Self {
        let cursor = LineCursor::new(source);
        Self {
            cursor: RefCell::new(cursor),
            semantic_queue: RefCell::new(VecDeque::new()),
            tokens: RefCell::new(Vec::new()),
        }
    }
}
/// Short-lived handle for adding semantic tokens to one source's
/// [`SemanticsData`] during parsing.
// `pub(self)` is identical to plain private visibility (clippy
// `needless_pub_self`), so the fields are simply left private.
#[derive(Debug)]
pub struct Semantics<'a> {
    /// Per-file token storage borrowed from [`LSPData`].
    sems: Ref<'a, SemanticsData>,
    /// The source used when resolving the parent source
    original_source: Rc<dyn Source>,
    /// The resolved parent source
    source: Rc<dyn Source>,
}
impl<'a> Semantics<'a> {
    /// Walks virtual-source parents until the concrete [`SourceFile`] is
    /// found, then borrows its [`SemanticsData`] and the shared [`Tokens`].
    ///
    /// Returns `None` for `:LUA:`/`:VAR:` virtual sources (generated text
    /// gets no highlighting) and for sources with no registered semantic
    /// data.
    ///
    /// `original_source` is the source the caller started from; token ranges
    /// are later mapped back through it (see [`Self::add`]).
    fn from_source_impl(
        source: Rc<dyn Source>,
        lsp: &'a Option<RefCell<LSPData>>,
        original_source: Rc<dyn Source>,
    ) -> Option<(Self, Ref<'a, Tokens>)> {
        // Lua/variable expansions are generated text: skip them entirely.
        if (source.name().starts_with(":LUA:") || source.name().starts_with(":VAR:"))
            && source.downcast_ref::<VirtualSource>().is_some()
        {
            return None;
        }
        if let Some(location) = source
            .clone()
            .downcast_rc::<VirtualSource>()
            .ok()
            .as_ref()
            .map(|parent| parent.location())
            .unwrap_or(None)
        {
            // Virtual source: recurse into the source it was spawned from.
            return Self::from_source_impl(location.source(), lsp, original_source);
        } else if let Ok(source) = source.clone().downcast_rc::<SourceFile>() {
            // Concrete file: borrow its per-file data and the token map.
            // NOTE(review): `lsp` is unwrapped here — callers must have
            // checked `lsp.is_some()` first (as `from_source` does).
            return Ref::filter_map(lsp.as_ref().unwrap().borrow(), |lsp: &LSPData| {
                lsp.semantic_data.get(&(source.clone() as Rc<dyn Source>))
            })
            .ok()
            .map(|sems| {
                (
                    Self {
                        sems,
                        source,
                        original_source,
                    },
                    Ref::map(lsp.as_ref().unwrap().borrow(), |lsp: &LSPData| {
                        &lsp.semantic_tokens
                    }),
                )
            });
        }
        None
    }

    /// Obtains a semantics handle and the token map for `source`, or `None`
    /// when the LSP is disabled or `source` has no semantic data.
    pub fn from_source(
        source: Rc<dyn Source>,
        lsp: &'a Option<RefCell<LSPData>>,
    ) -> Option<(Self, Ref<'a, Tokens>)> {
        if lsp.is_none() {
            return None;
        }
        Self::from_source_impl(source.clone(), lsp, source)
    }

    /// Method that should be called at the end of parsing
    ///
    /// This function will process the end of the semantic queue
    pub fn on_document_end(lsp: &'a Option<RefCell<LSPData>>, source: Rc<dyn Source>) {
        // An empty source has no valid position to flush up to.
        if source.content().is_empty() {
            return;
        }
        // Position of the last byte, mapped back to the original source.
        let pos = source.original_position(source.content().len() - 1).1;
        if let Some((sems, _)) = Self::from_source(source, lsp) {
            sems.process_queue(pos);
        }
    }

    /// Processes the semantic queue up to a certain position
    ///
    /// The queue is kept sorted by start offset (see [`Self::add_to_queue`]),
    /// so draining stops at the first entry past `pos`.
    fn process_queue(&self, pos: usize) {
        let mut queue = self.sems.semantic_queue.borrow_mut();
        while let Some((range, token)) = queue.front() {
            if range.start > pos {
                break;
            }
            let (range, token) = (range.clone(), *token);
            self.add_impl(range, token);
            queue.pop_front();
        }
    }

    /// Emits delta-encoded [`SemanticToken`]s covering `range`, splitting at
    /// line breaks since a single LSP token cannot span multiple lines.
    fn add_impl(&self, range: Range<usize>, token: (u32, u32)) {
        let mut tokens = self.sems.tokens.borrow_mut();
        let mut cursor = self.sems.cursor.borrow_mut();
        // `current` is the position of the previously emitted token, used
        // for the delta encoding below.
        let mut current = cursor.clone();
        cursor.move_to(range.start);
        while cursor.pos != range.end {
            // Offset (relative to cursor.pos) one past the next newline.
            // NOTE(review): the `unwrap_or` fallback is an absolute content
            // length, not a slice-relative offset; the `min` below clamps it
            // so the final-line case still terminates — confirm intentional.
            let end = self.source.content()[cursor.pos..range.end]
                .find('\n')
                .unwrap_or(self.source.content().len() - 1)
                + 1;
            let len = usize::min(range.end - cursor.pos, end);
            // Token length is measured in characters, not bytes.
            let clen = self.source.content()[cursor.pos..cursor.pos + len]
                .chars()
                .count();
            // LSP deltas: line delta from the previous token, and a column
            // delta that is only relative when both are on the same line.
            let delta_line = cursor.line - current.line;
            let delta_start = if delta_line == 0 {
                cursor.line_pos - current.line_pos
            } else {
                cursor.line_pos
            };
            tokens.push(SemanticToken {
                delta_line: delta_line as u32,
                delta_start: delta_start as u32,
                length: clen as u32,
                token_type: token.0,
                token_modifiers_bitset: token.1,
            });
            if cursor.pos + len == range.end {
                break;
            }
            current = cursor.clone();
            let pos = cursor.pos;
            cursor.move_to(pos + len);
        }
    }

    /// Add a semantic token to be processed instantly
    ///
    /// `range` is expressed in the source the handle was created from and is
    /// mapped back to the original source; queued tokens that start before
    /// it are flushed first so the output stays ordered.
    pub fn add(&self, range: Range<usize>, token: (u32, u32)) {
        let range = self.original_source.original_range(range).1;
        // (A leftover debug `eprintln!` that logged every token to stderr
        // was removed here.)
        self.process_queue(range.start);
        self.add_impl(range, token);
    }

    /// Add a semantic token to be processed in a future call to `add()`
    ///
    /// The queue is kept sorted by start offset so [`Self::process_queue`]
    /// can drain it front-to-back.
    pub fn add_to_queue(&self, range: Range<usize>, token: (u32, u32)) {
        let range = self.original_source.original_range(range).1;
        let mut queue = self.sems.semantic_queue.borrow_mut();
        match queue.binary_search_by_key(&range.start, |(range, _)| range.start) {
            Ok(pos) | Err(pos) => queue.insert(pos, (range, token)),
        }
    }
}
#[cfg(test)]
pub mod tests {
	/// Asserts, token by token, that the semantic tokens recorded for a
	/// source match an expected sequence.
	///
	/// Each entry checks the token at the running index `$idx` against the
	/// named field of the shared [`Tokens`](super::Tokens) map; the
	/// `name { field == value }` form additionally checks raw
	/// `SemanticToken` fields (`delta_line`, `length`, …).
	#[macro_export]
	macro_rules! validate_semantics {
	($state:expr, $source:expr, $idx:expr,) => {};
	($state:expr, $source:expr, $idx:expr, $token_name:ident { $($field:ident == $value:expr),* }; $($tail:tt)*) => {{
		let token = $state.shared.lsp
			.as_ref()
			.unwrap()
			.borrow()
			.semantic_data
			.get(&($source as std::rc::Rc<dyn crate::parser::source::Source>))
			.unwrap()
			.tokens
			.borrow()
			[$idx];
		let token_type = $state.shared.lsp
			.as_ref()
			.unwrap()
			.borrow()
			.semantic_tokens
			.$token_name;
		let found_token = (token.token_type, token.token_modifiers_bitset);
		assert!(found_token == token_type, "Invalid token at index {}, expected {}{token_type:#?}, got: {found_token:#?}",
			$idx, stringify!($token_name));
		$(
			let val = &token.$field;
			assert!(*val == $value, "Invalid field {} at index {}, expected {:#?}, found {:#?}",
				stringify!($field),
				$idx,
				$value,
				val);
		)*
		validate_semantics!($state, $source, ($idx+1), $($tail)*);
	}};
	// Same lookup as the arm above, without the field checks.
	// (This arm previously read stale `shared.semantics` / `.sems` /
	// `.tokens` paths that do not match the data layout used elsewhere in
	// this file; it now mirrors the first arm.)
	($state:expr, $source:expr, $idx:expr, $token_name:ident; $($tail:tt)*) => {{
		let token = $state.shared.lsp
			.as_ref()
			.unwrap()
			.borrow()
			.semantic_data
			.get(&($source as std::rc::Rc<dyn crate::parser::source::Source>))
			.unwrap()
			.tokens
			.borrow()
			[$idx];
		let token_type = $state.shared.lsp
			.as_ref()
			.unwrap()
			.borrow()
			.semantic_tokens
			.$token_name;
		let found_token = (token.token_type, token.token_modifiers_bitset);
		assert!(found_token == token_type, "Invalid token at index {}, expected {}{token_type:#?}, got: {found_token:#?}",
			$idx, stringify!($token_name));
		validate_semantics!($state, $source, ($idx+1), $($tail)*);
	}};
	}
}