nml/src/lsp/semantic.rs

521 lines
13 KiB
Rust
Raw Normal View History

2024-10-19 21:35:18 +02:00
use std::cell::Ref;
2024-10-18 12:43:51 +02:00
use std::cell::RefCell;
2024-10-25 14:53:31 +02:00
use std::collections::VecDeque;
2024-10-18 12:43:51 +02:00
use std::ops::Range;
use std::rc::Rc;
2024-07-21 15:56:56 +02:00
2024-10-16 23:42:49 +02:00
use tower_lsp::lsp_types::SemanticToken;
2024-10-18 12:43:51 +02:00
use tower_lsp::lsp_types::SemanticTokenModifier;
2024-10-16 23:42:49 +02:00
use tower_lsp::lsp_types::SemanticTokenType;
2024-07-21 15:56:56 +02:00
2024-10-18 12:43:51 +02:00
use crate::parser::source::LineCursor;
use crate::parser::source::Source;
2024-10-19 21:35:18 +02:00
use crate::parser::source::SourceFile;
2024-10-23 12:50:02 +02:00
use crate::parser::source::SourcePosition;
2024-10-19 21:35:18 +02:00
use crate::parser::source::VirtualSource;
2024-07-21 15:56:56 +02:00
2024-10-25 10:05:13 +02:00
use super::data::LSPData;
2024-10-18 12:43:51 +02:00
pub const TOKEN_TYPE: &[SemanticTokenType] = &[
2024-10-19 21:35:18 +02:00
SemanticTokenType::NAMESPACE,
SemanticTokenType::TYPE,
SemanticTokenType::CLASS,
SemanticTokenType::ENUM,
SemanticTokenType::INTERFACE,
SemanticTokenType::STRUCT,
SemanticTokenType::TYPE_PARAMETER,
SemanticTokenType::PARAMETER,
SemanticTokenType::VARIABLE,
SemanticTokenType::PROPERTY,
SemanticTokenType::ENUM_MEMBER,
SemanticTokenType::EVENT,
SemanticTokenType::FUNCTION,
SemanticTokenType::METHOD,
SemanticTokenType::MACRO,
SemanticTokenType::KEYWORD,
SemanticTokenType::MODIFIER,
SemanticTokenType::COMMENT,
SemanticTokenType::STRING,
SemanticTokenType::NUMBER,
SemanticTokenType::REGEXP,
SemanticTokenType::OPERATOR,
SemanticTokenType::DECORATOR,
2024-10-18 12:43:51 +02:00
];
2024-07-21 15:56:56 +02:00
2024-10-18 12:43:51 +02:00
pub const TOKEN_MODIFIERS: &[SemanticTokenModifier] = &[
2024-10-19 21:35:18 +02:00
SemanticTokenModifier::DECLARATION,
SemanticTokenModifier::DEFINITION,
SemanticTokenModifier::READONLY,
SemanticTokenModifier::STATIC,
SemanticTokenModifier::DEPRECATED,
SemanticTokenModifier::ABSTRACT,
SemanticTokenModifier::ASYNC,
SemanticTokenModifier::MODIFICATION,
SemanticTokenModifier::DOCUMENTATION,
SemanticTokenModifier::DEFAULT_LIBRARY,
2024-10-18 12:43:51 +02:00
];
2024-10-16 23:42:49 +02:00
2024-10-19 21:35:18 +02:00
fn token_index(name: &str) -> u32 {
TOKEN_TYPE
.iter()
2024-10-18 12:43:51 +02:00
.enumerate()
.find(|(_, token)| token.as_str() == name)
.map(|(index, _)| index as u32)
.unwrap_or(0)
}
2024-10-19 21:35:18 +02:00
fn modifier_index(name: &str) -> u32 {
TOKEN_MODIFIERS
.iter()
2024-10-18 12:43:51 +02:00
.enumerate()
.find(|(_, token)| token.as_str() == name)
.map(|(index, _)| index as u32)
.unwrap_or(0)
}
/// Builds a `(token type index, modifier bitset)` pair.
///
/// * `token!("name")` yields the index of `"name"` and an empty bitset.
/// * `token!("name", "mod_a", "mod_b")` additionally sets, for every listed
///   modifier, the bit whose number is that modifier's index.
macro_rules! token {
    ($key:expr) => {
        (token_index($key), 0)
    };
    ($key:expr, $($mods:tt),*) => {
        (
            token_index($key),
            // OR together one bit per requested modifier.
            0u32 $(| (1u32 << modifier_index($mods)))*,
        )
    };
}
/// Predefined list of tokens
///
/// Every field is a `(token type index, modifier bitset)` pair, as produced by
/// the `token!` macro. Fields are grouped by the language construct they
/// highlight.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Tokens {
    // Sections
    pub section_heading: (u32, u32),
    pub section_reference: (u32, u32),
    pub section_kind: (u32, u32),
    pub section_name: (u32, u32),

    // Properties
    pub prop_equal: (u32, u32),
    pub prop_comma: (u32, u32),
    pub prop_name: (u32, u32),
    pub prop_value: (u32, u32),

    // Comments
    pub comment: (u32, u32),

    // Links
    pub link_display_sep: (u32, u32),
    pub link_url_sep: (u32, u32),
    pub link_url: (u32, u32),

    // Styles
    pub style_marker: (u32, u32),
    pub customstyle_marker: (u32, u32),

    // Imports
    pub import_import: (u32, u32),
    pub import_as_sep: (u32, u32),
    pub import_as: (u32, u32),
    pub import_path: (u32, u32),

    // References
    pub reference_operator: (u32, u32),
    pub reference_link_sep: (u32, u32),
    pub reference_doc_sep: (u32, u32),
    pub reference_doc: (u32, u32),
    pub reference_link: (u32, u32),
    pub reference_props_sep: (u32, u32),

    // Variables
    pub variable_operator: (u32, u32),
    pub variable_kind: (u32, u32),
    pub variable_name: (u32, u32),
    pub variable_sep: (u32, u32),
    pub variable_value: (u32, u32),
    pub variable_sub_sep: (u32, u32),
    pub variable_sub_name: (u32, u32),

    // Element styles
    pub elemstyle_operator: (u32, u32),
    pub elemstyle_name: (u32, u32),
    pub elemstyle_equal: (u32, u32),
    pub elemstyle_value: (u32, u32),

    // Code blocks
    pub code_sep: (u32, u32),
    pub code_props_sep: (u32, u32),
    pub code_lang: (u32, u32),
    pub code_title: (u32, u32),
    pub code_content: (u32, u32),

    // Scripts
    pub script_sep: (u32, u32),
    pub script_kernel_sep: (u32, u32),
    pub script_kernel: (u32, u32),
    pub script_kind: (u32, u32),
    pub script_content: (u32, u32),

    // Lists
    pub list_bullet: (u32, u32),
    pub list_props_sep: (u32, u32),

    // Blockquotes
    pub blockquote_marker: (u32, u32),
    pub blockquote_props_sep: (u32, u32),

    // Raw
    pub raw_sep: (u32, u32),
    pub raw_props_sep: (u32, u32),
    pub raw_content: (u32, u32),

    // Tex
    pub tex_sep: (u32, u32),
    pub tex_props_sep: (u32, u32),
    pub tex_content: (u32, u32),

    // Graphs
    pub graph_sep: (u32, u32),
    pub graph_props_sep: (u32, u32),
    pub graph_content: (u32, u32),

    // Layouts
    pub layout_sep: (u32, u32),
    pub layout_token: (u32, u32),
    pub layout_props_sep: (u32, u32),
    pub layout_type: (u32, u32),

    // Table of contents
    pub toc_sep: (u32, u32),
    pub toc_token: (u32, u32),
    pub toc_title: (u32, u32),

    // Media
    pub media_sep: (u32, u32),
    pub media_refname_sep: (u32, u32),
    pub media_refname: (u32, u32),
    pub media_uri_sep: (u32, u32),
    pub media_uri: (u32, u32),
    pub media_props_sep: (u32, u32),
}
2024-07-21 15:56:56 +02:00
2024-10-19 21:35:18 +02:00
impl Tokens {
pub fn new() -> Self {
2024-10-18 12:43:51 +02:00
Self {
2024-10-19 21:35:18 +02:00
section_heading: token!("number"),
section_reference: token!("enum", "async"),
section_kind: token!("enum"),
section_name: token!("string"),
2024-11-01 22:15:33 +01:00
prop_equal: token!("operator"),
prop_comma: token!("operator"),
prop_name: token!("class"),
prop_value: token!("enum"),
2024-10-19 21:35:18 +02:00
comment: token!("comment"),
link_display_sep: token!("macro"),
link_url_sep: token!("macro"),
2024-10-19 22:02:10 +02:00
link_url: token!("function", "readonly", "abstract", "abstract"),
2024-10-19 21:35:18 +02:00
style_marker: token!("operator"),
2024-10-25 10:05:13 +02:00
customstyle_marker: token!("operator"),
2024-10-19 21:35:18 +02:00
import_import: token!("macro"),
import_as_sep: token!("operator"),
import_as: token!("operator"),
2024-10-22 21:40:00 +02:00
import_path: token!("parameter"),
2024-10-19 21:35:18 +02:00
reference_operator: token!("operator"),
reference_link_sep: token!("operator"),
reference_doc_sep: token!("function"),
reference_doc: token!("function"),
reference_link: token!("macro"),
reference_props_sep: token!("operator"),
variable_operator: token!("operator"),
variable_kind: token!("operator"),
variable_name: token!("macro"),
variable_sep: token!("operator"),
2024-10-22 21:40:00 +02:00
variable_value: token!("parameter"),
variable_sub_sep: token!("operator"),
variable_sub_name: token!("macro"),
2024-10-26 17:30:10 +02:00
elemstyle_operator: token!("operator"),
elemstyle_name: token!("macro"),
elemstyle_equal: token!("operator"),
elemstyle_value: token!("number"),
code_sep: token!("operator"),
code_props_sep: token!("operator"),
code_lang: token!("function"),
code_title: token!("number"),
code_content: token!("string"),
2024-10-20 12:25:52 +02:00
script_sep: token!("operator"),
script_kernel_sep: token!("operator"),
script_kernel: token!("function"),
script_kind: token!("function"),
script_content: token!("string"),
list_bullet: token!("macro"),
list_props_sep: token!("operator"),
2024-10-22 21:40:00 +02:00
2024-10-25 14:53:31 +02:00
blockquote_marker: token!("macro"),
blockquote_props_sep: token!("operator"),
2024-10-22 21:40:00 +02:00
raw_sep: token!("operator"),
raw_props_sep: token!("operator"),
raw_content: token!("string"),
tex_sep: token!("modifier"),
tex_props_sep: token!("operator"),
tex_content: token!("string"),
2024-10-24 14:49:39 +02:00
graph_sep: token!("modifier"),
graph_props_sep: token!("operator"),
graph_content: token!("string"),
2024-10-22 21:40:00 +02:00
layout_sep: token!("number"),
layout_token: token!("number"),
layout_props_sep: token!("operator"),
layout_type: token!("function"),
2024-10-29 10:36:21 +01:00
2024-10-30 11:17:35 +01:00
toc_sep: token!("number"),
toc_token: token!("number"),
toc_title: token!("function"),
2024-10-29 10:36:21 +01:00
media_sep: token!("macro"),
media_refname_sep: token!("macro"),
media_refname: token!("enum"),
media_uri_sep: token!("macro"),
media_uri: token!("function"),
media_props_sep: token!("operator"),
2024-10-18 12:43:51 +02:00
}
2024-10-16 23:42:49 +02:00
}
2024-10-18 12:43:51 +02:00
}
2024-07-21 15:56:56 +02:00
2024-10-19 21:35:18 +02:00
/// Per file semantic tokens
2024-10-18 12:43:51 +02:00
#[derive(Debug)]
2024-10-19 21:35:18 +02:00
pub struct SemanticsData {
2024-10-18 12:43:51 +02:00
/// The current cursor
cursor: RefCell<LineCursor>,
2024-10-25 14:53:31 +02:00
/// Semantic tokens that can't be added directly
pub semantic_queue: RefCell<VecDeque<(Range<usize>, (u32, u32))>>,
2024-10-18 12:43:51 +02:00
/// Semantic tokens
pub tokens: RefCell<Vec<SemanticToken>>,
2024-07-21 15:56:56 +02:00
}
2024-10-19 22:02:10 +02:00
impl SemanticsData {
pub fn new(source: Rc<dyn Source>) -> Self {
2024-10-18 12:43:51 +02:00
Self {
cursor: RefCell::new(LineCursor::new(source)),
2024-10-25 14:53:31 +02:00
semantic_queue: RefCell::new(VecDeque::new()),
2024-10-18 12:43:51 +02:00
tokens: RefCell::new(vec![]),
}
}
2024-10-19 21:35:18 +02:00
}
#[derive(Debug)]
pub struct Semantics<'a> {
pub(self) sems: Ref<'a, SemanticsData>,
2024-10-25 10:05:13 +02:00
// The source used when resolving the parent source
2024-10-22 21:40:00 +02:00
pub(self) original_source: Rc<dyn Source>,
/// The resolved parent source
2024-10-19 21:35:18 +02:00
pub(self) source: Rc<dyn Source>,
}
2024-07-21 15:56:56 +02:00
2024-10-19 21:35:18 +02:00
impl<'a> Semantics<'a> {
fn from_source_impl(
2024-10-18 12:43:51 +02:00
source: Rc<dyn Source>,
2024-10-25 10:05:13 +02:00
lsp: &'a Option<RefCell<LSPData>>,
2024-10-22 21:40:00 +02:00
original_source: Rc<dyn Source>,
2024-10-19 22:02:10 +02:00
) -> Option<(Self, Ref<'a, Tokens>)> {
2024-11-03 10:54:27 +01:00
if (source.name().starts_with(":LUA:") || source.name().starts_with(":VAR:"))
&& source.downcast_ref::<VirtualSource>().is_some()
{
2024-10-20 12:25:52 +02:00
return None;
}
2024-10-19 21:35:18 +02:00
if let Some(location) = source
.clone()
.downcast_rc::<VirtualSource>()
.ok()
.as_ref()
.map(|parent| parent.location())
.unwrap_or(None)
{
2024-10-25 10:05:13 +02:00
return Self::from_source_impl(location.source(), lsp, original_source);
2024-10-24 10:18:49 +02:00
} else if let Ok(source) = source.clone().downcast_rc::<SourceFile>() {
2024-10-26 17:30:10 +02:00
return Ref::filter_map(lsp.as_ref().unwrap().borrow(), |lsp: &LSPData| {
lsp.semantic_data.get(&(source.clone() as Rc<dyn Source>))
})
2024-10-19 21:35:18 +02:00
.ok()
.map(|sems| {
(
Self {
sems,
source,
2024-10-22 21:40:00 +02:00
original_source,
2024-10-19 21:35:18 +02:00
},
2024-10-26 17:30:10 +02:00
Ref::map(lsp.as_ref().unwrap().borrow(), |lsp: &LSPData| {
&lsp.semantic_tokens
}),
2024-10-19 21:35:18 +02:00
)
});
}
2024-10-24 10:18:49 +02:00
None
2024-10-19 21:35:18 +02:00
}
2024-10-19 22:02:10 +02:00
2024-10-19 21:35:18 +02:00
pub fn from_source(
source: Rc<dyn Source>,
2024-10-25 10:05:13 +02:00
lsp: &'a Option<RefCell<LSPData>>,
2024-10-19 21:35:18 +02:00
) -> Option<(Self, Ref<'a, Tokens>)> {
2024-10-25 10:05:13 +02:00
if lsp.is_none() {
2024-10-19 21:35:18 +02:00
return None;
}
2024-10-25 10:05:13 +02:00
Self::from_source_impl(source.clone(), lsp, source)
2024-10-19 21:35:18 +02:00
}
2024-10-25 14:53:31 +02:00
/// Method that should be called at the end of parsing
///
/// This function will process the end of the semantic queue
2024-10-26 17:30:10 +02:00
pub fn on_document_end(lsp: &'a Option<RefCell<LSPData>>, source: Rc<dyn Source>) {
if source.content().is_empty() {
2024-10-25 14:53:31 +02:00
return;
}
let pos = source.original_position(source.content().len() - 1).1;
2024-10-26 17:30:10 +02:00
if let Some((sems, _)) = Self::from_source(source, lsp) {
2024-10-25 14:53:31 +02:00
sems.process_queue(pos);
}
}
/// Processes the semantic queue up to a certain position
2024-10-26 17:30:10 +02:00
fn process_queue(&self, pos: usize) {
2024-10-25 14:53:31 +02:00
let mut queue = self.sems.semantic_queue.borrow_mut();
2024-10-26 17:30:10 +02:00
while !queue.is_empty() {
2024-10-25 14:53:31 +02:00
let (range, token) = queue.front().unwrap();
2024-10-26 17:30:10 +02:00
if range.start > pos {
2024-10-25 14:53:31 +02:00
break;
}
self.add_impl(range.to_owned(), token.to_owned());
queue.pop_front();
}
}
2024-10-26 17:30:10 +02:00
fn add_impl(&self, range: Range<usize>, token: (u32, u32)) {
2024-10-19 21:35:18 +02:00
let mut tokens = self.sems.tokens.borrow_mut();
let mut cursor = self.sems.cursor.borrow_mut();
2024-10-18 12:43:51 +02:00
let mut current = cursor.clone();
cursor.move_to(range.start);
while cursor.pos != range.end {
2024-10-19 21:35:18 +02:00
let end = self.source.content()[cursor.pos..range.end]
2024-10-18 12:43:51 +02:00
.find('\n')
2024-10-20 12:25:52 +02:00
.unwrap_or(self.source.content().len() - 1)
+ 1;
2024-10-18 12:43:51 +02:00
let len = usize::min(range.end - cursor.pos, end);
2024-10-20 09:03:17 +02:00
let clen = self.source.content()[cursor.pos..cursor.pos + len]
.chars()
2024-10-26 17:30:10 +02:00
.fold(0, |acc, _| acc + 1);
2024-10-18 12:43:51 +02:00
let delta_line = cursor.line - current.line;
let delta_start = if delta_line == 0 {
2024-10-19 21:35:18 +02:00
cursor.line_pos - current.line_pos
2024-10-18 12:43:51 +02:00
} else {
cursor.line_pos
};
tokens.push(SemanticToken {
delta_line: delta_line as u32,
delta_start: delta_start as u32,
2024-10-18 14:04:15 +02:00
length: clen as u32,
2024-10-18 12:43:51 +02:00
token_type: token.0,
2024-10-19 21:35:18 +02:00
token_modifiers_bitset: token.1,
2024-10-18 12:43:51 +02:00
});
2024-10-19 22:02:10 +02:00
if cursor.pos + len == range.end {
2024-10-19 21:35:18 +02:00
break;
}
2024-10-18 12:43:51 +02:00
current = cursor.clone();
let pos = cursor.pos;
cursor.move_to(pos + len);
}
}
2024-10-25 14:53:31 +02:00
/// Add a semantic token to be processed instantly
pub fn add(&self, range: Range<usize>, token: (u32, u32)) {
let range = self.original_source.original_range(range).1;
eprintln!(
"Added {token:#?} range={range:#?} source={:#?}",
self.original_source
);
2024-10-25 14:53:31 +02:00
self.process_queue(range.start);
self.add_impl(range, token);
}
/// Add a semantic token to be processed in a future call to `add()`
2024-10-26 17:30:10 +02:00
pub fn add_to_queue(&self, range: Range<usize>, token: (u32, u32)) {
2024-10-25 14:53:31 +02:00
let range = self.original_source.original_range(range).1;
let mut queue = self.sems.semantic_queue.borrow_mut();
2024-10-26 17:30:10 +02:00
match queue.binary_search_by_key(&range.start, |(range, _)| range.start) {
Ok(pos) | Err(pos) => queue.insert(pos, (range, token)),
2024-10-25 14:53:31 +02:00
}
}
2024-07-21 15:56:56 +02:00
}
2024-10-18 14:04:15 +02:00
#[cfg(test)]
pub mod tests {
    /// Checks the semantic tokens recorded for `$source`, starting at index
    /// `$idx`.
    ///
    /// After the three leading arguments comes a `;`-terminated list of
    /// expectations, each being either
    /// * `token_name;` -- the token at the current index must equal the
    ///   predefined `token_name` entry, or
    /// * `token_name { field == value, ... };` -- same, and each listed field
    ///   of the recorded token must equal the given value.
    ///
    /// Every expectation advances the index by one.
    #[macro_export]
    macro_rules! validate_semantics {
        // End of the expectation list.
        ($state:expr, $source:expr, $idx:expr,) => {};
        // Token name plus field checks.
        ($state:expr, $source:expr, $idx:expr, $token_name:ident { $($field:ident == $value:expr),* }; $($tail:tt)*) => {{
            let token = $state.shared.lsp
                .as_ref()
                .unwrap()
                .borrow()
                .semantic_data
                .get(&($source as std::rc::Rc<dyn crate::parser::source::Source>))
                .unwrap()
                .tokens
                .borrow()
                [$idx];
            let token_type = $state.shared.lsp
                .as_ref()
                .unwrap()
                .borrow()
                .semantic_tokens
                .$token_name;

            let found_token = (token.token_type, token.token_modifiers_bitset);
            assert!(found_token == token_type, "Invalid token at index {}, expected {}{token_type:#?}, got: {found_token:#?}",
                $idx, stringify!($token_name));

            $(
                let val = &token.$field;
                assert!(*val == $value, "Invalid field {} at index {}, expected {:#?}, found {:#?}",
                    stringify!($field),
                    $idx,
                    $value,
                    val);
            )*

            validate_semantics!($state, $source, ($idx+1), $($tail)*);
        }};
        // Token name only: same as the arm above with no field checks. This
        // arm used to read the tokens through a stale `shared.semantics` path.
        ($state:expr, $source:expr, $idx:expr, $token_name:ident; $($tail:tt)*) => {{
            validate_semantics!($state, $source, $idx, $token_name {}; $($tail)*);
        }};
    }
}