nml/src/lsp/semantic.rs

407 lines
10 KiB
Rust
Raw Normal View History

2024-10-19 21:35:18 +02:00
use std::cell::Ref;
2024-10-18 12:43:51 +02:00
use std::cell::RefCell;
2024-10-19 21:35:18 +02:00
use std::collections::HashMap;
2024-10-18 12:43:51 +02:00
use std::ops::Range;
use std::rc::Rc;
2024-07-21 15:56:56 +02:00
2024-10-16 23:42:49 +02:00
use tower_lsp::lsp_types::SemanticToken;
2024-10-18 12:43:51 +02:00
use tower_lsp::lsp_types::SemanticTokenModifier;
2024-10-16 23:42:49 +02:00
use tower_lsp::lsp_types::SemanticTokenType;
2024-07-21 15:56:56 +02:00
2024-10-18 12:43:51 +02:00
use crate::parser::source::LineCursor;
use crate::parser::source::Source;
2024-10-19 21:35:18 +02:00
use crate::parser::source::SourceFile;
use crate::parser::source::VirtualSource;
2024-07-21 15:56:56 +02:00
2024-10-18 12:43:51 +02:00
pub const TOKEN_TYPE: &[SemanticTokenType] = &[
2024-10-19 21:35:18 +02:00
SemanticTokenType::NAMESPACE,
SemanticTokenType::TYPE,
SemanticTokenType::CLASS,
SemanticTokenType::ENUM,
SemanticTokenType::INTERFACE,
SemanticTokenType::STRUCT,
SemanticTokenType::TYPE_PARAMETER,
SemanticTokenType::PARAMETER,
SemanticTokenType::VARIABLE,
SemanticTokenType::PROPERTY,
SemanticTokenType::ENUM_MEMBER,
SemanticTokenType::EVENT,
SemanticTokenType::FUNCTION,
SemanticTokenType::METHOD,
SemanticTokenType::MACRO,
SemanticTokenType::KEYWORD,
SemanticTokenType::MODIFIER,
SemanticTokenType::COMMENT,
SemanticTokenType::STRING,
SemanticTokenType::NUMBER,
SemanticTokenType::REGEXP,
SemanticTokenType::OPERATOR,
SemanticTokenType::DECORATOR,
2024-10-18 12:43:51 +02:00
];
2024-07-21 15:56:56 +02:00
2024-10-18 12:43:51 +02:00
pub const TOKEN_MODIFIERS: &[SemanticTokenModifier] = &[
2024-10-19 21:35:18 +02:00
SemanticTokenModifier::DECLARATION,
SemanticTokenModifier::DEFINITION,
SemanticTokenModifier::READONLY,
SemanticTokenModifier::STATIC,
SemanticTokenModifier::DEPRECATED,
SemanticTokenModifier::ABSTRACT,
SemanticTokenModifier::ASYNC,
SemanticTokenModifier::MODIFICATION,
SemanticTokenModifier::DOCUMENTATION,
SemanticTokenModifier::DEFAULT_LIBRARY,
2024-10-18 12:43:51 +02:00
];
2024-10-16 23:42:49 +02:00
2024-10-19 21:35:18 +02:00
fn token_index(name: &str) -> u32 {
TOKEN_TYPE
.iter()
2024-10-18 12:43:51 +02:00
.enumerate()
.find(|(_, token)| token.as_str() == name)
.map(|(index, _)| index as u32)
.unwrap_or(0)
}
2024-10-19 21:35:18 +02:00
fn modifier_index(name: &str) -> u32 {
TOKEN_MODIFIERS
.iter()
2024-10-18 12:43:51 +02:00
.enumerate()
.find(|(_, token)| token.as_str() == name)
.map(|(index, _)| index as u32)
.unwrap_or(0)
}
/// Builds a `(token_type, modifier_bitset)` pair from names.
///
/// * `token!("kind")` yields `(token_index("kind"), 0)`.
/// * `token!("kind", "mod_a", "mod_b")` additionally sets bit
///   `modifier_index(m)` in the bitset for every listed modifier `m`.
///
/// Listing the same modifier twice is harmless (the bit is simply OR-ed
/// again) and a trailing comma is accepted.
macro_rules! token {
    ($key:expr $(, $mods:expr)* $(,)?) => {
        (
            token_index($key),
            0u32 $(| (1u32 << modifier_index($mods)))*,
        )
    };
}
/// Predefined list of tokens
///
/// Every field is a `(token_type, modifier_bitset)` pair as produced by the
/// `token!` macro, ready to be passed as the `token` argument of
/// `Semantics::add`.
#[derive(Debug)]
pub struct Tokens {
    // Sections
    pub section_heading: (u32, u32),
    pub section_reference: (u32, u32),
    pub section_kind: (u32, u32),
    pub section_name: (u32, u32),

    // Comments
    pub comment: (u32, u32),

    // Links
    pub link_display_sep: (u32, u32),
    pub link_url_sep: (u32, u32),
    pub link_url: (u32, u32),

    // Styles
    pub style_marker: (u32, u32),

    // Imports
    pub import_import: (u32, u32),
    pub import_as_sep: (u32, u32),
    pub import_as: (u32, u32),
    pub import_path: (u32, u32),

    // References
    pub reference_operator: (u32, u32),
    pub reference_link_sep: (u32, u32),
    pub reference_doc_sep: (u32, u32),
    pub reference_doc: (u32, u32),
    pub reference_link: (u32, u32),
    pub reference_props_sep: (u32, u32),
    pub reference_props: (u32, u32),

    // Variables
    pub variable_operator: (u32, u32),
    pub variable_kind: (u32, u32),
    pub variable_name: (u32, u32),
    pub variable_sep: (u32, u32),
    pub variable_value: (u32, u32),
    pub variable_sub_sep: (u32, u32),
    pub variable_sub_name: (u32, u32),

    // Code blocks
    pub code_sep: (u32, u32),
    pub code_props_sep: (u32, u32),
    pub code_props: (u32, u32),
    pub code_lang: (u32, u32),
    pub code_title: (u32, u32),
    pub code_content: (u32, u32),

    // Scripts
    pub script_sep: (u32, u32),
    pub script_kernel_sep: (u32, u32),
    pub script_kernel: (u32, u32),
    pub script_kind: (u32, u32),
    pub script_content: (u32, u32),

    // Lists
    pub list_bullet: (u32, u32),
    pub list_props_sep: (u32, u32),
    pub list_props: (u32, u32),
}
2024-07-21 15:56:56 +02:00
2024-10-19 21:35:18 +02:00
impl Tokens {
pub fn new() -> Self {
2024-10-18 12:43:51 +02:00
Self {
2024-10-19 21:35:18 +02:00
section_heading: token!("number"),
section_reference: token!("enum", "async"),
section_kind: token!("enum"),
section_name: token!("string"),
comment: token!("comment"),
link_display_sep: token!("macro"),
link_url_sep: token!("macro"),
2024-10-19 22:02:10 +02:00
link_url: token!("function", "readonly", "abstract", "abstract"),
2024-10-19 21:35:18 +02:00
style_marker: token!("operator"),
import_import: token!("macro"),
import_as_sep: token!("operator"),
import_as: token!("operator"),
import_path: token!("function"),
reference_operator: token!("operator"),
reference_link_sep: token!("operator"),
reference_doc_sep: token!("function"),
reference_doc: token!("function"),
reference_link: token!("macro"),
reference_props_sep: token!("operator"),
reference_props: token!("enum"),
variable_operator: token!("operator"),
variable_kind: token!("operator"),
variable_name: token!("macro"),
variable_sep: token!("operator"),
variable_value: token!("function"),
variable_sub_sep: token!("operator"),
variable_sub_name: token!("macro"),
code_sep: token!("operator"),
code_props_sep: token!("operator"),
code_props: token!("enum"),
code_lang: token!("function"),
code_title: token!("number"),
code_content: token!("string"),
2024-10-20 12:25:52 +02:00
script_sep: token!("operator"),
script_kernel_sep: token!("operator"),
script_kernel: token!("function"),
script_kind: token!("function"),
script_content: token!("string"),
list_bullet: token!("macro"),
list_props_sep: token!("operator"),
list_props: token!("enum"),
2024-10-18 12:43:51 +02:00
}
2024-10-16 23:42:49 +02:00
}
2024-10-18 12:43:51 +02:00
}
2024-07-21 15:56:56 +02:00
2024-10-19 21:35:18 +02:00
/// Per file semantic tokens
2024-10-18 12:43:51 +02:00
#[derive(Debug)]
2024-10-19 21:35:18 +02:00
pub struct SemanticsData {
2024-10-18 12:43:51 +02:00
/// The current cursor
cursor: RefCell<LineCursor>,
/// Semantic tokens
pub tokens: RefCell<Vec<SemanticToken>>,
2024-07-21 15:56:56 +02:00
}
2024-10-19 22:02:10 +02:00
impl SemanticsData {
pub fn new(source: Rc<dyn Source>) -> Self {
2024-10-18 12:43:51 +02:00
Self {
cursor: RefCell::new(LineCursor::new(source)),
tokens: RefCell::new(vec![]),
}
}
2024-10-19 21:35:18 +02:00
}
/// Handle used to append semantic tokens for one source.
///
/// Obtained through `Semantics::from_source`; all fields are private to this
/// module.
#[derive(Debug)]
pub struct Semantics<'a> {
    /// Borrowed per-file token storage the handle writes into
    sems: Ref<'a, SemanticsData>,
    /// Source whose content is sliced when tokens are added
    source: Rc<dyn Source>,
    /// Range whose start is added to every range passed to `add`
    range: Range<usize>,
}
2024-07-21 15:56:56 +02:00
2024-10-19 21:35:18 +02:00
impl<'a> Semantics<'a> {
fn from_source_impl(
2024-10-18 12:43:51 +02:00
source: Rc<dyn Source>,
2024-10-19 21:35:18 +02:00
semantics: &'a Option<RefCell<SemanticsHolder>>,
2024-10-19 22:02:10 +02:00
range: Range<usize>,
) -> Option<(Self, Ref<'a, Tokens>)> {
2024-10-20 12:25:52 +02:00
if source.name().starts_with(":LUA:") && source.downcast_ref::<VirtualSource>().is_some() {
return None;
}
2024-10-19 21:35:18 +02:00
if let Some(location) = source
.clone()
.downcast_rc::<VirtualSource>()
.ok()
.as_ref()
.map(|parent| parent.location())
.unwrap_or(None)
{
return Self::from_source_impl(location.source(), semantics, range);
} else if let Some(source) = source.clone().downcast_rc::<SourceFile>().ok() {
return Ref::filter_map(
semantics.as_ref().unwrap().borrow(),
|semantics: &SemanticsHolder| {
semantics.sems.get(&(source.clone() as Rc<dyn Source>))
},
)
.ok()
.map(|sems| {
(
Self {
sems,
source,
range,
},
Ref::map(
semantics.as_ref().unwrap().borrow(),
|semantics: &SemanticsHolder| &semantics.tokens,
),
)
});
}
return None;
}
2024-10-19 22:02:10 +02:00
2024-10-19 21:35:18 +02:00
pub fn from_source(
source: Rc<dyn Source>,
semantics: &'a Option<RefCell<SemanticsHolder>>,
) -> Option<(Self, Ref<'a, Tokens>)> {
if semantics.is_none() {
return None;
}
let range = source.location().map_or_else(
|| 0..source.content().len(),
2024-10-19 22:02:10 +02:00
|location| location.range.clone(),
);
2024-10-19 21:35:18 +02:00
return Self::from_source_impl(source, semantics, range);
}
pub fn add(&self, range: Range<usize>, token: (u32, u32)) {
2024-10-19 22:02:10 +02:00
let range = self.range.start + range.start..self.range.start + range.end;
2024-10-19 21:35:18 +02:00
let mut tokens = self.sems.tokens.borrow_mut();
let mut cursor = self.sems.cursor.borrow_mut();
2024-10-18 12:43:51 +02:00
let mut current = cursor.clone();
cursor.move_to(range.start);
while cursor.pos != range.end {
2024-10-19 21:35:18 +02:00
let end = self.source.content()[cursor.pos..range.end]
2024-10-18 12:43:51 +02:00
.find('\n')
2024-10-20 12:25:52 +02:00
.unwrap_or(self.source.content().len() - 1)
+ 1;
2024-10-18 12:43:51 +02:00
let len = usize::min(range.end - cursor.pos, end);
2024-10-20 09:03:17 +02:00
let clen = self.source.content()[cursor.pos..cursor.pos + len]
.chars()
.fold(0, |acc, c| acc + c.len_utf16());
2024-10-18 12:43:51 +02:00
let delta_line = cursor.line - current.line;
let delta_start = if delta_line == 0 {
2024-10-19 21:35:18 +02:00
cursor.line_pos - current.line_pos
2024-10-18 12:43:51 +02:00
} else {
cursor.line_pos
};
tokens.push(SemanticToken {
delta_line: delta_line as u32,
delta_start: delta_start as u32,
2024-10-18 14:04:15 +02:00
length: clen as u32,
2024-10-18 12:43:51 +02:00
token_type: token.0,
2024-10-19 21:35:18 +02:00
token_modifiers_bitset: token.1,
2024-10-18 12:43:51 +02:00
});
2024-10-19 22:02:10 +02:00
if cursor.pos + len == range.end {
2024-10-19 21:35:18 +02:00
break;
}
2024-10-18 12:43:51 +02:00
current = cursor.clone();
let pos = cursor.pos;
cursor.move_to(pos + len);
}
}
2024-07-21 15:56:56 +02:00
}
2024-10-18 14:04:15 +02:00
2024-10-19 21:35:18 +02:00
/// Owns the predefined token table and the per-source semantic data.
#[derive(Debug)]
pub struct SemanticsHolder {
    /// Predefined `(token_type, modifier_bitset)` pairs
    pub tokens: Tokens,
    /// Semantic data of each source, keyed by the source itself
    pub sems: HashMap<Rc<dyn Source>, SemanticsData>,
}

impl SemanticsHolder {
    /// Creates a holder with the predefined token table and no per-source data.
    pub fn new() -> Self {
        Self {
            tokens: Tokens::new(),
            sems: HashMap::new(),
        }
    }
}

impl Default for SemanticsHolder {
    /// Same as [`SemanticsHolder::new`].
    fn default() -> Self {
        Self::new()
    }
}
2024-10-18 14:04:15 +02:00
#[cfg(test)]
pub mod tests {
    /// Asserts that the semantic tokens recorded for `$source` match a list
    /// of expectations, starting at token index `$idx`.
    ///
    /// Each expectation is terminated by `;` and is either
    /// * `name;` -- the token must equal the `name` field of `Tokens`, or
    /// * `name { field == value, ... };` -- same, and every listed
    ///   `SemanticToken` field must equal the given value.
    ///
    /// The call site must have `Rc` and `Source` in scope, and `$source` is
    /// expanded once per expectation, so it should be cheap to evaluate
    /// repeatedly (e.g. `source.clone()`).
    #[macro_export]
    macro_rules! validate_semantics {
        // No expectation left.
        ($state:expr, $source:expr, $idx:expr,) => {};
        // Expectation with (possibly zero) field checks.
        ($state:expr, $source:expr, $idx:expr, $token_name:ident { $($field:ident == $value:expr),* }; $($tail:tt)*) => {{
            let token = $state.shared.semantics
                .as_ref()
                .unwrap()
                .borrow()
                .sems
                .get(&($source as Rc<dyn Source>))
                .unwrap()
                .tokens
                .borrow()
                [$idx];
            let token_type = $state.shared.semantics
                .as_ref()
                .unwrap()
                .borrow()
                .tokens
                .$token_name;

            let found_token = (token.token_type, token.token_modifiers_bitset);
            assert!(found_token == token_type, "Invalid token at index {}, expected {}{token_type:#?}, got: {found_token:#?}",
                $idx, stringify!($token_name));

            $(
                let val = &token.$field;
                assert!(*val == $value, "Invalid field {} at index {}, expected {:#?}, found {:#?}",
                    stringify!($field),
                    $idx,
                    $value,
                    val);
            )*

            validate_semantics!($state, $source, ($idx+1), $($tail)*);
        }};
        // Expectation without field checks: same as an empty field list.
        ($state:expr, $source:expr, $idx:expr, $token_name:ident; $($tail:tt)*) => {{
            validate_semantics!($state, $source, $idx, $token_name {}; $($tail)*);
        }};
    }
}