From f1de61409a73dc122ba0870f09d9a012085ee92d Mon Sep 17 00:00:00 2001 From: ef3d0c3e Date: Sun, 20 Oct 2024 09:03:17 +0200 Subject: [PATCH] Fix unicode width --- Cargo.toml | 1 - src/elements/style.rs | 8 ++++---- src/lsp/semantic.rs | 6 +++--- src/parser/source.rs | 10 ++++------ 4 files changed, 11 insertions(+), 14 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4ad8676..e2f2d5f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,7 +40,6 @@ tower-lsp = "0.20.0" unicode-segmentation = "1.11.0" walkdir = "2.5.0" runtime-format = "0.1.3" -unicode-width = "0.2.0" [dev-dependencies] rand = "0.8.5" diff --git a/src/elements/style.rs b/src/elements/style.rs index ab82574..10360d5 100644 --- a/src/elements/style.rs +++ b/src/elements/style.rs @@ -377,8 +377,8 @@ terminated here%% let source = Rc::new(SourceFile::with_content( "".to_string(), r#" -**test** `another` -__test__ *another* +**te📫st** `another` +__teかst__ *another* "# .to_string(), None, @@ -388,12 +388,12 @@ __test__ *another* validate_semantics!(state, source.clone(), 0, style_marker { delta_line == 1, delta_start == 0, length == 2 }; - style_marker { delta_line == 0, delta_start == 6, length == 2 }; + style_marker { delta_line == 0, delta_start == 6 + '📫'.len_utf16() as u32, length == 2 }; style_marker { delta_line == 0, delta_start == 3, length == 1 }; style_marker { delta_line == 0, delta_start == 8, length == 1 }; style_marker { delta_line == 1, delta_start == 0, length == 2 }; - style_marker { delta_line == 0, delta_start == 6, length == 2 }; + style_marker { delta_line == 0, delta_start == 6 + 'か'.len_utf16() as u32, length == 2 }; style_marker { delta_line == 0, delta_start == 3, length == 1 }; style_marker { delta_line == 0, delta_start == 8, length == 1 }; ); diff --git a/src/lsp/semantic.rs b/src/lsp/semantic.rs index d4f2457..880dc6c 100644 --- a/src/lsp/semantic.rs +++ b/src/lsp/semantic.rs @@ -1,6 +1,5 @@ use std::cell::Ref; use std::cell::RefCell; -use std::cell::RefMut; use std::collections::HashMap; use std::ops::Range; use std::rc::Rc; @@ -8,7 +7,6 @@ use std::rc::Rc; use tower_lsp::lsp_types::SemanticToken; use tower_lsp::lsp_types::SemanticTokenModifier; use tower_lsp::lsp_types::SemanticTokenType; -use unicode_width::UnicodeWidthStr; use crate::parser::source::LineCursor; use crate::parser::source::Source; @@ -252,7 +250,9 @@ impl<'a> Semantics<'a> { .find('\n') .unwrap_or(self.source.content().len() - cursor.pos); let len = usize::min(range.end - cursor.pos, end); - let clen = self.source.content()[cursor.pos..cursor.pos + len].width(); // TODO Fix issue with CJK characters + let clen = self.source.content()[cursor.pos..cursor.pos + len] + .chars() + .fold(0, |acc, c| acc + c.len_utf16()); let delta_line = cursor.line - current.line; let delta_start = if delta_line == 0 { diff --git a/src/parser/source.rs b/src/parser/source.rs index 3e14ae3..262c5f8 100644 --- a/src/parser/source.rs +++ b/src/parser/source.rs @@ -5,7 +5,6 @@ use std::rc::Rc; use downcast_rs::impl_downcast; use downcast_rs::Downcast; -use unicode_width::UnicodeWidthChar; /// Trait for source content pub trait Source: Downcast + Debug { @@ -159,14 +158,13 @@ impl LineCursor { let mut prev = self.source.content().as_str()[..start].chars().rev().next(); while self.pos < pos { let c = it.next().unwrap(); - let len = c.len_utf8(); if self.pos != start && prev == Some('\n') { self.line += 1; self.line_pos = 0; } - self.line_pos += c.width().unwrap_or(1); - self.pos += len; + self.line_pos += c.len_utf16(); + self.pos += c.len_utf8(); prev = Some(c); } if self.pos != start && prev == Some('\n') { @@ -190,8 +188,8 @@ impl LineCursor { self.line -= 1; self.line_pos = 0; } - self.line_pos -= c.width().unwrap_or(1); - self.pos -= len; + self.line_pos -= c.len_utf16(); + self.pos -= c.len_utf8(); prev = Some(c); } if self.pos != start && prev == Some('\n') {