Fix unicode width

This commit is contained in:
ef3d0c3e 2024-10-20 09:03:17 +02:00
parent 02b34d5424
commit f1de61409a
4 changed files with 11 additions and 14 deletions

View file

@ -40,7 +40,6 @@ tower-lsp = "0.20.0"
unicode-segmentation = "1.11.0" unicode-segmentation = "1.11.0"
walkdir = "2.5.0" walkdir = "2.5.0"
runtime-format = "0.1.3" runtime-format = "0.1.3"
unicode-width = "0.2.0"
[dev-dependencies] [dev-dependencies]
rand = "0.8.5" rand = "0.8.5"

View file

@ -377,8 +377,8 @@ terminated here%<nml.style.toggle("Italic")>%
let source = Rc::new(SourceFile::with_content( let source = Rc::new(SourceFile::with_content(
"".to_string(), "".to_string(),
r#" r#"
**test** `another` **te📫st** `another`
__test__ *another* __teかst__ *another*
"# "#
.to_string(), .to_string(),
None, None,
@ -388,12 +388,12 @@ __test__ *another*
validate_semantics!(state, source.clone(), 0, validate_semantics!(state, source.clone(), 0,
style_marker { delta_line == 1, delta_start == 0, length == 2 }; style_marker { delta_line == 1, delta_start == 0, length == 2 };
style_marker { delta_line == 0, delta_start == 6, length == 2 }; style_marker { delta_line == 0, delta_start == 6 + '📫'.len_utf16() as u32, length == 2 };
style_marker { delta_line == 0, delta_start == 3, length == 1 }; style_marker { delta_line == 0, delta_start == 3, length == 1 };
style_marker { delta_line == 0, delta_start == 8, length == 1 }; style_marker { delta_line == 0, delta_start == 8, length == 1 };
style_marker { delta_line == 1, delta_start == 0, length == 2 }; style_marker { delta_line == 1, delta_start == 0, length == 2 };
style_marker { delta_line == 0, delta_start == 6, length == 2 }; style_marker { delta_line == 0, delta_start == 6 + 'か'.len_utf16() as u32, length == 2 };
style_marker { delta_line == 0, delta_start == 3, length == 1 }; style_marker { delta_line == 0, delta_start == 3, length == 1 };
style_marker { delta_line == 0, delta_start == 8, length == 1 }; style_marker { delta_line == 0, delta_start == 8, length == 1 };
); );

View file

@ -1,6 +1,5 @@
use std::cell::Ref; use std::cell::Ref;
use std::cell::RefCell; use std::cell::RefCell;
use std::cell::RefMut;
use std::collections::HashMap; use std::collections::HashMap;
use std::ops::Range; use std::ops::Range;
use std::rc::Rc; use std::rc::Rc;
@ -8,7 +7,6 @@ use std::rc::Rc;
use tower_lsp::lsp_types::SemanticToken; use tower_lsp::lsp_types::SemanticToken;
use tower_lsp::lsp_types::SemanticTokenModifier; use tower_lsp::lsp_types::SemanticTokenModifier;
use tower_lsp::lsp_types::SemanticTokenType; use tower_lsp::lsp_types::SemanticTokenType;
use unicode_width::UnicodeWidthStr;
use crate::parser::source::LineCursor; use crate::parser::source::LineCursor;
use crate::parser::source::Source; use crate::parser::source::Source;
@ -252,7 +250,9 @@ impl<'a> Semantics<'a> {
.find('\n') .find('\n')
.unwrap_or(self.source.content().len() - cursor.pos); .unwrap_or(self.source.content().len() - cursor.pos);
let len = usize::min(range.end - cursor.pos, end); let len = usize::min(range.end - cursor.pos, end);
let clen = self.source.content()[cursor.pos..cursor.pos + len].width(); // TODO Fix issue with CJK characters let clen = self.source.content()[cursor.pos..cursor.pos + len]
.chars()
.fold(0, |acc, c| acc + c.len_utf16());
let delta_line = cursor.line - current.line; let delta_line = cursor.line - current.line;
let delta_start = if delta_line == 0 { let delta_start = if delta_line == 0 {

View file

@ -5,7 +5,6 @@ use std::rc::Rc;
use downcast_rs::impl_downcast; use downcast_rs::impl_downcast;
use downcast_rs::Downcast; use downcast_rs::Downcast;
use unicode_width::UnicodeWidthChar;
/// Trait for source content /// Trait for source content
pub trait Source: Downcast + Debug { pub trait Source: Downcast + Debug {
@ -159,14 +158,13 @@ impl LineCursor {
let mut prev = self.source.content().as_str()[..start].chars().rev().next(); let mut prev = self.source.content().as_str()[..start].chars().rev().next();
while self.pos < pos { while self.pos < pos {
let c = it.next().unwrap(); let c = it.next().unwrap();
let len = c.len_utf8();
if self.pos != start && prev == Some('\n') { if self.pos != start && prev == Some('\n') {
self.line += 1; self.line += 1;
self.line_pos = 0; self.line_pos = 0;
} }
self.line_pos += c.width().unwrap_or(1); self.line_pos += c.len_utf16();
self.pos += len; self.pos += c.len_utf8();
prev = Some(c); prev = Some(c);
} }
if self.pos != start && prev == Some('\n') { if self.pos != start && prev == Some('\n') {
@ -190,8 +188,8 @@ impl LineCursor {
self.line -= 1; self.line -= 1;
self.line_pos = 0; self.line_pos = 0;
} }
self.line_pos -= c.width().unwrap_or(1); self.line_pos -= c.len_utf16();
self.pos -= len; self.pos -= c.len_utf8();
prev = Some(c); prev = Some(c);
} }
if self.pos != start && prev == Some('\n') { if self.pos != start && prev == Some('\n') {