diff --git a/src/lsp.rs b/src/lsp.rs index 932b402..db26709 100644 --- a/src/lsp.rs +++ b/src/lsp.rs @@ -12,7 +12,7 @@ use crate::config::{apply_setting, Config, Settings}; use crate::consts::{trigger_ptn, NT_RE}; use crate::input::{Input, InputResult, InputState}; use crate::rime::{Candidate, Rime, RimeError, RimeResponse}; -use crate::utils; +use crate::utils::{self, Encoding}; pub struct Backend { client: Client, @@ -20,6 +20,7 @@ pub struct Backend { state: DashMap>, config: RwLock, regex: RwLock, + encoding: RwLock, } impl Backend { @@ -30,6 +31,7 @@ impl Backend { state: DashMap::new(), config: RwLock::new(Config::default()), regex: RwLock::new(NT_RE.clone()), + encoding: RwLock::new(Encoding::default()), } } @@ -139,11 +141,12 @@ impl Backend { async fn get_completions(&self, uri: Url, position: Position) -> Option { // get new input let rope = self.documents.get(uri.as_str())?; + let encoding = self.encoding.read().await.clone(); let line_begin = { let line_pos = Position::new(position.line, 0); - utils::position_to_offset(&rope, line_pos)? + utils::position_to_offset(&rope, line_pos, encoding)? }; - let curr_char = utils::position_to_offset(&rope, position)?; + let curr_char = utils::position_to_offset(&rope, position, encoding)?; let new_input = { let re = self.regex.read().await; let has_trigger = !self.config.read().await.trigger_characters.is_empty(); @@ -297,6 +300,14 @@ impl LanguageServer for Backend { triggers.extend_from_slice(user_triggers); triggers }; + + let encoding_options = params + .capabilities + .general + .and_then(|g| g.position_encodings); + let encoding = utils::select_encoding(encoding_options); + *self.encoding.write().await = encoding; + // return Ok(InitializeResult { server_info: Some(ServerInfo { @@ -304,6 +315,7 @@ impl LanguageServer for Backend { version: Some(env!("CARGO_PKG_VERSION").to_string()), }), capabilities: ServerCapabilities { + position_encoding: Some(PositionEncodingKind::new(encoding.as_str())), text_document_sync: Some(TextDocumentSyncCapability::Kind( TextDocumentSyncKind::INCREMENTAL, )), @@ -344,6 +356,7 @@ impl LanguageServer for Backend { } async fn did_change(&self, params: DidChangeTextDocumentParams) { + let encoding = self.encoding.read().await.clone(); let url = params.text_document.uri; if let Some(mut rope) = self.documents.get_mut(url.as_str()) { for change in params.content_changes { @@ -351,8 +364,8 @@ impl LanguageServer for Backend { match range { // incremental change Some(Range { start, end }) => { - let s = utils::position_to_offset(&rope, start); - let e = utils::position_to_offset(&rope, end); + let s = utils::position_to_offset(&rope, start, encoding); + let e = utils::position_to_offset(&rope, end, encoding); if let (Some(s), Some(e)) = (s, e) { rope.remove(s..e); rope.insert(s, &text); diff --git a/src/utils.rs b/src/utils.rs index 5ad5157..b058170 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,30 +1,71 @@ use ropey::Rope; use std::path::{Path, PathBuf}; -use tower_lsp::lsp_types::Position; +use tower_lsp::lsp_types::{Position, PositionEncodingKind}; use crate::consts::AUTO_TRIGGER_RE; -/// UTF-16 Position -> UTF-8 offset -pub fn position_to_offset(rope: &Rope, position: Position) -> Option { +#[derive(Clone, Copy)] +pub enum Encoding { + UTF8, + UTF16, + UTF32, +} + +impl Encoding { + pub fn as_str(&self) -> &'static str { + match self { + Encoding::UTF8 => "utf-8", + Encoding::UTF16 => "utf-16", + Encoding::UTF32 => "utf-32", + } + } +} + +impl Default for Encoding { + fn default() -> Self { + Encoding::UTF16 + } +} + +pub fn select_encoding(options: Option>) -> Encoding { + match options { + // prefer utf-32 because of no conversion cost + Some(v) if v.contains(&PositionEncodingKind::new("utf-32")) => Encoding::UTF32, + Some(v) if v.contains(&PositionEncodingKind::new("utf-8")) => Encoding::UTF8, + _ => Encoding::default(), + } +} + +/// UTF-16 Position -> char index +pub fn position_to_offset(rope: &Rope, position: Position, encoding: Encoding) -> Option { let (line, col) = (position.line as usize, position.character as usize); // position is at the end of rope if line == rope.len_lines() && col == 0 { return Some(rope.len_chars()); } (line < rope.len_lines()).then_some(line).and_then(|line| { - let col8 = rope.line(line).try_utf16_cu_to_char(col).ok()?; - let offset = rope.try_line_to_char(line).ok()? + col8; + let col_offset = match encoding { + Encoding::UTF8 => rope.line(line).try_byte_to_char(col).ok()?, + Encoding::UTF16 => rope.line(line).try_utf16_cu_to_char(col).ok()?, + Encoding::UTF32 => col, + }; + //let col8 = rope.line(line).try_utf16_cu_to_char(col).ok()?; + let offset = rope.try_line_to_char(line).ok()? + col_offset; Some(offset) }) } -/// UTF-8 offset -> UTF-16 Position -pub fn offset_to_position(rope: &Rope, offset: usize) -> Option { +/// char index -> UTF-16 Position +pub fn offset_to_position(rope: &Rope, offset: usize, encoding: Encoding) -> Option { let line = rope.try_char_to_line(offset).ok()?; - let col8 = offset - rope.try_line_to_char(line).ok()?; + let col_offset = offset - rope.try_line_to_char(line).ok()?; (line < rope.len_lines()).then_some(line).and_then(|line| { - let col16 = rope.line(line).try_char_to_utf16_cu(col8).ok()?; - Some(Position::new(line as u32, col16 as u32)) + let col = match encoding { + Encoding::UTF8 => rope.line(line).try_char_to_byte(col_offset).ok()?, + Encoding::UTF16 => rope.line(line).try_char_to_utf16_cu(col_offset).ok()?, + Encoding::UTF32 => col_offset, + }; + Some(Position::new(line as u32, col as u32)) }) }