From a8366ab34b033d55809dbf9e947bd3f997a42a95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Patrick=20F=C3=B6rster?=
Date: Wed, 22 Nov 2023 21:36:06 +0100
Subject: [PATCH] Downgrade line-index due to text sync regression

---
 Cargo.lock                               |  12 +-
 crates/base-db/Cargo.toml                |   2 +-
 crates/bibfmt/Cargo.toml                 |   2 +-
 crates/completion/Cargo.toml             |   2 +-
 crates/diagnostics/Cargo.toml            |   2 +-
 crates/line-index/Cargo.toml             |  14 ++
 crates/line-index/src/lib.rs             | 213 +++++++++++++++++++++++
 crates/symbols/Cargo.toml                |   2 +-
 crates/test-utils/Cargo.toml             |   2 +-
 crates/texlab/Cargo.toml                 |   2 +-
 crates/texlab/src/util/line_index_ext.rs |   8 +-
 11 files changed, 240 insertions(+), 21 deletions(-)
 create mode 100644 crates/line-index/Cargo.toml
 create mode 100644 crates/line-index/src/lib.rs

diff --git a/Cargo.lock b/Cargo.lock
index 8769b104..e8de4e35 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -850,11 +850,9 @@ checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
 
 [[package]]
 name = "line-index"
-version = "0.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7ee59ea2e50e61bae4f09eee6416820f2ba9796d6e924c86f6e4f699dcb1a7ac"
+version = "0.0.0"
 dependencies = [
- "nohash-hasher",
+ "rustc-hash",
  "text-size",
 ]
 
@@ -982,12 +980,6 @@ dependencies = [
  "windows-sys",
 ]
 
-[[package]]
-name = "nohash-hasher"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451"
-
 [[package]]
 name = "notify"
 version = "6.1.1"
diff --git a/crates/base-db/Cargo.toml b/crates/base-db/Cargo.toml
index 9217a735..ba471e8d 100644
--- a/crates/base-db/Cargo.toml
+++ b/crates/base-db/Cargo.toml
@@ -11,7 +11,7 @@ bibtex-utils = { path = "../bibtex-utils" }
 dirs = "5.0.1"
 distro = { path = "../distro" }
 itertools = "0.11.0"
-line-index = "0.1.0"
+line-index = { path = "../line-index" }
 log = "0.4.20"
 notify = "6.0.1"
 once_cell = "1.18.0"
diff --git a/crates/bibfmt/Cargo.toml b/crates/bibfmt/Cargo.toml
index 3babf31c..18a4eb6d 100644
--- a/crates/bibfmt/Cargo.toml
+++ b/crates/bibfmt/Cargo.toml
@@ -9,7 +9,7 @@ rust-version.workspace = true
 [dependencies]
 rowan = "0.15.13"
 syntax = { path = "../syntax" }
-line-index = "0.1.0"
+line-index = { path = "../line-index" }
 
 [lib]
 doctest = false
diff --git a/crates/completion/Cargo.toml b/crates/completion/Cargo.toml
index d3638407..03f8e4d5 100644
--- a/crates/completion/Cargo.toml
+++ b/crates/completion/Cargo.toml
@@ -10,7 +10,7 @@ rust-version.workspace = true
 base-db = { path = "../base-db" }
 completion-data = { path = "../completion-data" }
 fuzzy-matcher = { version = "0.3.7", features = ["compact"] }
-line-index = "0.1.0"
+line-index = { path = "../line-index" }
 rayon = "1.7.0"
 rowan = "0.15.13"
 rustc-hash = "1.1.0"
diff --git a/crates/diagnostics/Cargo.toml b/crates/diagnostics/Cargo.toml
index 3b985e6f..18577230 100644
--- a/crates/diagnostics/Cargo.toml
+++ b/crates/diagnostics/Cargo.toml
@@ -9,7 +9,7 @@ rust-version.workspace = true
 [dependencies]
 base-db = { path = "../base-db" }
 itertools = "0.11.0"
-line-index = "0.1.0"
+line-index = { path = "../line-index" }
 rowan = "0.15.13"
 rustc-hash = "1.1.0"
 syntax = { path = "../syntax" }
diff --git a/crates/line-index/Cargo.toml b/crates/line-index/Cargo.toml
new file mode 100644
index 00000000..73d2766d
--- /dev/null
+++ b/crates/line-index/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "line-index"
+version = "0.0.0"
+license.workspace = true
+authors.workspace = true
+edition.workspace = true
+rust-version.workspace = true
+
+[dependencies]
+rustc-hash = "1.1.0"
+text-size = "1.1.1"
+
+[lib]
+doctest = false
diff --git a/crates/line-index/src/lib.rs b/crates/line-index/src/lib.rs
new file mode 100644
index 00000000..20161d49
--- /dev/null
+++ b/crates/line-index/src/lib.rs
@@ -0,0 +1,213 @@
+// The following code has been copied from rust-analyzer.
+
+//! `LineIndex` maps flat `TextSize` offsets into `(Line, Column)`
+//! representation.
+use std::iter;
+
+use rustc_hash::FxHashMap;
+use text_size::{TextRange, TextSize};
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct LineIndex {
+    /// Offset of the beginning of each line, zero-based
+    pub newlines: Vec<TextSize>,
+    /// List of non-ASCII characters on each line
+    pub(crate) utf16_lines: FxHashMap<u32, Vec<Utf16Char>>,
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub struct LineColUtf16 {
+    /// Zero-based
+    pub line: u32,
+    /// Zero-based
+    pub col: u32,
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub struct LineCol {
+    /// Zero-based
+    pub line: u32,
+    /// Zero-based utf8 offset
+    pub col: u32,
+}
+
+#[derive(Clone, Debug, Hash, PartialEq, Eq)]
+pub(crate) struct Utf16Char {
+    /// Start offset of a character inside a line, zero-based
+    pub(crate) start: TextSize,
+    /// End offset of a character inside a line, zero-based
+    pub(crate) end: TextSize,
+}
+
+impl Utf16Char {
+    /// Returns the length in 8-bit UTF-8 code units.
+    fn len(&self) -> TextSize {
+        self.end - self.start
+    }
+
+    /// Returns the length in 16-bit UTF-16 code units.
+    fn len_utf16(&self) -> usize {
+        if self.len() == TextSize::from(4) {
+            2
+        } else {
+            1
+        }
+    }
+}
+
+impl LineIndex {
+    /// Builds the index in a single pass over `text`, recording where every line
+    /// starts and the in-line byte range of each non-ASCII character.
+    pub fn new(text: &str) -> LineIndex {
+        let mut utf16_lines = FxHashMap::default();
+        let mut utf16_chars = Vec::new();
+
+        let mut newlines = vec![0.into()];
+        let mut curr_row = 0.into();
+        let mut curr_col = 0.into();
+        let mut line = 0;
+        for c in text.chars() {
+            let c_len = TextSize::of(c);
+            curr_row += c_len;
+            if c == '\n' {
+                newlines.push(curr_row);
+
+                // Save any utf-16 characters seen in the previous line
+                if !utf16_chars.is_empty() {
+                    utf16_lines.insert(line, utf16_chars);
+                    utf16_chars = Vec::new();
+                }
+
+                // Prepare for processing the next line
+                curr_col = 0.into();
+                line += 1;
+                continue;
+            }
+
+            if !c.is_ascii() {
+                utf16_chars.push(Utf16Char {
+                    start: curr_col,
+                    end: curr_col + c_len,
+                });
+            }
+
+            curr_col += c_len;
+        }
+
+        // Save any utf-16 characters seen in the last line
+        if !utf16_chars.is_empty() {
+            utf16_lines.insert(line, utf16_chars);
+        }
+
+        LineIndex {
+            newlines,
+            utf16_lines,
+        }
+    }
+
+    /// Converts a flat offset into a zero-based line and UTF-8 column.
+    /// `offset` is not checked against the length of the indexed text.
+    pub fn line_col(&self, offset: TextSize) -> LineCol {
+        let line = partition_point(&self.newlines, |&it| it <= offset) - 1;
+        let line_start_offset = self.newlines[line];
+        let col = offset - line_start_offset;
+        LineCol {
+            line: line as u32,
+            col: col.into(),
+        }
+    }
+
+    /// Converts a line and UTF-8 column back into a flat offset. Returns `None`
+    /// if the index has no such line; the column itself is not validated.
+    pub fn offset(&self, line_col: LineCol) -> Option<TextSize> {
+        Some(*self.newlines.get(line_col.line as usize)? + TextSize::from(line_col.col))
+    }
+
+    /// Re-expresses the column of `line_col` in UTF-16 code units.
+    /// The conversion itself cannot fail, so this always returns `Some`.
+    pub fn to_utf16(&self, line_col: LineCol) -> Option<LineColUtf16> {
+        let col = self.utf8_to_utf16_col(line_col.line, line_col.col.into());
+        Some(LineColUtf16 {
+            line: line_col.line,
+            col: col as u32,
+        })
+    }
+
+    /// Re-expresses the column of `line_col` in UTF-8 code units (bytes).
+    /// The conversion itself cannot fail, so this always returns `Some`.
+    pub fn to_utf8(&self, line_col: LineColUtf16) -> Option<LineCol> {
+        let col = self.utf16_to_utf8_col(line_col.line, line_col.col);
+        Some(LineCol {
+            line: line_col.line,
+            col: col.into(),
+        })
+    }
+
+    /// Splits `range` at every line start it contains and yields the resulting
+    /// non-empty sub-ranges in order.
+    pub fn lines(&self, range: TextRange) -> impl Iterator<Item = TextRange> + '_ {
+        let lo = partition_point(&self.newlines, |&it| it < range.start());
+        let hi = partition_point(&self.newlines, |&it| it <= range.end());
+        let all = iter::once(range.start())
+            .chain(self.newlines[lo..hi].iter().copied())
+            .chain(iter::once(range.end()));
+
+        all.clone()
+            .zip(all.skip(1))
+            .map(|(lo, hi)| TextRange::new(lo, hi))
+            .filter(|it| !it.is_empty())
+    }
+
+    /// Maps a UTF-8 column on `line` to UTF-16 by subtracting, for every non-ASCII
+    /// character that ends at or before `col`, its surplus of UTF-8 over UTF-16 units.
+    fn utf8_to_utf16_col(&self, line: u32, col: TextSize) -> usize {
+        let mut res: usize = col.into();
+        if let Some(utf16_chars) = self.utf16_lines.get(&line) {
+            for c in utf16_chars {
+                if c.end <= col {
+                    res -= usize::from(c.len()) - c.len_utf16();
+                } else {
+                    // From here on, all utf16 characters come *after* the character we are mapping,
+                    // so we don't need to take them into account
+                    break;
+                }
+            }
+        }
+        res
+    }
+
+    /// Maps a UTF-16 column on `line` to UTF-8 by adding back, for every non-ASCII
+    /// character that starts before the running column, its surplus UTF-8 units.
+    fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize {
+        if let Some(utf16_chars) = self.utf16_lines.get(&line) {
+            for c in utf16_chars {
+                if col > u32::from(c.start) {
+                    col += u32::from(c.len()) - c.len_utf16() as u32;
+                } else {
+                    // From here on, all utf16 characters come *after* the character we are mapping,
+                    // so we don't need to take them into account
+                    break;
+                }
+            }
+        }
+
+        col.into()
+    }
+}
+
+/// Returns `idx` such that:
+///
+/// ```text
+///     ∀ x in slice[..idx]:  pred(x)
+///  && ∀ x in slice[idx..]: !pred(x)
+/// ```
+///
+/// Delegates to the standard library's `slice::partition_point`, which performs
+/// the same binary search without `unsafe`:
+/// https://github.com/rust-lang/rust/issues/73831
+fn partition_point<T, P>(slice: &[T], pred: P) -> usize
+where
+    P: FnMut(&T) -> bool,
+{
+    slice.partition_point(pred)
+}
diff --git a/crates/symbols/Cargo.toml b/crates/symbols/Cargo.toml
index 3f72d905..1c587464 100644
--- a/crates/symbols/Cargo.toml
+++ b/crates/symbols/Cargo.toml
@@ -13,7 +13,7 @@ doctest = false
 base-db = { path = "../base-db" }
 distro = { path = "../distro" }
 itertools = "0.11.0"
-line-index = "0.1.0"
+line-index = { path = "../line-index" }
 rowan = "0.15.13"
 syntax = { path = "../syntax" }
 titlecase = "2.2.1"
diff --git a/crates/test-utils/Cargo.toml b/crates/test-utils/Cargo.toml
index fccebe87..8d8788d3 100644
--- a/crates/test-utils/Cargo.toml
+++ b/crates/test-utils/Cargo.toml
@@ -9,7 +9,7 @@ rust-version.workspace = true
 [dependencies]
 base-db = { path = "../base-db" }
 distro = { path = "../distro" }
-line-index = "0.1.0"
+line-index = { path = "../line-index" }
 rowan = "0.15.13"
 url = "=2.3.1"
 
diff --git a/crates/texlab/Cargo.toml b/crates/texlab/Cargo.toml
index 84d3b26a..8af60a9d 100644
--- a/crates/texlab/Cargo.toml
+++ b/crates/texlab/Cargo.toml
@@ -41,7 +41,7 @@ folding = { path = "../folding" }
 highlights = { path = "../highlights" }
 hover = { path = "../hover" }
 inlay-hints = { path = "../inlay-hints" }
-line-index = "0.1.0"
+line-index = { path = "../line-index" }
 links = { path = "../links" }
 log = "0.4.19"
 lsp-server = "0.7.4"
diff --git a/crates/texlab/src/util/line_index_ext.rs b/crates/texlab/src/util/line_index_ext.rs
index 3905f843..8e6bb6da 100644
--- a/crates/texlab/src/util/line_index_ext.rs
+++ b/crates/texlab/src/util/line_index_ext.rs
@@ -1,4 +1,4 @@
-use line_index::{LineCol, LineIndex, WideEncoding, WideLineCol};
+use line_index::{LineCol, LineColUtf16, LineIndex};
 use lsp_types::{Position, Range};
 use rowan::{TextRange, TextSize};
 
@@ -14,12 +14,12 @@ pub trait LineIndexExt {
 
 impl LineIndexExt for LineIndex {
     fn offset_lsp(&self, line_col: Position) -> Option<TextSize> {
-        let line_col = WideLineCol {
+        let line_col = LineColUtf16 {
             line: line_col.line,
             col: line_col.character,
         };
 
-        let line_col = self.to_utf8(WideEncoding::Utf16, line_col)?;
+        let line_col = self.to_utf8(line_col)?;
         self.offset(line_col)
     }
 
@@ -31,7 +31,7 @@ impl LineIndexExt for LineIndex {
 
     fn line_col_lsp(&self, offset: TextSize) -> Option<Position> {
         let line_col = self.line_col(offset);
-        let line_col = self.to_wide(WideEncoding::Utf16, line_col)?;
+        let line_col = self.to_utf16(line_col)?;
         Some(Position::new(line_col.line, line_col.col))
     }
 