Downgrade line-index due to text sync regression (#967)

latex-lsp · Nov 23, 2023 · a4aa31c · a4aa31c
1 parent 867dbc9
commit a4aa31c
Show file tree

Hide file tree

Showing 11 changed files with 244 additions and 21 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/crates/base-db/Cargo.toml b/crates/base-db/Cargo.toml
@@ -11,7 +11,7 @@ bibtex-utils = { path = "../bibtex-utils" }
 dirs = "5.0.1"
 distro = { path = "../distro" }
 itertools = "0.11.0"
-line-index = "0.1.0"
+line-index = { path = "../line-index" }
 log = "0.4.20"
 notify = "6.0.1"
 once_cell = "1.18.0"

diff --git a/crates/bibfmt/Cargo.toml b/crates/bibfmt/Cargo.toml
@@ -9,7 +9,7 @@ rust-version.workspace = true
 [dependencies]
 rowan = "0.15.13"
 syntax = { path = "../syntax" }
-line-index = "0.1.0"
+line-index = { path = "../line-index" }
 
 [lib]
 doctest = false

diff --git a/crates/completion/Cargo.toml b/crates/completion/Cargo.toml
@@ -10,7 +10,7 @@ rust-version.workspace = true
 base-db = { path = "../base-db" }
 completion-data = { path = "../completion-data" }
 fuzzy-matcher = { version = "0.3.7", features = ["compact"] }
-line-index = "0.1.0"
+line-index = { path = "../line-index" }
 rayon = "1.7.0"
 rowan = "0.15.13"
 rustc-hash = "1.1.0"

diff --git a/crates/diagnostics/Cargo.toml b/crates/diagnostics/Cargo.toml
@@ -9,7 +9,7 @@ rust-version.workspace = true
 [dependencies]
 base-db = { path = "../base-db" }
 itertools = "0.11.0"
-line-index = "0.1.0"
+line-index = { path = "../line-index" }
 rowan = "0.15.13"
 rustc-hash = "1.1.0"
 syntax = { path = "../syntax" }

diff --git a/crates/line-index/Cargo.toml b/crates/line-index/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "line-index"
+version = "0.0.0"
+license.workspace = true
+authors.workspace = true
+edition.workspace = true
+rust-version.workspace = true
+
+[dependencies]
+rustc-hash = "1.1.0"
+text-size = "1.1.1"
+
+[lib]
+doctest = false
diff --git a/crates/line-index/src/lib.rs b/crates/line-index/src/lib.rs
@@ -0,0 +1,217 @@
+// The following code has been copied from rust-analyzer.
+
+//! `LineIndex` maps flat `TextSize` offsets into `(Line, Column)`
+//! representation.
+use std::iter;
+
+use rustc_hash::FxHashMap;
+use text_size::{TextRange, TextSize};
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct LineIndex {
+    /// Offset of the beginning of each line, zero-based
+    pub newlines: Vec<TextSize>,
+    /// List of non-ASCII characters on each line
+    pub(crate) utf16_lines: FxHashMap<u32, Vec<Utf16Char>>,
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub struct LineColUtf16 {
+    /// Zero-based
+    pub line: u32,
+    /// Zero-based
+    pub col: u32,
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub struct LineCol {
+    /// Zero-based
+    pub line: u32,
+    /// Zero-based utf8 offset
+    pub col: u32,
+}
+
+#[derive(Clone, Debug, Hash, PartialEq, Eq)]
+pub(crate) struct Utf16Char {
+    /// Start offset of a character inside a line, zero-based
+    pub(crate) start: TextSize,
+    /// End offset of a character inside a line, zero-based
+    pub(crate) end: TextSize,
+}
+
+impl Utf16Char {
+    /// Returns the length in 8-bit UTF-8 code units.
+    fn len(&self) -> TextSize {
+        self.end - self.start
+    }
+
+    /// Returns the length in 16-bit UTF-16 code units.
+    fn len_utf16(&self) -> usize {
+        if self.len() == TextSize::from(4) {
+            2
+        } else {
+            1
+        }
+    }
+}
+
+impl LineIndex {
+    pub fn new(text: &str) -> LineIndex {
+        let mut utf16_lines = FxHashMap::default();
+        let mut utf16_chars = Vec::new();
+
+        let mut newlines = vec![0.into()];
+        let mut curr_row = 0.into();
+        let mut curr_col = 0.into();
+        let mut line = 0;
+        for c in text.chars() {
+            let c_len = TextSize::of(c);
+            curr_row += c_len;
+            if c == '\n' {
+                newlines.push(curr_row);
+
+                // Save any utf-16 characters seen in the previous line
+                if !utf16_chars.is_empty() {
+                    utf16_lines.insert(line, utf16_chars);
+                    utf16_chars = Vec::new();
+                }
+
+                // Prepare for processing the next line
+                curr_col = 0.into();
+                line += 1;
+                continue;
+            }
+
+            if !c.is_ascii() {
+                utf16_chars.push(Utf16Char {
+                    start: curr_col,
+                    end: curr_col + c_len,
+                });
+            }
+
+            curr_col += c_len;
+        }
+
+        // Save any utf-16 characters seen in the last line
+        if !utf16_chars.is_empty() {
+            utf16_lines.insert(line, utf16_chars);
+        }
+
+        LineIndex {
+            newlines,
+            utf16_lines,
+        }
+    }
+
+    pub fn line_col(&self, offset: TextSize) -> LineCol {
+        let line = partition_point(&self.newlines, |&it| it <= offset) - 1;
+        let line_start_offset = self.newlines[line];
+        let col = offset - line_start_offset;
+        LineCol {
+            line: line as u32,
+            col: col.into(),
+        }
+    }
+
+    pub fn offset(&self, line_col: LineCol) -> Option<TextSize> {
+        Some(self.newlines[line_col.line as usize] + TextSize::from(line_col.col))
+    }
+
+    pub fn to_utf16(&self, line_col: LineCol) -> Option<LineColUtf16> {
+        let col = self.utf8_to_utf16_col(line_col.line, line_col.col.into());
+        Some(LineColUtf16 {
+            line: line_col.line,
+            col: col as u32,
+        })
+    }
+
+    pub fn to_utf8(&self, line_col: LineColUtf16) -> Option<LineCol> {
+        let col = self.utf16_to_utf8_col(line_col.line, line_col.col);
+        Some(LineCol {
+            line: line_col.line,
+            col: col.into(),
+        })
+    }
+
+    pub fn lines(&self, range: TextRange) -> impl Iterator<Item = TextRange> + '_ {
+        let lo = partition_point(&self.newlines, |&it| it < range.start());
+        let hi = partition_point(&self.newlines, |&it| it <= range.end());
+        let all = iter::once(range.start())
+            .chain(self.newlines[lo..hi].iter().copied())
+            .chain(iter::once(range.end()));
+
+        all.clone()
+            .zip(all.skip(1))
+            .map(|(lo, hi)| TextRange::new(lo, hi))
+            .filter(|it| !it.is_empty())
+    }
+
+    fn utf8_to_utf16_col(&self, line: u32, col: TextSize) -> usize {
+        let mut res: usize = col.into();
+        if let Some(utf16_chars) = self.utf16_lines.get(&line) {
+            for c in utf16_chars {
+                if c.end <= col {
+                    res -= usize::from(c.len()) - c.len_utf16();
+                } else {
+                    // From here on, all utf16 characters come *after* the character we are mapping,
+                    // so we don't need to take them into account
+                    break;
+                }
+            }
+        }
+        res
+    }
+
+    fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize {
+        if let Some(utf16_chars) = self.utf16_lines.get(&line) {
+            for c in utf16_chars {
+                if col > u32::from(c.start) {
+                    col += u32::from(c.len()) - c.len_utf16() as u32;
+                } else {
+                    // From here on, all utf16 characters come *after* the character we are mapping,
+                    // so we don't need to take them into account
+                    break;
+                }
+            }
+        }
+
+        col.into()
+    }
+}
+
+/// Returns `idx` such that:
+///
+/// ```text
+///     ∀ x in slice[..idx]:  pred(x)
+///  && ∀ x in slice[idx..]: !pred(x)
+/// ```
+///
+/// https://github.com/rust-lang/rust/issues/73831
+fn partition_point<T, P>(slice: &[T], mut pred: P) -> usize
+where
+    P: FnMut(&T) -> bool,
+{
+    let mut left = 0;
+    let mut right = slice.len();
+
+    while left != right {
+        let mid = left + (right - left) / 2;
+        // SAFETY:
+        // When left < right, left <= mid < right.
+        // Therefore left always increases and right always decreases,
+        // and either of them is selected.
+        // In both cases left <= right is satisfied.
+        // Therefore if left < right in a step,
+        // left <= right is satisfied in the next step.
+        // Therefore as long as left != right, 0 <= left < right <= len is satisfied
+        // and in this case 0 <= mid < len is satisfied too.
+        let value = unsafe { slice.get_unchecked(mid) };
+        if pred(value) {
+            left = mid + 1;
+        } else {
+            right = mid;
+        }
+    }
+
+    left
+}
diff --git a/crates/symbols/Cargo.toml b/crates/symbols/Cargo.toml
@@ -13,7 +13,7 @@ doctest = false
 base-db = { path = "../base-db" }
 distro = { path = "../distro" }
 itertools = "0.11.0"
-line-index = "0.1.0"
+line-index = { path = "../line-index" }
 rowan = "0.15.13"
 syntax = { path = "../syntax" }
 titlecase = "2.2.1"

diff --git a/crates/test-utils/Cargo.toml b/crates/test-utils/Cargo.toml
@@ -9,7 +9,7 @@ rust-version.workspace = true
 [dependencies]
 base-db = { path = "../base-db" }
 distro = { path = "../distro" }
-line-index = "0.1.0"
+line-index = { path = "../line-index" }
 rowan = "0.15.13"
 url = "=2.3.1"
 

diff --git a/crates/texlab/Cargo.toml b/crates/texlab/Cargo.toml
@@ -41,7 +41,7 @@ folding = { path = "../folding" }
 highlights = { path = "../highlights" }
 hover = { path = "../hover" }
 inlay-hints = { path = "../inlay-hints" }
-line-index = "0.1.0"
+line-index = { path = "../line-index" }
 links = { path = "../links" }
 log = "0.4.19"
 lsp-server = "0.7.4"

diff --git a/crates/texlab/src/util/line_index_ext.rs b/crates/texlab/src/util/line_index_ext.rs
@@ -1,4 +1,4 @@
-use line_index::{LineCol, LineIndex, WideEncoding, WideLineCol};
+use line_index::{LineCol, LineColUtf16, LineIndex};
 use lsp_types::{Position, Range};
 use rowan::{TextRange, TextSize};
 
@@ -14,12 +14,12 @@ pub trait LineIndexExt {
 
 impl LineIndexExt for LineIndex {
     fn offset_lsp(&self, line_col: Position) -> Option<TextSize> {
-        let line_col = WideLineCol {
+        let line_col = LineColUtf16 {
             line: line_col.line,
             col: line_col.character,
         };
 
-        let line_col = self.to_utf8(WideEncoding::Utf16, line_col)?;
+        let line_col = self.to_utf8(line_col)?;
         self.offset(line_col)
     }
 
@@ -31,7 +31,7 @@ impl LineIndexExt for LineIndex {
 
     fn line_col_lsp(&self, offset: TextSize) -> Option<Position> {
         let line_col = self.line_col(offset);
-        let line_col = self.to_wide(WideEncoding::Utf16, line_col)?;
+        let line_col = self.to_utf16(line_col)?;
         Some(Position::new(line_col.line, line_col.col))
     }