Skip to content

Commit

Permalink
Downgrade line-index due to text sync regression (#967)
Browse files Browse the repository at this point in the history
  • Loading branch information
pfoerster authored Nov 23, 2023
1 parent 867dbc9 commit a4aa31c
Show file tree
Hide file tree
Showing 11 changed files with 244 additions and 21 deletions.
12 changes: 2 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion crates/base-db/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ bibtex-utils = { path = "../bibtex-utils" }
dirs = "5.0.1"
distro = { path = "../distro" }
itertools = "0.11.0"
line-index = "0.1.0"
line-index = { path = "../line-index" }
log = "0.4.20"
notify = "6.0.1"
once_cell = "1.18.0"
Expand Down
2 changes: 1 addition & 1 deletion crates/bibfmt/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ rust-version.workspace = true
[dependencies]
rowan = "0.15.13"
syntax = { path = "../syntax" }
line-index = "0.1.0"
line-index = { path = "../line-index" }

[lib]
doctest = false
Expand Down
2 changes: 1 addition & 1 deletion crates/completion/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ rust-version.workspace = true
base-db = { path = "../base-db" }
completion-data = { path = "../completion-data" }
fuzzy-matcher = { version = "0.3.7", features = ["compact"] }
line-index = "0.1.0"
line-index = { path = "../line-index" }
rayon = "1.7.0"
rowan = "0.15.13"
rustc-hash = "1.1.0"
Expand Down
2 changes: 1 addition & 1 deletion crates/diagnostics/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ rust-version.workspace = true
[dependencies]
base-db = { path = "../base-db" }
itertools = "0.11.0"
line-index = "0.1.0"
line-index = { path = "../line-index" }
rowan = "0.15.13"
rustc-hash = "1.1.0"
syntax = { path = "../syntax" }
Expand Down
14 changes: 14 additions & 0 deletions crates/line-index/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[package]
name = "line-index"
version = "0.0.0"
license.workspace = true
authors.workspace = true
edition.workspace = true
rust-version.workspace = true

[dependencies]
rustc-hash = "1.1.0"
text-size = "1.1.1"

[lib]
doctest = false
217 changes: 217 additions & 0 deletions crates/line-index/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
// The following code has been copied from rust-analyzer.

//! `LineIndex` maps flat `TextSize` offsets into `(Line, Column)`
//! representation.
use std::iter;

use rustc_hash::FxHashMap;
use text_size::{TextRange, TextSize};

/// Index over a text that maps flat `TextSize` offsets to line/column
/// positions and converts columns between UTF-8 and UTF-16.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct LineIndex {
/// Offset of the beginning of each line, zero-based.
/// When built by `LineIndex::new`, the first entry is always 0.
pub newlines: Vec<TextSize>,
/// List of non-ASCII characters on each line, keyed by zero-based line number.
/// `LineIndex::new` only inserts an entry for lines that contain such characters.
pub(crate) utf16_lines: FxHashMap<u32, Vec<Utf16Char>>,
}

/// A line/column position whose column is counted in UTF-16 code units.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct LineColUtf16 {
/// Zero-based line number
pub line: u32,
/// Zero-based column, in UTF-16 code units
pub col: u32,
}

/// A line/column position whose column is a UTF-8 byte offset from the
/// start of the line.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct LineCol {
/// Zero-based line number
pub line: u32,
/// Zero-based utf8 offset (in bytes) from the start of the line
pub col: u32,
}

/// UTF-8 byte range of a single non-ASCII character within its line.
///
/// Both offsets are relative to the start of the line, not to the start of
/// the text.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub(crate) struct Utf16Char {
/// Start offset of a character inside a line, zero-based
pub(crate) start: TextSize,
/// End offset (exclusive) of a character inside a line, zero-based
pub(crate) end: TextSize,
}

impl Utf16Char {
/// Returns the length in 8-bit UTF-8 code units.
fn len(&self) -> TextSize {
self.end - self.start
}

/// Returns the length in 16-bit UTF-16 code units.
fn len_utf16(&self) -> usize {
if self.len() == TextSize::from(4) {
2
} else {
1
}
}
}

impl LineIndex {
    /// Builds the index for `text` in a single pass over its characters.
    ///
    /// Records the start offset of every line (a line ends at `'\n'`) and,
    /// for each line, the UTF-8 byte ranges of its non-ASCII characters.
    pub fn new(text: &str) -> LineIndex {
        let mut utf16_lines = FxHashMap::default();
        let mut utf16_chars = Vec::new();

        // The first line always starts at offset 0.
        let mut newlines = vec![0.into()];
        // `curr_row` is the absolute offset just past the current character;
        // `curr_col` is the offset of the current character within its line.
        let mut curr_row = 0.into();
        let mut curr_col = 0.into();
        let mut line = 0;
        for c in text.chars() {
            let c_len = TextSize::of(c);
            curr_row += c_len;
            if c == '\n' {
                // The next line starts right after the newline character.
                newlines.push(curr_row);

                // Save any utf-16 characters seen in the previous line
                if !utf16_chars.is_empty() {
                    utf16_lines.insert(line, utf16_chars);
                    utf16_chars = Vec::new();
                }

                // Prepare for processing the next line
                curr_col = 0.into();
                line += 1;
                continue;
            }

            if !c.is_ascii() {
                utf16_chars.push(Utf16Char {
                    start: curr_col,
                    end: curr_col + c_len,
                });
            }

            curr_col += c_len;
        }

        // Save any utf-16 characters seen in the last line
        if !utf16_chars.is_empty() {
            utf16_lines.insert(line, utf16_chars);
        }

        LineIndex {
            newlines,
            utf16_lines,
        }
    }

    /// Converts a flat `offset` into a zero-based line and UTF-8 column.
    ///
    /// The line is the last one whose start offset is `<= offset`; the column
    /// is the distance from that line start.
    pub fn line_col(&self, offset: TextSize) -> LineCol {
        // `newlines[0]` is 0 when built by `new`, so at least one entry
        // satisfies the predicate and the subtraction cannot underflow.
        let line = partition_point(&self.newlines, |&it| it <= offset) - 1;
        let line_start_offset = self.newlines[line];
        let col = offset - line_start_offset;
        LineCol {
            line: line as u32,
            col: col.into(),
        }
    }

    /// Converts a line and UTF-8 column back into a flat offset.
    ///
    /// Returns `None` when `line_col.line` is not a line of the indexed text.
    /// The column is not validated against the length of the line.
    pub fn offset(&self, line_col: LineCol) -> Option<TextSize> {
        // Checked lookup: positions may come from an external client and
        // refer to a line that does not exist; report `None` instead of
        // panicking on an out-of-bounds index.
        let line_start = *self.newlines.get(line_col.line as usize)?;
        Some(line_start + TextSize::from(line_col.col))
    }

    /// Converts a UTF-8 based position into a UTF-16 based one.
    ///
    /// Currently always returns `Some`.
    pub fn to_utf16(&self, line_col: LineCol) -> Option<LineColUtf16> {
        let col = self.utf8_to_utf16_col(line_col.line, line_col.col.into());
        Some(LineColUtf16 {
            line: line_col.line,
            col: col as u32,
        })
    }

    /// Converts a UTF-16 based position into a UTF-8 based one.
    ///
    /// Currently always returns `Some`.
    pub fn to_utf8(&self, line_col: LineColUtf16) -> Option<LineCol> {
        let col = self.utf16_to_utf8_col(line_col.line, line_col.col);
        Some(LineCol {
            line: line_col.line,
            col: col.into(),
        })
    }

    /// Splits `range` at every line start that falls inside it and yields the
    /// resulting non-empty sub-ranges in order.
    pub fn lines(&self, range: TextRange) -> impl Iterator<Item = TextRange> + '_ {
        // `newlines[lo..hi]` are the line starts within `[range.start(), range.end()]`.
        let lo = partition_point(&self.newlines, |&it| it < range.start());
        let hi = partition_point(&self.newlines, |&it| it <= range.end());
        let all = iter::once(range.start())
            .chain(self.newlines[lo..hi].iter().copied())
            .chain(iter::once(range.end()));

        // Pair each boundary with its successor; a line start that coincides
        // with an end of `range` produces an empty range, which is dropped.
        all.clone()
            .zip(all.skip(1))
            .map(|(lo, hi)| TextRange::new(lo, hi))
            .filter(|it| !it.is_empty())
    }

    /// Maps a UTF-8 column on `line` to the corresponding UTF-16 column.
    fn utf8_to_utf16_col(&self, line: u32, col: TextSize) -> usize {
        let mut res: usize = col.into();
        if let Some(utf16_chars) = self.utf16_lines.get(&line) {
            for c in utf16_chars {
                if c.end <= col {
                    // The character lies entirely before `col`: subtract the
                    // difference between its UTF-8 and UTF-16 widths.
                    res -= usize::from(c.len()) - c.len_utf16();
                } else {
                    // From here on, all utf16 characters come *after* the character we are mapping,
                    // so we don't need to take them into account
                    break;
                }
            }
        }
        res
    }

    /// Maps a UTF-16 column on `line` to the corresponding UTF-8 column.
    fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize {
        if let Some(utf16_chars) = self.utf16_lines.get(&line) {
            for c in utf16_chars {
                // `col` is widened as we go, so it is already expressed in
                // UTF-8 units for everything before `c`.
                if col > u32::from(c.start) {
                    col += u32::from(c.len()) - c.len_utf16() as u32;
                } else {
                    // From here on, all utf16 characters come *after* the character we are mapping,
                    // so we don't need to take them into account
                    break;
                }
            }
        }

        col.into()
    }
}

/// Returns `idx` such that:
///
/// ```text
///     ∀ x in slice[..idx]:  pred(x)
///  && ∀ x in slice[idx..]: !pred(x)
/// ```
///
/// `slice` is expected to be partitioned by `pred`, i.e. all elements for
/// which `pred` holds come before all elements for which it does not. For an
/// empty slice the result is 0.
///
/// This is a thin wrapper around the standard library's
/// `slice::partition_point`, which replaces the former hand-written binary
/// search and its `unsafe` unchecked indexing.
///
/// <https://github.com/rust-lang/rust/issues/73831>
fn partition_point<T, P>(slice: &[T], pred: P) -> usize
where
    P: FnMut(&T) -> bool,
{
    slice.partition_point(pred)
}
2 changes: 1 addition & 1 deletion crates/symbols/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ doctest = false
base-db = { path = "../base-db" }
distro = { path = "../distro" }
itertools = "0.11.0"
line-index = "0.1.0"
line-index = { path = "../line-index" }
rowan = "0.15.13"
syntax = { path = "../syntax" }
titlecase = "2.2.1"
Expand Down
2 changes: 1 addition & 1 deletion crates/test-utils/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ rust-version.workspace = true
[dependencies]
base-db = { path = "../base-db" }
distro = { path = "../distro" }
line-index = "0.1.0"
line-index = { path = "../line-index" }
rowan = "0.15.13"
url = "=2.3.1"

Expand Down
2 changes: 1 addition & 1 deletion crates/texlab/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ folding = { path = "../folding" }
highlights = { path = "../highlights" }
hover = { path = "../hover" }
inlay-hints = { path = "../inlay-hints" }
line-index = "0.1.0"
line-index = { path = "../line-index" }
links = { path = "../links" }
log = "0.4.19"
lsp-server = "0.7.4"
Expand Down
8 changes: 4 additions & 4 deletions crates/texlab/src/util/line_index_ext.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use line_index::{LineCol, LineIndex, WideEncoding, WideLineCol};
use line_index::{LineCol, LineColUtf16, LineIndex};
use lsp_types::{Position, Range};
use rowan::{TextRange, TextSize};

Expand All @@ -14,12 +14,12 @@ pub trait LineIndexExt {

impl LineIndexExt for LineIndex {
fn offset_lsp(&self, line_col: Position) -> Option<TextSize> {
let line_col = WideLineCol {
let line_col = LineColUtf16 {
line: line_col.line,
col: line_col.character,
};

let line_col = self.to_utf8(WideEncoding::Utf16, line_col)?;
let line_col = self.to_utf8(line_col)?;
self.offset(line_col)
}

Expand All @@ -31,7 +31,7 @@ impl LineIndexExt for LineIndex {

fn line_col_lsp(&self, offset: TextSize) -> Option<Position> {
let line_col = self.line_col(offset);
let line_col = self.to_wide(WideEncoding::Utf16, line_col)?;
let line_col = self.to_utf16(line_col)?;
Some(Position::new(line_col.line, line_col.col))
}

Expand Down

0 comments on commit a4aa31c

Please sign in to comment.