Skip to content

Commit

Permalink
fix: slight improvements to string indexing with multi-byte chars
Browse files Browse the repository at this point in the history
  • Loading branch information
reubeno committed Nov 29, 2024
1 parent c372508 commit 57fb43c
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 20 deletions.
57 changes: 42 additions & 15 deletions brush-core/src/expansion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ impl Expansion {
pub(crate) fn polymorphic_subslice(&self, index: usize, end: usize) -> Self {
let len = end - index;

// If there are multiple fields, then interpret `index` and `end` as indices
// into the vector of fields.
if self.fields.len() > 1 {
let actual_len = min(len, self.fields.len() - index);
let fields = self.fields[index..(index + actual_len)].to_vec();
Expand All @@ -107,37 +109,62 @@ impl Expansion {
undefined: self.undefined,
}
} else {
// Otherwise, interpret `index` and `end` as indices into the string contents.
let mut fields = vec![];

let mut offset = index;
// Keep track of how far away the interesting data is from the current read offset.
let mut dist_to_slice = index;
// Keep track of how many characters are left to be copied.
let mut left = len;

// Go through fields, copying the interesting parts.
for field in &self.fields {
let mut pieces = vec![];

for piece in &field.0 {
// Stop once we've extracted enough characters.
if left == 0 {
break;
}

// Get the inner string of the piece, and figure out how many
// characters are in it; make sure to get the *character count*
// and not just call `.len()` to get the byte count.
let piece_str = piece.as_str();
if offset < piece_str.len() {
let len_from_this_piece = min(left, piece_str.len() - offset);
let piece_char_count = piece_str.chars().count();

let new_piece = match piece {
ExpansionPiece::Unsplittable(s) => ExpansionPiece::Unsplittable(
s[offset..(offset + len_from_this_piece)].to_owned(),
),
ExpansionPiece::Splittable(s) => ExpansionPiece::Splittable(
s[offset..(offset + len_from_this_piece)].to_owned(),
),
};
// If the interesting data isn't even in this piece yet, then
// continue until we find it.
if dist_to_slice >= piece_char_count {
dist_to_slice -= piece_char_count;
continue;
}

pieces.push(new_piece);
// Figure out how far into this piece we're interested in copying.
let desired_offset_into_this_piece = dist_to_slice;
// Figure out how many characters we're going to use from *this* piece.
let len_from_this_piece =
min(left, piece_char_count - desired_offset_into_this_piece);

let new_piece = match piece {
ExpansionPiece::Unsplittable(s) => ExpansionPiece::Unsplittable(
s.chars()
.skip(desired_offset_into_this_piece)
.take(len_from_this_piece)
.collect(),
),
ExpansionPiece::Splittable(s) => ExpansionPiece::Splittable(
s.chars()
.skip(desired_offset_into_this_piece)
.take(len_from_this_piece)
.collect(),
),
};

left -= len_from_this_piece;
}
pieces.push(new_piece);

offset += piece_str.len();
left -= len_from_this_piece;
dist_to_slice = 0;
}

if !pieces.is_empty() {
Expand Down
5 changes: 2 additions & 3 deletions brush-parser/src/word.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,9 +401,8 @@ pub fn parse_parameter(
word: &str,
options: &ParserOptions,
) -> Result<Parameter, error::WordParseError> {
let pieces = expansion_parser::parameter(word, options)
.map_err(|err| error::WordParseError::Parameter(word.to_owned(), err))?;
Ok(pieces)
expansion_parser::parameter(word, options)
.map_err(|err| error::WordParseError::Parameter(word.to_owned(), err))
}

peg::parser! {
Expand Down
6 changes: 6 additions & 0 deletions brush-shell/tests/cases/word_expansion.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -658,6 +658,12 @@ cases:
echo "\${var:-1:1}: ${var:-1:1}"
echo "\${var:-3:1}: ${var:-3:1}"
- name: "Substring on string with multi-byte chars"
known_failure: true
stdin: |
var="7❌🖌️✅😂🔴⛔ABC"
echo "${var:2}" | hexdump -C
- name: "Substring operator on arrays"
stdin: |
set abcde fghij klmno pqrst uvwxy z
Expand Down
4 changes: 2 additions & 2 deletions brush-shell/tests/compat_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1122,8 +1122,8 @@ impl TestCase {

Ok(RunResult {
exit_status: cmd_result.status,
stdout: String::from_utf8(cmd_result.stdout)?,
stderr: String::from_utf8(cmd_result.stderr)?,
stdout: String::from_utf8_lossy(cmd_result.stdout.as_slice()).to_string(),
stderr: String::from_utf8_lossy(cmd_result.stderr.as_slice()).to_string(),
duration,
})
}
Expand Down

0 comments on commit 57fb43c

Please sign in to comment.