Skip to content

Commit

Permalink
fix: Escape regex symbols in all versions of like operator (#25)
Browse files Browse the repository at this point in the history
update
  • Loading branch information
waralexrom committed Oct 31, 2023
1 parent c296882 commit d59ba6c
Showing 1 changed file with 46 additions and 23 deletions.
69 changes: 46 additions & 23 deletions arrow/src/compute/kernels/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,47 @@ pub fn ilike_utf8<OffsetSize: StringOffsetSizeTrait>(
like_utf8_impl(left, right, false, false)
}

/// Converts a SQL `LIKE` pattern into the body of an equivalent regular expression.
///
/// The returned string is not anchored; callers wrap it in `^…$` before
/// compiling it.
///
/// Translation rules:
/// * `%` matches any sequence of characters (including none) and `_` matches
///   exactly one character. Both are emitted with the `s` flag enabled so that
///   they also match `\n`, which a bare `.` does not.
/// * `\` is the escape character: `\%`, `\_` and `\\` match a literal `%`, `_`
///   and `\`. A backslash in front of any other character is dropped and that
///   character is matched literally.
/// * Every other character matches itself; regex metacharacters are
///   backslash-escaped.
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] if `pat` ends with an unpaired
/// escape character.
fn like_to_regex(pat: &str) -> Result<String> {
    // Characters that need a backslash to be matched literally. `,` is
    // deliberately absent: it has no special meaning in a regex, and escaping a
    // non-metacharacter is rejected by some versions of the regex parser.
    const REGEX_META_CHARS: &str = "-[]{}()*+?.^$|#";

    /// Appends `c` to `out` in a form the regex matches literally.
    fn push_literal(out: &mut String, c: char) {
        if REGEX_META_CHARS.contains(c) {
            out.push('\\');
        }
        out.push(c);
    }

    let mut re_pattern = String::with_capacity(pat.len());
    let mut chars = pat.chars();
    while let Some(c) = chars.next() {
        match c {
            // `(?s:…)` lets `.` match line terminators as well, as LIKE wildcards do.
            '%' => re_pattern.push_str("(?s:.*)"),
            '_' => re_pattern.push_str("(?s:.)"),
            // The escape character consumes the following character.
            '\\' => match chars.next() {
                Some('\\') => re_pattern.push_str("\\\\"),
                Some(escaped) => push_literal(&mut re_pattern, escaped),
                None => {
                    return Err(ArrowError::InvalidArgumentError(format!(
                        "LIKE pattern must not end with escape character. Pattern {}",
                        pat
                    )));
                }
            },
            _ => push_literal(&mut re_pattern, c),
        }
    }
    Ok(re_pattern)
}

fn like_utf8_impl<OffsetSize: StringOffsetSizeTrait>(
left: &GenericStringArray<OffsetSize>,
right: &GenericStringArray<OffsetSize>,
Expand All @@ -261,6 +302,9 @@ fn like_utf8_impl<OffsetSize: StringOffsetSizeTrait>(
let re = if let Some(ref regex) = map.get(pat) {
regex
} else {
<<<<<<< HEAD
let re_pattern = like_to_regex(pat)?;
=======
let mut is_escaped = false;
let mut re_pattern = String::new();
let regex_chars = "-[]{}()*+?.,^$|#";
Expand Down Expand Up @@ -298,6 +342,7 @@ fn like_utf8_impl<OffsetSize: StringOffsetSizeTrait>(
pat
)));
}
>>>>>>> c0df3eacf (update)
let re = RegexBuilder::new(&format!("^{}$", re_pattern))
.case_insensitive(!case_sensitive)
.build()
Expand Down Expand Up @@ -406,29 +451,7 @@ fn like_utf8_scalar_impl<OffsetSize: StringOffsetSizeTrait>(
}
}
} else {
let mut prev_char = None;
let mut re_pattern = right
.replace(
|c| {
let res = c == '%' && prev_char != Some('\\');
prev_char = Some(c);
res
},
".*",
)
.replace("\\%", "%");

let mut prev_char = None;
re_pattern = re_pattern
.replace(
|c| {
let res = c == '_' && prev_char != Some('\\');
prev_char = Some(c);
res
},
".",
)
.replace("\\_", "_");
let re_pattern = like_to_regex(right)?;
let re = RegexBuilder::new(&format!("^{}$", re_pattern))
.case_insensitive(!case_sensitive)
.build()
Expand Down

0 comments on commit d59ba6c

Please sign in to comment.