Skip to content

Commit

Permalink
[fix](inverted index) Content Check for Tokenize Function Parser (#44465)
Browse files Browse the repository at this point in the history

Problem Summary:
1. Return an error when TOKENIZE is given an unknown parser name, to prevent users from mistakenly assuming other tokenizers exist.
  • Loading branch information
zzzxl1993 authored and Your Name committed Nov 25, 2024
1 parent 74b9658 commit 2cf280f
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 0 deletions.
6 changes: 6 additions & 0 deletions be/src/vec/functions/function_tokenize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,12 @@ Status FunctionTokenize::execute_impl(FunctionContext* /*context*/, Block& block
}
inverted_index_ctx.parser_type = get_inverted_index_parser_type_from_string(
get_parser_string_from_properties(properties));
if (inverted_index_ctx.parser_type == InvertedIndexParserType::PARSER_UNKNOWN) {
return Status::Error<doris::ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>(
"unsupported parser type. currently, only 'english', 'chinese', and "
"'unicode' analyzers are supported.");
}

inverted_index_ctx.parser_mode = get_parser_mode_string_from_properties(properties);
inverted_index_ctx.char_filter_map =
get_parser_char_filter_map_from_properties(properties);
Expand Down
11 changes: 11 additions & 0 deletions regression-test/suites/inverted_index_p0/test_tokenize.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.

import java.sql.SQLException

suite("test_tokenize"){
// prepare test table
Expand Down Expand Up @@ -98,4 +99,14 @@ suite("test_tokenize"){

qt_tokenize_sql """SELECT TOKENIZE('华夏智胜新税股票A', '"parser"="unicode"');"""
qt_tokenize_sql """SELECT TOKENIZE('华夏智胜新税股票A', '"parser"="unicode","stopwords" = "none"');"""

// Negative case: 'eng' is not a supported parser name, so the query is
// expected to fail with an error whose message contains "E-6000".
// Track whether that error was actually observed; without this flag the
// check would pass silently if the query unexpectedly succeeded.
boolean unsupportedParserRejected = false
try {
    sql """ SELECT TOKENIZE('华夏智胜新税股票A', '"parser"="eng"'); """
} catch (SQLException e) {
    if (e.message.contains("E-6000")) {
        log.info("e message: {}", e.message)
        unsupportedParserRejected = true
    } else {
        // Any other SQL failure is unrelated to parser validation; surface it.
        throw e
    }
}
assert unsupportedParserRejected : "TOKENIZE with unsupported parser 'eng' should fail with E-6000"
}

0 comments on commit 2cf280f

Please sign in to comment.