Skip to content

Commit

Permalink
Support special characters for <simple_term> in CONTAINS predicate (#…
Browse files Browse the repository at this point in the history
…2319)

* Support special characters for <simple_term> in CONTAINS clause

Currently, for full-text search, Babelfish throws a syntax error when
special characters are used in the search condition of a CONTAINS
predicate. This commit adds support for them by mapping each set of
special characters to its own unique hash, which is then used during
the lookup.

Task: BABEL-4651
Signed-off-by: Roshan Kanwar <[email protected]>

* fix expected dependency file

* fix expected create file

* fix expected dependency and create output file

* Added unsupported error for multiple special characters,
refactored code, added upgrade tests, and upgraded test files
for special chars.

* updated headers file

* fixed indentation and refactored code

* fix changes

* turn the escape hatch for fulltext search on by default,
updated logic for handling consecutive special characters during fts

* fix build failures

* fix build failures

* updated test files and fixed errors

* fixed indentation

* add more test cases for special characters

* added more test cases

* added more test cases

* updated test cases and handling for various categories

* updated logic and handled more cases

---------

Signed-off-by: Roshan Kanwar <[email protected]>
  • Loading branch information
roshan0708 authored Feb 8, 2024
1 parent ab83b9e commit 2411902
Show file tree
Hide file tree
Showing 26 changed files with 3,039 additions and 158 deletions.
25 changes: 25 additions & 0 deletions contrib/babelfishpg_tsql/runtime/functions.c
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ PG_FUNCTION_INFO_V1(datepart_internal_float);
PG_FUNCTION_INFO_V1(datepart_internal_real);
PG_FUNCTION_INFO_V1(datepart_internal_money);
PG_FUNCTION_INFO_V1(datepart_internal_smallmoney);
/* Wrapper around replace_special_chars_fts_impl(); defined later in this file */
PG_FUNCTION_INFO_V1(replace_special_chars_fts);

void *string_to_tsql_varchar(const char *input_str);
void *get_servername_internal(void);
Expand Down Expand Up @@ -228,6 +229,7 @@ extern bool pltsql_case_insensitive_identifiers;
extern bool inited_ht_tsql_cast_info;
extern bool inited_ht_tsql_datatype_precedence_info;
extern PLtsql_execstate *get_outermost_tsql_estate(int *nestlevel);
/*
 * Declared here for replace_special_chars_fts(), which passes it a C string
 * and pfree()s the pointer it returns.
 */
extern char *replace_special_chars_fts_impl(char *input_str);

char *bbf_servername = "BABELFISH";
const char *bbf_servicename = "MSSQLSERVER";
Expand Down Expand Up @@ -2590,6 +2592,29 @@ type_name(PG_FUNCTION_ARGS)
PG_RETURN_NULL();
}

/*
 * replace_special_chars_fts
 *
 * SQL-callable wrapper for the C function replace_special_chars_fts_impl().
 *
 * Argument 0 (text): the string to transform.
 * Returns (text): a text copy of the string returned by
 * replace_special_chars_fts_impl().
 */
Datum
replace_special_chars_fts(PG_FUNCTION_ARGS)
{
	/* text_to_cstring() handles packed varlenas, so the _PP accessor suffices */
	text	   *input_text = PG_GETARG_TEXT_PP(0);
	char	   *input_str = text_to_cstring(input_text);
	char	   *output_str;
	text	   *result_text;

	/* Run the replacement on the C-string copy of the argument */
	output_str = replace_special_chars_fts_impl(input_str);

	/* Copy the transformed string into the text value that is returned */
	result_text = cstring_to_text(output_str);

	/*
	 * Release the scratch C strings.  The helper is declared extern and its
	 * body is not visible here, so guard against it handing back its own
	 * argument: freeing the same pointer twice would be a double free.
	 */
	if (output_str != input_str)
		pfree(output_str);
	pfree(input_str);

	PG_RETURN_TEXT_P(result_text);
}

Datum
has_dbaccess(PG_FUNCTION_ARGS)
{
Expand Down
7 changes: 7 additions & 0 deletions contrib/babelfishpg_tsql/sql/sys_functions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -896,6 +896,13 @@ CREATE OR REPLACE FUNCTION sys.has_dbaccess(database_name SYSNAME) RETURNS INTEG
'babelfishpg_tsql', 'has_dbaccess'
LANGUAGE C STABLE STRICT;

-- Replaces the special characters in the given text with their corresponding
-- unique hashes. Used for the search condition of the full-text search
-- CONTAINS predicate.
CREATE OR REPLACE FUNCTION sys.replace_special_chars_fts(IN phrase text)
RETURNS TEXT
AS 'babelfishpg_tsql', 'replace_special_chars_fts'
LANGUAGE C
IMMUTABLE
STRICT;
GRANT EXECUTE ON FUNCTION sys.replace_special_chars_fts TO PUBLIC;

-- This function performs string rewriting for the full text search CONTAINS predicate
-- in Babelfish
-- For example, a T-SQL query
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2033,6 +2033,13 @@ END;
$$;
GRANT EXECUTE on PROCEDURE sys.sp_rename(IN sys.nvarchar(776), IN sys.SYSNAME, IN sys.varchar(13)) TO PUBLIC;

-- Replaces the special characters in the given text with their corresponding
-- unique hashes. Used for the search condition of the full-text search
-- CONTAINS predicate.
CREATE OR REPLACE FUNCTION sys.replace_special_chars_fts(IN phrase text)
RETURNS TEXT
AS 'babelfishpg_tsql', 'replace_special_chars_fts'
LANGUAGE C
IMMUTABLE
STRICT;
GRANT EXECUTE ON FUNCTION sys.replace_special_chars_fts TO PUBLIC;

-- Update existing logins to remove createrole privilege
CREATE OR REPLACE PROCEDURE sys.bbf_remove_createrole_from_logins()
LANGUAGE C
Expand Down
13 changes: 10 additions & 3 deletions contrib/babelfishpg_tsql/src/backend_parser/gram-tsql-epilogue.y.c
Original file line number Diff line number Diff line change
Expand Up @@ -748,16 +748,24 @@ TsqlExpressionContains(char *colId, Node *search_expr, core_yyscan_t yyscanner)
return (Node *)fts;
}

/*
 * Transform column_name into
 * to_tsvector(pgconfig, replace_special_chars_fts(column_name)).
 *
 * colId     - name of the column to wrap
 * yyscanner - scanner state forwarded to makeColumnRef()
 * pgconfig  - node used as the first argument of to_tsvector()
 *
 * Returns the function-call node for the outer to_tsvector() call.
 */
static Node *
makeToTSVectorFuncCall(char *colId, core_yyscan_t yyscanner, Node *pgconfig)
{
	Node	   *col;
	List	   *args;
	Node	   *replaceSpecialCharsFunc;
	List	   *replaceSpecialCharsArgs;

	/* Create a ColumnRef node for the column */
	col = makeColumnRef(colId, NIL, -1, yyscanner);

	/* Create a function call for replace_special_chars_fts(column_name) */
	replaceSpecialCharsArgs = list_make1(col);
	replaceSpecialCharsFunc = (Node *) makeFuncCall(TsqlSystemFuncName("replace_special_chars_fts"), replaceSpecialCharsArgs, COERCE_EXPLICIT_CALL, -1);

	/* Create the final function call to_tsvector(pgconfig, replace_special_chars_fts(column_name)) */
	args = list_make2(pgconfig, replaceSpecialCharsFunc);

	return (Node *) makeFuncCall(list_make1(makeString("to_tsvector")), args, COERCE_EXPLICIT_CALL, -1);
}
Expand All @@ -773,7 +781,6 @@ makeToTSQueryFuncCall(Node *search_expr, Node *pgconfig)
args_rewrite = list_make1(search_expr);
result_rewrite = (Node *) makeFuncCall(TsqlSystemFuncName("babelfish_fts_rewrite"), args_rewrite, COERCE_EXPLICIT_CALL, -1);


args = list_make2(pgconfig, result_rewrite);
return (Node *) makeFuncCall(list_make1(makeString("to_tsquery")), args, COERCE_EXPLICIT_CALL, -1);
}
Expand Down
Loading

0 comments on commit 2411902

Please sign in to comment.