From b913695ae290208411e668f26d4b2c107916aab2 Mon Sep 17 00:00:00 2001 From: Roshan Kanwar Date: Wed, 27 Dec 2023 14:40:33 +0530 Subject: [PATCH 1/2] Support whitespace character for , fix bugs in translation logic and improve error handling related to Full-Text Search (#2163) Currently for full-text search, babelfish throws syntax error for usage of whitespace characters. This commit implements the logic to support the usage of whitespace characters as well as improves the error handling for unsupported languages and special characters for fulltext search conditions and also some minor code cleanups and bug fixes. Task: BABEL-4645 Signed-off-by: Roshan Kanwar * fixed validation output file * Updated generic error message for special characters and other languages --- contrib/babelfishpg_tsql/src/fts_data.h | 1 + contrib/babelfishpg_tsql/src/fts_parser.y | 59 +++-- contrib/babelfishpg_tsql/src/fts_scan.l | 39 ++- test/JDBC/expected/fts-contains-vu-verify.out | 226 +++++++++++++++++- .../fts-contains-vu-verify.mix | 97 +++++++- .../expected_create.out | 2 - 6 files changed, 385 insertions(+), 39 deletions(-) diff --git a/contrib/babelfishpg_tsql/src/fts_data.h b/contrib/babelfishpg_tsql/src/fts_data.h index 6068597725..e6efe6ad3a 100644 --- a/contrib/babelfishpg_tsql/src/fts_data.h +++ b/contrib/babelfishpg_tsql/src/fts_data.h @@ -6,6 +6,7 @@ extern int fts_yylex(void); extern void fts_yyerror(char **result, const char *message) pg_attribute_noreturn(); extern void fts_scanner_init(const char *str); extern void fts_scanner_finish(void); +extern bool isNonEnglishString(const char *str); /* in fts_parser.y */ extern int fts_yyparse(char **result); diff --git a/contrib/babelfishpg_tsql/src/fts_parser.y b/contrib/babelfishpg_tsql/src/fts_parser.y index b035054699..057e898bfb 100644 --- a/contrib/babelfishpg_tsql/src/fts_parser.y +++ b/contrib/babelfishpg_tsql/src/fts_parser.y @@ -23,10 +23,11 @@ static int scanbuflen; static char* translate_simple_term(const char* s); static char *trim(char *s, bool insideQuotes); +static bool containsSpecialCharacters(char s); %} -%token WORD_TOKEN WS_TOKEN TEXT_TOKEN PREFIX_TERM_TOKEN GENERATION_TERM_TOKEN AND_TOKEN AND_NOT_TOKEN OR_TOKEN INFLECTIONAL_TOKEN THESAURUS_TOKEN FORMSOF_TOKEN O_PAREN_TOKEN C_PAREN_TOKEN COMMA_TOKEN +%token WORD_TOKEN WS_TOKEN TEXT_TOKEN PREFIX_TERM_TOKEN GENERATION_TERM_TOKEN AND_TOKEN NOT_TOKEN AND_NOT_TOKEN OR_TOKEN INFLECTIONAL_TOKEN THESAURUS_TOKEN FORMSOF_TOKEN O_PAREN_TOKEN C_PAREN_TOKEN COMMA_TOKEN SPECIAL_CHAR_TOKEN NON_ENGLISH_TOKEN %left OR_TOKEN %left AND_TOKEN %left AND_NOT_TOKEN @@ -86,10 +87,10 @@ generation_term: generation_type: INFLECTIONAL_TOKEN { - $$ = $1; + fts_yyerror(NULL, "Generation term is not currently supported in Babelfish"); } | THESAURUS_TOKEN { - $$ = $1; + fts_yyerror(NULL, "Generation term is not currently supported in Babelfish"); } ; @@ -138,27 +139,35 @@ char* translate_simple_term(const char* inputStr) { inputLength = strlen(trimmedInputStr); initStringInfo(&output); + appendStringInfoString(&output, ""); // Initialize pointers for input and output - inputPtr = trimmedInputStr; - - while (*inputPtr != '\0') { - if (*inputPtr == ' ') { + for (inputPtr = trimmedInputStr; *inputPtr != '\0'; inputPtr++) { + if (isspace((unsigned char)*inputPtr)) { // Replace space with "<->" - while (*(inputPtr + 1) == ' ') { + while (isspace((unsigned char)*(inputPtr + 1))) { // Handle multiples spaces between words and skip over additional spaces inputPtr++; } appendStringInfoString(&output, "<->"); + } else if (*inputPtr != '-' && containsSpecialCharacters((unsigned char)*inputPtr)) { // Handle special characters and other languages + pfree(trimmedInputStr); + yyerror(NULL, "Full-text search conditions with special characters or languages other than English are not currently supported in Babelfish"); } else { // Copy the character appendStringInfoChar(&output, *inputPtr); } - inputPtr++; } pfree(trimmedInputStr); return output.data; } else { + // Initialize pointers for input and output + for (inputPtr = trimmedInputStr; *inputPtr != '\0'; inputPtr++) { + if (*inputPtr != '-' && containsSpecialCharacters((unsigned char)*inputPtr)) { // Handle special characters and other languages + pfree(trimmedInputStr); + yyerror(NULL, "Full-text search conditions with special characters or languages other than English are not currently supported in Babelfish"); + } + } // It's a single word, so no transformation needed return trimmedInputStr; } @@ -173,7 +182,6 @@ static char *trim(char *s, bool insideQuotes) { size_t start; size_t end; size_t newLength; - bool inQuotes; /* * Empty string, nothing to trim @@ -184,26 +192,22 @@ static char *trim(char *s, bool insideQuotes) { return s; } - inQuotes = false; start = 0; end = length - 1; - for (size_t i = 0; i < length; i++) { - if (s[i] == '"') { - inQuotes = !inQuotes; - } + if(insideQuotes) { + start++; + end--; + } - if (!inQuotes || insideQuotes) { - // Trim leading spaces - while (start < length && isspace(s[start])) { - start++; - } + // Trim leading spaces + while (start < length && isspace(s[start])) { + start++; + } - // Trim trailing spaces - while (end > start && isspace(s[end])) { - end--; - } - } + // Trim trailing spaces + while (end > start && isspace(s[end])) { + end--; } // Calculate the new length @@ -218,4 +222,9 @@ static char *trim(char *s, bool insideQuotes) { return s; } +static bool containsSpecialCharacters(char str) { + // Ignore Non-breaking space character + return str != '\302' && str != -62 && str != '\240' && str != -96 && !isalnum(str) && !isspace(str); +} + # include "fts_scan.c" \ No newline at end of file diff --git a/contrib/babelfishpg_tsql/src/fts_scan.l b/contrib/babelfishpg_tsql/src/fts_scan.l index 9322e8c4f5..e329bf322e 100644 --- a/contrib/babelfishpg_tsql/src/fts_scan.l +++ b/contrib/babelfishpg_tsql/src/fts_scan.l @@ -21,9 +21,10 @@ static YY_BUFFER_STATE scanbufhandle; %% -"AND" { yylval = "AND"; yyerror(NULL, "Boolean operator is not currently supported in Babelfish"); return AND_TOKEN; } -"AND NOT" { yylval = "AND NOT"; yyerror(NULL, "Boolean operator is not currently supported in Babelfish"); return AND_NOT_TOKEN; } -"OR" { yylval = "OR"; yyerror(NULL, "Boolean operator is not currently supported in Babelfish"); return OR_TOKEN; } +"AND" { yylval = "AND"; yyerror(NULL, "Full-text search conditions with boolean operators are not currently supported in Babelfish"); return AND_TOKEN; } +"AND NOT" { yylval = "AND NOT"; yyerror(NULL, "Full-text search conditions with boolean operators are not currently supported in Babelfish"); return AND_NOT_TOKEN; } +"OR" { yylval = "OR"; yyerror(NULL, "Full-text search conditions with boolean operators are not currently supported in Babelfish"); return OR_TOKEN; } +"NOT" { yylval = "NOT"; yyerror(NULL, "Full-text search conditions with boolean operators are not currently supported in Babelfish"); return NOT_TOKEN; } "INFLECTIONAL" { yylval = "INFLECTIONAL"; return INFLECTIONAL_TOKEN; } "THESAURUS" { yylval = "THESAURUS"; return THESAURUS_TOKEN; } "FORMSOF" { yylval = "FORMSOF"; return FORMSOF_TOKEN; } @@ -32,13 +33,13 @@ static YY_BUFFER_STATE scanbufhandle; "," { yylval = ","; return COMMA_TOKEN; } [ \t\r\n]+ { yylval = " "; return WS_TOKEN; } -\"[^"]+\*\" { if (yytext[strlen(yytext)-2] == ' ') { yyerror(NULL, "Syntax error, space is not allowed before *"); } yylval = yytext; return PREFIX_TERM_TOKEN; } // Handle prefix terms -[a-zA-Z0-9]+ { yylval = yytext; return WORD_TOKEN; } // Handle individual words -\"[^"*]*\" { yylval = yytext; return TEXT_TOKEN; } // Handle double-quoted phrases -FORMSOF\s*\(\s*(INFLECTIONAL|THESAURUS)\s*,\s*((\w+)|(\"[\w\s]+\"))(\s*,\s*((\w+)|(\"[\w\s]+\")))*\s*\) { yylval = yytext; return GENERATION_TERM_TOKEN; } // Handle FORMSOF generation term +FORMSOF\s*\(\s*(INFLECTIONAL|THESAURUS)\s*,\s*((\w+)|(\"[\w\s]+\"))(\s*,\s*((\w+)|(\"[\w\s]+\")))*\s*\) { yylval = yytext; yyerror(NULL, "Generation term is not currently supported in Babelfish"); yylval = yytext; return GENERATION_TERM_TOKEN; } // Handle FORMSOF generation term +\"[^"]+\*\" { if (yytext[strlen(yytext)-2] == ' ') { yyerror(NULL, "Syntax error, space is not allowed before *"); } yylval = yytext; return PREFIX_TERM_TOKEN; } // Handle prefix terms +\"[^"]*\" { yylval = yytext; return TEXT_TOKEN; } // Handle double-quoted phrases +[a-zA-Z0-9]+ { yylval = yytext; return WORD_TOKEN; } // Handle individual words -. { return yytext[0]; } // Other characters are returned as-is +. { return yytext[0]; } // Other characters are returned as-is %% @@ -72,6 +73,11 @@ fts_scanner_init(const char *str) yyerror(NULL, "Null or empty full-text predicate."); } + // Check for Non-English Language, if true, throw unsupported error + if(isNonEnglishString(str)) { + yyerror(NULL, "Full-text search conditions with special characters or languages other than English are not currently supported in Babelfish"); + } + /* * Make a scan buffer with special termination needed by flex. */ @@ -93,3 +99,20 @@ fts_scanner_finish(void) yy_delete_buffer(scanbufhandle); pfree(scanbuf); } + +/* + * Util function to check if the string is of Non-English language + */ +bool +isNonEnglishString(const char *str) { + for (const char *p = str; *p; p++) { + // Ignore non-breaking space character + if ((*p == '\302' && *p == -62 ) || (*p == '\240' && *p == -96)) { + continue; + } + if (*p < 0 || *p > 127) { + return true; + } + } + return false; +} \ No newline at end of file diff --git a/test/JDBC/expected/fts-contains-vu-verify.out b/test/JDBC/expected/fts-contains-vu-verify.out index ec251bc85e..62143a0bb1 100644 --- a/test/JDBC/expected/fts-contains-vu-verify.out +++ b/test/JDBC/expected/fts-contains-vu-verify.out @@ -18,9 +18,178 @@ fts_contains_simple + -- Full syntax of CONTAINS: https://github.com/MicrosoftDocs/sql-docs/blob/live/docs/t-sql/queries/contains-transact-sql.md -- test basic CONTAINS use: ... CONTAINS(col_name, ) ... -- ::= { word | "phrase" } +-- tab character in search string +EXEC fts_contains_vu_prepare_p1 '"due to"' +GO +~~START~~ +int#!#text +140#!# The idea for a set of new performance spaces began taking place around 1976 due to dissatisfaction with Civic Auditorium @ @ @ @ @ @ @ @ @ @ satisfying venue ) +408#!# These sites are at risk , endangered due to everyday exposure to the elements , vandalism , war , urbanization , poorly managed tourism , catastrophic events , and general neglect +511#!#A trial was due to be held next month , but Melissa was told last week that the CPS were dropping the case due to " insufficient evidence " +562#!# Loans characterized by low down payments @ @ @ @ @ @ @ @ @ @ early accumulations of interest due , or involve balloon or variable future payments , have a higher default rate than more traditional loans +705#!# Black guessed that any aborted @ @ @ @ @ @ @ @ @ @ due to the " private choice " of a mother +~~END~~ + + +-- line break +EXEC fts_contains_vu_prepare_p1 '"due +to"' +GO +~~START~~ +int#!#text +140#!# The idea for a set of new performance spaces began taking place around 1976 due to dissatisfaction with Civic Auditorium @ @ @ @ @ @ @ @ @ @ satisfying venue ) +408#!# These sites are at risk , endangered due to everyday exposure to the elements , vandalism , war , urbanization , poorly managed tourism , catastrophic events , and general neglect +511#!#A trial was due to be held next month , but Melissa was told last week that the CPS were dropping the case due to " insufficient evidence " +562#!# Loans characterized by low down payments @ @ @ @ @ @ @ @ @ @ early accumulations of interest due , or involve balloon or variable future payments , have a higher default rate than more traditional loans +705#!# Black guessed that any aborted @ @ @ @ @ @ @ @ @ @ due to the " private choice " of a mother +~~END~~ + + +-- no-break space character +declare @my_str varchar(50) = '"due'+char(160)+'to"'; +EXEC fts_contains_vu_prepare_p1 @my_str +GO +~~START~~ +int#!#text +140#!# The idea for a set of new performance spaces began taking place around 1976 due to dissatisfaction with Civic Auditorium @ @ @ @ @ @ @ @ @ @ satisfying venue ) +408#!# These sites are at risk , endangered due to everyday exposure to the elements , vandalism , war , urbanization , poorly managed tourism , catastrophic events , and general neglect +511#!#A trial was due to be held next month , but Melissa was told last week that the CPS were dropping the case due to " insufficient evidence " +562#!# Loans characterized by low down payments @ @ @ @ @ @ @ @ @ @ early accumulations of interest due , or involve balloon or variable future payments , have a higher default rate than more traditional loans +705#!# Black guessed that any aborted @ @ @ @ @ @ @ @ @ @ due to the " private choice " of a mother +~~END~~ + + +EXEC fts_contains_vu_prepare_p1 '天' +GO +~~ERROR (Code: 33557097)~~ + +~~ERROR (Message: Full-text search conditions with special characters or languages other than English are not currently supported in Babelfish)~~ + + +EXEC fts_contains_vu_prepare_p1 N'שלום' +GO +~~ERROR (Code: 33557097)~~ + +~~ERROR (Message: Full-text search conditions with special characters or languages other than English are not currently supported in Babelfish)~~ + + +EXEC fts_contains_vu_prepare_p1 '"one 天 two"' +GO +~~ERROR (Code: 33557097)~~ + +~~ERROR (Message: Full-text search conditions with special characters or languages other than English are not currently supported in Babelfish)~~ + + +EXEC fts_contains_vu_prepare_p1 'Tópicos' +GO +~~ERROR (Code: 33557097)~~ + +~~ERROR (Message: Full-text search conditions with special characters or languages other than English are not currently supported in Babelfish)~~ + + +EXEC fts_contains_vu_prepare_p1 '天' +GO +~~ERROR (Code: 33557097)~~ + +~~ERROR (Message: Full-text search conditions with special characters or languages other than English are not currently supported in Babelfish)~~ + + +SELECT sys.babelfish_fts_rewrite('"one''two"') +GO +~~ERROR (Code: 33557097)~~ + +~~ERROR (Message: Full-text search conditions with special characters or languages other than English are not currently supported in Babelfish)~~ + + +SELECT sys.babelfish_fts_rewrite('"one`two"') +GO +~~ERROR (Code: 33557097)~~ + +~~ERROR (Message: Full-text search conditions with special characters or languages other than English are not currently supported in Babelfish)~~ + + +-- colons, semicolons, pipes, parenthesis, etc. +EXEC fts_contains_vu_prepare_p1 '"one : two"' +GO +~~ERROR (Code: 33557097)~~ + +~~ERROR (Message: Full-text search conditions with special characters or languages other than English are not currently supported in Babelfish)~~ + + +EXEC fts_contains_vu_prepare_p1 '"one :)"' +GO +~~ERROR (Code: 33557097)~~ + +~~ERROR (Message: Full-text search conditions with special characters or languages other than English are not currently supported in Babelfish)~~ + + +EXEC fts_contains_vu_prepare_p1 'one;two' +GO +~~ERROR (Code: 33557097)~~ + +~~ERROR (Message: Full-text search conditions with special characters or languages other than English are not currently supported in Babelfish)~~ + + +EXEC fts_contains_vu_prepare_p1 'one|two' +GO +~~ERROR (Code: 33557097)~~ + +~~ERROR (Message: Full-text search conditions with special characters or languages other than English are not currently supported in Babelfish)~~ + + +EXEC fts_contains_vu_prepare_p1 '"one (two)"' +GO +~~ERROR (Code: 33557097)~~ + +~~ERROR (Message: Full-text search conditions with special characters or languages other than English are not currently supported in Babelfish)~~ + + +EXEC fts_contains_vu_prepare_p1 '"one * two"' +GO +~~ERROR (Code: 33557097)~~ + +~~ERROR (Message: Full-text search conditions with special characters or languages other than English are not currently supported in Babelfish)~~ + + +EXEC fts_contains_vu_prepare_p1 '"one) ... -- ::= { word | "phrase" } + +-- tab character in search string +EXEC fts_contains_vu_prepare_p1 '"due to"' +GO + +-- line break +EXEC fts_contains_vu_prepare_p1 '"due +to"' +GO + +-- no-break space character +declare @my_str varchar(50) = '"due'+char(160)+'to"'; +EXEC fts_contains_vu_prepare_p1 @my_str +GO + +EXEC fts_contains_vu_prepare_p1 '天' +GO + +EXEC fts_contains_vu_prepare_p1 N'שלום' +GO + +EXEC fts_contains_vu_prepare_p1 '"one 天 two"' +GO + +EXEC fts_contains_vu_prepare_p1 'Tópicos' +GO + +EXEC fts_contains_vu_prepare_p1 '天' +GO + +SELECT sys.babelfish_fts_rewrite('"one''two"') +GO + +SELECT sys.babelfish_fts_rewrite('"one`two"') +GO + +-- colons, semicolons, pipes, parenthesis, etc. +EXEC fts_contains_vu_prepare_p1 '"one : two"' +GO + +EXEC fts_contains_vu_prepare_p1 '"one :)"' +GO + +EXEC fts_contains_vu_prepare_p1 'one;two' +GO + +EXEC fts_contains_vu_prepare_p1 'one|two' +GO + +EXEC fts_contains_vu_prepare_p1 '"one (two)"' +GO + +EXEC fts_contains_vu_prepare_p1 '"one * two"' +GO + +EXEC fts_contains_vu_prepare_p1 '"one Date: Wed, 27 Dec 2023 16:41:18 +0530 Subject: [PATCH 2/2] Fix null port->gss->princ and emit principal in logs in case of GSS authentication (#2142) Earlier, GSS authenticated backend was not reporting GSS specific information like authenticated, encrypted and principal name while logging the connections. Even pg_stat_gssapi was showing principal as NULL since princ field in port->gss->princ was uninitialised. This commit resolves above issue by initialising the port->gss->princ at the time of authentication and including authenticated, encrypted and principal name field in connection logs if GSS authentication is enabled. It cherry-picks some changes from https://git.postgresql.org/gitweb/ p=postgresql.git;a=commitdiff;h=622ae4621ece72a9f64b5602c74d7aaf373c1631 and https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=126cdaf47af275f76b2f2ddb023bfdc6f018ae30 in the TDS extension. Task: BABEL-4660 Signed-off-by: Harsh Lunagariya --- .../babelfishpg_tds/src/backend/tds/tds_srv.c | 19 +++++++++++++++++++ .../src/backend/tds/tdslogin.c | 19 +++++++++++++++++++ .../babelfishpg_tds/test/t/002_tdskerberos.pl | 7 +++++++ 3 files changed, 45 insertions(+) diff --git a/contrib/babelfishpg_tds/src/backend/tds/tds_srv.c b/contrib/babelfishpg_tds/src/backend/tds/tds_srv.c index d53fb03a85..c25d1c2595 100644 --- a/contrib/babelfishpg_tds/src/backend/tds/tds_srv.c +++ b/contrib/babelfishpg_tds/src/backend/tds/tds_srv.c @@ -340,6 +340,25 @@ pe_authenticate(Port *port, const char **username) appendStringInfo(&logmsg, _(" Tds Version=0x%X."), GetClientTDSVersion()); +#ifdef ENABLE_GSS + if (port->gss) + { + const char *princ = be_gssapi_get_princ(port); + + if (princ) + appendStringInfo(&logmsg, + _(" GSS (authenticated=%s, encrypted=%s, principal=%s)"), + be_gssapi_get_auth(port) ? _("yes") : _("no"), + be_gssapi_get_enc(port) ? _("yes") : _("no"), + princ); + else + appendStringInfo(&logmsg, + _(" GSS (authenticated=%s, encrypted=%s)"), + be_gssapi_get_auth(port) ? _("yes") : _("no"), + be_gssapi_get_enc(port) ? _("yes") : _("no")); + } +#endif + ereport(LOG, errmsg_internal("%s", logmsg.data)); pfree(logmsg.data); } diff --git a/contrib/babelfishpg_tds/src/backend/tds/tdslogin.c b/contrib/babelfishpg_tds/src/backend/tds/tdslogin.c index 1a49868db8..d48a3ba087 100644 --- a/contrib/babelfishpg_tds/src/backend/tds/tdslogin.c +++ b/contrib/babelfishpg_tds/src/backend/tds/tdslogin.c @@ -1462,6 +1462,7 @@ CheckGSSAuth(Port *port) gss_buffer_desc gbuf; MemoryContext oldContext; char *at_pos = NULL; + char *princ; if (pg_krb_server_keyfile && strlen(pg_krb_server_keyfile) > 0) { @@ -1583,6 +1584,24 @@ CheckGSSAuth(Port *port) _("retrieving GSS user name failed"), maj_stat, min_stat); + /* + * gbuf.value might not be null-terminated, so turn it into a regular + * null-terminated string. + */ + princ = palloc(gbuf.length + 1); + memcpy(princ, gbuf.value, gbuf.length); + princ[gbuf.length] = '\0'; + + /* + * Copy the original name of the authenticated principal into our backend + * memory for display later. + * + * This is also our authenticated identity. Set it now, rather than + * waiting for the usermap check below, because authentication has already + * succeeded and we want the log file to reflect that. + */ + port->gss->princ = MemoryContextStrdup(TopMemoryContext, princ); + /* * XXX: In PG there are options to match realm names or perform ident * mappings. We're not going to do those checks now. If required, we can diff --git a/contrib/babelfishpg_tds/test/t/002_tdskerberos.pl b/contrib/babelfishpg_tds/test/t/002_tdskerberos.pl index 5179a3deb3..1bca609a4a 100644 --- a/contrib/babelfishpg_tds/test/t/002_tdskerberos.pl +++ b/contrib/babelfishpg_tds/test/t/002_tdskerberos.pl @@ -418,6 +418,13 @@ sub test_query my @connstr1 = $tsql_node->tsql_connstr('master'); $tsql_node->connect_ok('Kerberos auth test', (connstr => \@connstr1)); +$tsql_node->connect_ok('pg_stat_gssapi and connection log test', + (connstr => \@connstr1, + sql => "SELECT gss_authenticated, principal from pg_stat_gssapi where pid = pg_backend_pid();", + expected_stdout => qr/1.*test1\@$realm/, + log_like => [qr/connection authorized: user=test1\@$realm, application=SQLCMD, Tds Version=0x\d\d\d\d\d\d\d\d. GSS \(authenticated=yes, encrypted=no, principal=test1\@$realm\)/])); + + # Reset pg_hba.conf and mark every connection to use password based auth # But we should be able to use kerberos auth through TDS endpoint irrespective # of pg_hba.conf file