Skip to content

Commit

Permalink
Fix bugs for translation logic for simple_term and execution logic fo…
Browse files Browse the repository at this point in the history
…r FULLTEXT INDEX stmts (#2045)

Bug fixes for simple_term support in CONTAINS clause and FULLTEXT INDEX creation/deletion statements:

    Fix the buffer overflow issue on translation of simple_term.
    Fix multiple spaces issue between words in simple_term which caused exception.
    Remove the util function exec_utility_cmd_helper and use a normal wrapper.

Issues Resolved

JIRA: BABEL-4379, BABEL-4383
Signed-off-by: Roshan Kanwar [email protected]
  • Loading branch information
roshan0708 authored Nov 23, 2023
1 parent e15e35a commit 78f3e3e
Show file tree
Hide file tree
Showing 6 changed files with 190 additions and 94 deletions.
14 changes: 12 additions & 2 deletions contrib/babelfishpg_tsql/src/fts.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,21 @@ PG_FUNCTION_INFO_V1(babelfish_fts_rewrite);
Datum
babelfish_fts_rewrite(PG_FUNCTION_ARGS)
{
text* input_text = PG_GETARG_TEXT_P(0);
char* input_str = text_to_cstring(input_text);
text* input_text;
char* input_str;
char* translated_query;
text* result_text = NULL; // Initialize result_text to NULL

if (PG_ARGISNULL(0))
{
ereport(ERROR,
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("Incorrect syntax near the keyword 'null'")));
}

input_text = PG_GETARG_TEXT_P(0);
input_str = text_to_cstring(input_text);

if (!pltsql_allow_fulltext_parser)
{
ereport(ERROR,
Expand Down
134 changes: 45 additions & 89 deletions contrib/babelfishpg_tsql/src/fts_parser.y
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
%{
#include "postgres.h"
#include "lib/stringinfo.h"
#include <ctype.h>
#include "fts_data.h"

Expand All @@ -21,8 +22,7 @@ static char *scanbuf;
static int scanbuflen;

static char* translate_simple_term(const char* s);
static char *trim(char *s);
static char *trimInsideQuotes(char *s);
static char *trim(char *s, bool insideQuotes);

%}

Expand Down Expand Up @@ -114,136 +114,92 @@ simple_term_list:
*/
static
char* translate_simple_term(const char* inputStr) {
int inputLength;
StringInfoData output;
const char* inputPtr;
char* trimmedInputStr;

int inputLength;
int outputSize;
char* output;
const char* inputPtr;
char* outputPtr;
char* trimmedInputStr;

// Check for empty input
if (inputStr == NULL) {
return NULL;
// Check for empty input - this should not be possible based on lexer rules, but check just in case
if (!inputStr || !(inputLength = strlen(inputStr))) {
ereport(ERROR,
(errcode(ERRCODE_INTERNAL_ERROR),
errmsg("Null or empty full-text predicate.")));
}

trimmedInputStr = (char*)palloc(strlen(inputStr) + 1);
strcpy(trimmedInputStr, inputStr);
trimmedInputStr = pstrdup(inputStr);

// removing trailing and leading spaces
trim(trimmedInputStr);
trim(trimmedInputStr, false);
inputLength = strlen(trimmedInputStr);

// Check if the input is a phrase enclosed in double quotes
if (trimmedInputStr[0] == '"' && trimmedInputStr[inputLength - 1] == '"') {
trimInsideQuotes(trimmedInputStr);
trim(trimmedInputStr, true);
inputLength = strlen(trimmedInputStr);
outputSize = inputLength - 2; // Exclude both quotes
output = (char*)palloc(outputSize + 1); // +1 for the null terminator
if (output == NULL) {
return NULL;
}

initStringInfo(&output);

// Initialize pointers for input and output
inputPtr = trimmedInputStr;
outputPtr = output;

while (*inputPtr != '\0') {
if (*inputPtr == ' ') {
// Replace space with "<->"
*outputPtr++ = '<';
*outputPtr++ = '-';
*outputPtr++ = '>';
while (*(inputPtr + 1) == ' ') {
// Handle multiples spaces between words and skip over additional spaces
inputPtr++;
}
appendStringInfoString(&output, "<->");
} else {
// Copy the character
*outputPtr++ = *inputPtr;
appendStringInfoChar(&output, *inputPtr);
}
inputPtr++;
}
*outputPtr = '\0';

return output;
pfree(trimmedInputStr);
return output.data;
} else {
// It's a single word, so no transformation needed
return strdup(trimmedInputStr);
return trimmedInputStr;
}
}

// Function to remove leading and trailing spaces of a string
static char *trim(char *s) {
/*
* Function to remove leading and trailing spaces of a string
* If flag is true then it removes spaces inside double quotes
*/
static char *trim(char *s, bool insideQuotes) {
size_t length;
size_t start;
size_t end;
size_t newLength;
bool inQuotes;

if (s == NULL) {
return NULL;
}

length = strlen(s);
if (length == 0) {
return s; // Empty string, nothing to trim
/*
* Empty string, nothing to trim
* for the empty input, we're automatically throwing error,
* so if string is NULL or empty, this clause won't pose any issue, it's just a safety check
*/
if (!s || !(length = strlen(s))) {
return s;
}

inQuotes = false;
start = 0;
end = length - 1;

// Trim leading spaces
while (start < length && isspace(s[start])) {
start++;
}

// Trim trailing spaces
while (end > start && isspace(s[end])) {
end--;
}

// Calculate the new length
newLength = end - start + 1;

// Shift the non-space part to the beginning of the string
memmove(s, s + start, newLength);

// Null-terminate the result
s[newLength] = '\0';

return s;
}

// Function to remove leading and trailing spaces inside double quotes
static char *trimInsideQuotes(char *s) {
size_t length;
size_t start;
size_t end;
size_t i;
size_t newLength;
bool insideQuotes;

if (s == NULL) {
return NULL;
}

length = strlen(s);
if (length == 0) {
return s; // Empty string, nothing to trim
}

insideQuotes = false;
start = 1;
end = length - 2;

for (i = 0; i < length; i++) {
for (size_t i = 0; i < length; i++) {
if (s[i] == '"') {
insideQuotes = !insideQuotes;
inQuotes = !inQuotes;
}

if (!insideQuotes) {
// Trim leading spaces inside quotes
if (!inQuotes || insideQuotes) {
// Trim leading spaces
while (start < length && isspace(s[start])) {
start++;
}

// Trim trailing spaces inside quotes
// Trim trailing spaces
while (end > start && isspace(s[end])) {
end--;
}
Expand Down
11 changes: 9 additions & 2 deletions contrib/babelfishpg_tsql/src/fts_scan.l
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ static YY_BUFFER_STATE scanbufhandle;

\"[^"]+\*\" { if (yytext[strlen(yytext)-2] == ' ') { yyerror(NULL, "Syntax error, space is not allowed before *"); } yylval = yytext; return PREFIX_TERM_TOKEN; } // Handle prefix terms
[a-zA-Z0-9]+ { yylval = yytext; return WORD_TOKEN; } // Handle individual words
\"[^"*]+\" { yylval = yytext; return TEXT_TOKEN; } // Handle double-quoted phrases
\"[^"*]*\" { yylval = yytext; return TEXT_TOKEN; } // Handle double-quoted phrases

FORMSOF\s*\(\s*(INFLECTIONAL|THESAURUS)\s*,\s*((\w+)|(\"[\w\s]+\"))(\s*,\s*((\w+)|(\"[\w\s]+\")))*\s*\) { yylval = yytext; return GENERATION_TERM_TOKEN; } // Handle FORMSOF generation term

Expand Down Expand Up @@ -64,6 +64,14 @@ fts_scanner_init(const char *str)
if (YY_CURRENT_BUFFER)
yy_delete_buffer(YY_CURRENT_BUFFER);


/*
* Check for empty input
*/
if (!str || !*str || (strspn(str, " \t\n\r") == slen)) {
yyerror(NULL, "Null or empty full-text predicate.");
}

/*
* Make a scan buffer with special termination needed by flex.
*/
Expand All @@ -72,7 +80,6 @@ fts_scanner_init(const char *str)
memcpy(scanbuf, str, slen);
scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);

BEGIN(INITIAL);
}

Expand Down
28 changes: 27 additions & 1 deletion contrib/babelfishpg_tsql/src/pl_exec-2.c
Original file line number Diff line number Diff line change
Expand Up @@ -3756,6 +3756,9 @@ exec_stmt_fulltextindex(PLtsql_execstate *estate, PLtsql_stmt_fulltextindex *stm
Oid datdba;
bool login_is_db_owner;
bool is_create;
List *res;
Node *res_stmt;
PlannedStmt *wrapper;

Assert(stmt->schema_name != NULL);

Expand Down Expand Up @@ -3833,6 +3836,29 @@ exec_stmt_fulltextindex(PLtsql_execstate *estate, PLtsql_stmt_fulltextindex *stm
/* The above query will be
* executed using ProcessUtility()
*/
exec_utility_cmd_helper(query_str);
res = raw_parser(query_str, RAW_PARSE_DEFAULT);
res_stmt = ((RawStmt *) linitial(res))->stmt;

/* need to make a wrapper PlannedStmt */
wrapper = makeNode(PlannedStmt);
wrapper->commandType = CMD_UTILITY;
wrapper->canSetTag = false;
wrapper->utilityStmt = res_stmt;
wrapper->stmt_location = 0;
wrapper->stmt_len = 1;

/* do this step */
ProcessUtility(wrapper,
is_create ? "(CREATE FULLTEXT INDEX STATEMENT )" : "(DELETE FULLTEXT INDEX STATEMENT )",
false,
PROCESS_UTILITY_SUBCOMMAND,
NULL,
NULL,
None_Receiver,
NULL);

/* make sure later steps can see the object created here */
CommandCounterIncrement();

return PLTSQL_RC_OK;
}
70 changes: 70 additions & 0 deletions test/JDBC/expected/fts-contains-vu-verify.out
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,58 @@ int#!#text
~~END~~


-- Test for empty string input, should throw error
EXEC fts_contains_vu_prepare_p1 ''
GO
~~ERROR (Code: 33557097)~~

~~ERROR (Message: Null or empty full-text predicate.)~~


-- Test for empty string input, should throw error
EXEC fts_contains_vu_prepare_p1 ' '
GO
~~ERROR (Code: 33557097)~~

~~ERROR (Message: Null or empty full-text predicate.)~~


-- Test for empty string input, should throw error
EXEC fts_contains_vu_prepare_p1 NULL
GO
~~ERROR (Code: 33557097)~~

~~ERROR (Message: Incorrect syntax near the keyword 'null')~~


EXEC fts_contains_vu_prepare_p1 ' "" '
GO
~~START~~
int#!#text
~~END~~


EXEC fts_contains_vu_prepare_p1 ' ""'
GO
~~START~~
int#!#text
~~END~~


EXEC fts_contains_vu_prepare_p1 '"" '
GO
~~START~~
int#!#text
~~END~~


EXEC fts_contains_vu_prepare_p1 '" "'
GO
~~START~~
int#!#text
~~END~~


EXEC fts_contains_vu_prepare_p1 'other'
GO
~~START~~
Expand Down Expand Up @@ -167,6 +219,24 @@ int#!#text
~~END~~


EXEC fts_contains_vu_prepare_p1 '"come back"'
GO
~~START~~
int#!#text
6#!#Walter Hill , the director of The Warriors , strove to give a comic-book depiction of the gang 's flight from the Bronx back to their Coney Island turf
8#!# After making their way through rival gangs ' turf in Manhattan and then back to Coney Island , the Warriors defeat the gang responsible for Cyrus ' death
158#!# What were the largest or most active homegrown companies back in the late ' 80s -- Portland Rep , New Rose , Storefront Portland Civic Theater -- long ago folded
174#!#" Back then , there was this grittier , shoestring quality that imbued almost every company , " Mulligan says
176#!# But when I look back at that earlier renaissance of the ' 80s , the truth was the talent pool of the city needed to step up
290#!# I ca n't wait to get back in the ring with Michael
325#!#Judge Linnane , who adjourned the hearing briefly until the terms of settlement were lodged in the court file , put the summary judgment application back for mention on March 6 next
362#!#FRENKEL : Saher says that she never thought she would become a refugee , and it scares her to think about what 's happening back in her hometown of Daraa
416#!# At Mission Dolores , CyArk worked hard to get behind the very ornately carved reredos , a false wall in back of the altar that was made in Mexico and shipped to the Mission by boat in 1796
720#!#The Australopithecus boisei skull was one of several important discoveries Leakey made in her career -- all made despite the fact that she had nearly @ @ @ @ @ @ @ @ @ @ She got thrown out of school very early on and never wanted to go back , and became hugely interested in archaeology , " Richard Leakey , her son , told Archaeology magazine
889#!# In her book Lots of Candles , Plenty of Cake , Anna Quindlen , the 60-year-old novelist and journalist who famously chronicled thirtysomething life back in the 1980s , wrote that she started using Botox for her frown lines in her mid-50s because she did n't want to look cross when she was n't
~~END~~


EXEC fts_contains_vu_prepare_p1 '"much of the"'
GO
~~START~~
Expand Down
Loading

0 comments on commit 78f3e3e

Please sign in to comment.