Skip to content

Commit

Permalink
Support floating-point notation without exponent (#2197)
Browse files Browse the repository at this point in the history
T-SQL supports floating-point notation in which the exponent is omitted entirely, or consists only of a '+' or '-' sign; in both cases the exponent defaults to 0. Some examples: 2.1E, -.2e+, -2.e- .
This fix adds support by rewriting the exponent in ANTLR by appending a 0 in these cases.

In addition, this adds enterEveryRule/exitEveryRule listeners to tsqlMutator as debugging infrastructure: these floating-point cases may also occur as parameters in stored procedure calls, which means they have to be handled in tsqlMutator (as well as in tsqlBuilder, where this debugging infrastructure is already present).

Task: BABEL-4108
Signed-off-by: Rob Verschoor [email protected]
  • Loading branch information
robverschoor authored Jan 3, 2024
1 parent 2273124 commit dfe9a38
Show file tree
Hide file tree
Showing 9 changed files with 712 additions and 15 deletions.
2 changes: 1 addition & 1 deletion contrib/babelfishpg_tsql/antlr/TSqlLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -1137,7 +1137,7 @@ DECIMAL: DEC_DIGIT+;
ID: ( [_#] | LETTER) ( [_#$@0-9] | LETTER)*;
BINARY: '0' [Xx] ( HEX_DIGIT | '\\' [\r]? [\n] )*;
FLOAT: DEC_DOT_DEC;
REAL: (DECIMAL | DEC_DOT_DEC) ([Ee] ([+-]? DEC_DIGIT+)?);
REAL: (DECIMAL | DEC_DOT_DEC) ([Ee] ([+-]? DEC_DIGIT*)?);

MONEY: CURRENCY_SYMBOL [ ]* ('+'|'-')? (DECIMAL | DEC_DOT_DEC);

Expand Down
149 changes: 135 additions & 14 deletions contrib/babelfishpg_tsql/src/tsqlIface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ static bool in_execute_body_batch_parameter = false;
static const std::string fragment_SELECT_prefix = "SELECT "; // fragment prefix for expressions
static const std::string fragment_EXEC_prefix = "EXEC "; // fragment prefix for execute_body_batch
static PLtsql_stmt *makeChangeDbOwnerStatement(TSqlParser::Alter_authorizationContext *ctx);
static void handleFloatWithoutExponent(TSqlParser::ConstantContext *ctx);

/*
* Structure / Utility function for general purpose of query string modification
Expand Down Expand Up @@ -2010,6 +2011,7 @@ class tsqlBuilder : public tsqlCommonMutator
clear_rewritten_query_fragment();
}

// NB: similar code is in tsqlMutator
void exitChar_string(TSqlParser::Char_stringContext *ctx) override
{
std::string str = getFullText(ctx);
Expand Down Expand Up @@ -2121,6 +2123,13 @@ class tsqlBuilder : public tsqlCommonMutator
// TO-DO
}

// Listener override for the 'constant' grammar rule (tsqlBuilder).
// Delegates to handleFloatWithoutExponent(), which records a query rewrite for
// floating-point constants whose exponent digits are missing (e.g. 2E, 2E+, .5e-).
// NB: this is copied in tsqlMutator
void exitConstant(TSqlParser::ConstantContext *ctx) override
{
// Check for floating-point number without exponent
handleFloatWithoutExponent(ctx);
}

//////////////////////////////////////////////////////////////////////////////
// Special handling of non-statement context
//////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -2419,18 +2428,19 @@ class tsqlBuilder : public tsqlCommonMutator

class tsqlMutator : public TSqlParserBaseListener
{
public:
MyInputStream &stream;
public:
const std::vector<std::string> &ruleNames;
MyInputStream &stream;
bool in_procedure_parameter = false;
bool in_procedure_parameter_id = false;

std::vector<int> double_quota_places;

explicit tsqlMutator(MyInputStream &s)
: stream(s)
explicit tsqlMutator(const std::vector<std::string> &rules, MyInputStream &s)
: ruleNames(rules), stream(s)
{
}

public:
void enterFunc_proc_name_schema(TSqlParser::Func_proc_name_schemaContext *ctx) override
{
Expand Down Expand Up @@ -2472,7 +2482,31 @@ class tsqlMutator : public TSqlParserBaseListener
stream.setText(ctx->start->getStartIndex(), " chr");
}
}

// Describe a parse-tree node for trace output: returns whatever text
// Trees::getNodeText() produces for 't' given this listener's rule names.
std::string
getNodeDesc(ParseTree *t)
{
	return Trees::getNodeText(t, ruleNames);
}

// Tree listener overrides
// Listener hook for rule entry; used purely for debugging.
// When pltsql_enable_antlr_detailed_log is set, prints one trace line with the
// context pointer and the node description; otherwise does nothing.
void enterEveryRule(ParserRuleContext *ctx) override
{
	// Bail out first: building the node description allocates a string and
	// would otherwise be paid for every rule even with tracing switched off.
	if (!pltsql_enable_antlr_detailed_log)
		return;

	std::cout << "+entering (tsqlMutator)" << static_cast<void *>(ctx) << "[" << getNodeDesc(ctx) << "]" << std::endl;
}

// Listener hook for rule exit; used purely for debugging.
// When pltsql_enable_antlr_detailed_log is set, prints one trace line with the
// context pointer and the node description; otherwise does nothing.
void exitEveryRule(ParserRuleContext *ctx) override
{
	// Bail out first: building the node description allocates a string and
	// would otherwise be paid for every rule even with tracing switched off.
	if (!pltsql_enable_antlr_detailed_log)
		return;

	std::cout << "-leaving (tsqlMutator)" << static_cast<void *>(ctx) << "[" << getNodeDesc(ctx) << "]" << std::endl;
}

void enterFunc_proc_name_server_database_schema(TSqlParser::Func_proc_name_server_database_schemaContext *ctx) override
{
// We are looking at a function name; it may be a function call, or a
Expand Down Expand Up @@ -2665,6 +2699,7 @@ class tsqlMutator : public TSqlParserBaseListener
}
}

// NB: similar code is in tsqlBuilder
void exitChar_string(TSqlParser::Char_stringContext *ctx) override
{
if (in_procedure_parameter)
Expand Down Expand Up @@ -2695,6 +2730,13 @@ class tsqlMutator : public TSqlParserBaseListener
}
}
}

// Listener override for the 'constant' grammar rule (tsqlMutator).
// Delegates to handleFloatWithoutExponent(), which records a query rewrite for
// floating-point constants whose exponent digits are missing (e.g. 2E, 2E+, .5e-).
// NB: this is copied in tsqlBuilder
void exitConstant(TSqlParser::ConstantContext *ctx) override
{
// Check for floating-point number without exponent
handleFloatWithoutExponent(ctx);
}
};

////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -2989,7 +3031,7 @@ antlr_parse_query(const char *sourceText, bool useSLLParsing) {
unsupportedFeatureHandler->visit(tree);
}

std::unique_ptr<tsqlMutator> mutator = std::make_unique<tsqlMutator>(sourceStream);
std::unique_ptr<tsqlMutator> mutator = std::make_unique<tsqlMutator>(parser.getRuleNames(), sourceStream);
antlr4::tree::ParseTreeWalker firstPass;
firstPass.walk(mutator.get(), tree);

Expand Down Expand Up @@ -4607,6 +4649,7 @@ makeDeclareStmt(TSqlParser::Declare_statementContext *ctx, std::map<PLtsql_stmt
{
std::string typeStr = ::getFullText(local->data_type());
PLtsql_type *type = parse_datatype(typeStr.c_str(), 0); // FIXME: the second arg should be 'location'

if (is_tsql_text_ntext_or_image_datatype(type->typoid))
{
throw PGErrorWrapperException(ERROR, ERRCODE_DATATYPE_MISMATCH, "The text, ntext, and image data types are invalid for local variables.", getLineAndPos(local->data_type()));
Expand Down Expand Up @@ -6013,7 +6056,7 @@ void process_execsql_destination_select(TSqlParser::Select_statement_standaloneC

if (elem->EQUAL())
{
// in PG main parser, '@a=1' will be treaed as a boolean expression to compare @a and 1. This is different T-SQL expected.
// in PG main parser, '@a=1' will be treated as a boolean expression to compare @a and 1. This is different T-SQL expected.
// We'll remove '@a=' from the query string so that main parser will return the expected result.
removeTokenStringFromQuery(stmt->sqlstmt, elem->LOCAL_ID(), ctx);
removeTokenStringFromQuery(stmt->sqlstmt, elem->EQUAL(), ctx);
Expand Down Expand Up @@ -6126,7 +6169,7 @@ void process_execsql_destination_update(TSqlParser::Update_statementContext *uct
removeCtxStringFromQuery(stmt->sqlstmt, elem->expression(), uctx);
}

// Concetually we have to remove any nearest COMMA.
// Conceptually we have to remove any nearest COMMA.
// But code is little bit dirty to handle some corner cases (the first few elems are removed or the last few elems are removed)
if ((i==0 || comma_carry_over) && i<uctx->COMMA().size())
{
Expand Down Expand Up @@ -6383,7 +6426,7 @@ static void
post_process_column_definition(TSqlParser::Column_definitionContext *ctx, PLtsql_stmt_execsql *stmt, TSqlParser::Ddl_statementContext *baseCtx)
{
/*
* TSQL allows timestamp datatype without column name in create/alter table/type
* T-SQL allows TIMESTAMP datatype without column name in create/alter table/type
* statement and internally assumes "timestamp" as column name. So here if
* we find TIMESTAMP token then we will prepend "timestamp" as a column name
* in the column definition.
Expand Down Expand Up @@ -6757,7 +6800,7 @@ post_process_declare_table_statement(PLtsql_stmt_decl_table *stmt, TSqlParser::T
for (auto cdtctx : ctx->column_def_table_constraints()->column_def_table_constraint())
{
/*
* TSQL allows timestamp datatype without column name in declare table type
* T-SQL allows TIMESTAMP datatype without column name in declare table type
* statement and internally assumes "timestamp" as column name. So here if
* we find TIMESTAMP token then we will prepend "timestamp" as a column name
* in the column definition.
Expand Down Expand Up @@ -7305,10 +7348,10 @@ bool
is_top_level_query_specification(TSqlParser::Query_specificationContext *ctx)
{
/*
* in ANTLR t-sql grammar, top-level select statement is represented as select_statement_standalone.
* subquery, derived table, cte can contain query specification via select statement but it is just a select_statement not via select_statement_standalone.
* To figure out the query-specification is corresponding to top-level select statement,
* iterate its ancestors and check if encoutering subquery, derived_table or common_table_expression.
* In ANTLR T-SQL grammar, top-level SELECT statement is represented as select_statement_standalone.
* subquery, derived table, CTE can contain query specification via SELECT statement but it is just a select_statement not via select_statement_standalone.
* To figure out the query-specification is corresponding to top-level SELECT statement,
* iterate its ancestors and check if encountering subquery, derived_table or common_table_expression.
* if it is query specification in top-level statement, it will never meet those grammar element.
*/
Assert(ctx);
Expand Down Expand Up @@ -7615,3 +7658,81 @@ makeChangeDbOwnerStatement(TSqlParser::Alter_authorizationContext *ctx)

return (PLtsql_stmt *) result;
}

// Returns true when 'text' has the shape
//     [+-] <mantissa> (E|e) [+-]
// i.e. a floating-point literal whose exponent digits are missing.
// The mantissa consists of decimal digits with at most one '.', which may be
// the first or last mantissa character, but at least one digit is required.
// Any other input, including the empty string, yields false.
static bool
isFloatLiteralMissingExponent(const std::string &text)
{
	size_t end = text.length();

	// Optional exponent sign at the very end: 2E+ , 2E-
	if (end > 0 && (text[end - 1] == '+' || text[end - 1] == '-'))
		end--;

	// The exponent marker itself is mandatory
	if (end == 0 || (text[end - 1] != 'E' && text[end - 1] != 'e'))
		return false;
	end--;

	// Optional leading sign of the mantissa
	size_t pos = 0;
	if (pos < end && (text[pos] == '+' || text[pos] == '-'))
		pos++;

	// What remains must be digits, with at most one '.' anywhere among them
	bool seen_dot = false;
	size_t n_digits = 0;
	for (; pos < end; pos++)
	{
		const char c = text[pos];

		if (c == '.' && !seen_dot)
			seen_dot = true;
		else if (isdigit(static_cast<unsigned char>(c)))	// cast: isdigit() is undefined for negative char values
			n_digits++;
		else
			return false;
	}

	return n_digits > 0;
}

// Look for '<number>E' : T-SQL allows the exponent to be omitted (defaults to 0), but PG raises an error.
// The REAL token is generated by the lexer; check the actual string to see if this is REAL notation.
// Notes:
// * the mantissa may also start with a '.', i.e. '.5e'
// * the exponent may just be a + or - sign (means '0'; 1e+ ==> 1e0 )
//
// When the constant matches, a rewrite entry is added to rewritten_query_fragment that
// replaces the constant's text with the same text followed by '0' (2E ==> 2E0, 2E+ ==> 2E+0).
// The entry is keyed on the constant's start index, shifted by the length of
// fragment_EXEC_prefix when in_execute_body_batch_parameter is set.
// Constants of any other shape are left untouched.
void
handleFloatWithoutExponent(TSqlParser::ConstantContext *ctx)
{
	const std::string str = getFullText(ctx);

	if (!isFloatLiteralMissingExponent(str))
		return;

	// Rewrite the exponent by adding a '0'
	size_t startPosition = ctx->start->getStartIndex();
	if (in_execute_body_batch_parameter)
		startPosition += fragment_EXEC_prefix.length(); // add length of prefix prepended internally for execute_body_batch
	rewritten_query_fragment.emplace(std::make_pair(startPosition, std::make_pair(str, str+"0")));
}
14 changes: 14 additions & 0 deletions test/JDBC/expected/float_exponent-vu-cleanup.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
drop procedure p1_float_exponent
go

drop procedure p2_float_exponent
go

drop function f1_float_exponent
go

drop view v1_float_exponent
go

drop table t1_float_exponent
go
15 changes: 15 additions & 0 deletions test/JDBC/expected/float_exponent-vu-prepare.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
create table t1_float_exponent(a int, b real, c float, d decimal(10,2))
go

create view v1_float_exponent as select 2e as c
go

create procedure p1_float_exponent @p float as select @p
go

create procedure p2_float_exponent as
insert t1_float_exponent values (2e+, 3.1e, -.4e-, 5.e-)
go

create function f1_float_exponent (@p float) returns float as begin return @p end
go
Loading

0 comments on commit dfe9a38

Please sign in to comment.