Skip to content

Commit

Permalink
Support floating-point notation without exponent
Browse files Browse the repository at this point in the history
  • Loading branch information
robverschoor committed Jan 2, 2024
1 parent 5c67087 commit 93471a7
Show file tree
Hide file tree
Showing 9 changed files with 712 additions and 15 deletions.
2 changes: 1 addition & 1 deletion contrib/babelfishpg_tsql/antlr/TSqlLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -1136,7 +1136,7 @@ DECIMAL: DEC_DIGIT+;
ID: ( [_#] | LETTER) ( [_#$@0-9] | LETTER)*;
BINARY: '0' [Xx] ( HEX_DIGIT | '\\' [\r]? [\n] )*;
FLOAT: DEC_DOT_DEC;
REAL: (DECIMAL | DEC_DOT_DEC) ([Ee] ([+-]? DEC_DIGIT+)?);
REAL: (DECIMAL | DEC_DOT_DEC) ([Ee] ([+-]? DEC_DIGIT*)?);

MONEY: CURRENCY_SYMBOL [ ]* ('+'|'-')? (DECIMAL | DEC_DOT_DEC);

Expand Down
149 changes: 135 additions & 14 deletions contrib/babelfishpg_tsql/src/tsqlIface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ static bool in_execute_body_batch_parameter = false;
static const std::string fragment_SELECT_prefix = "SELECT "; // fragment prefix for expressions
static const std::string fragment_EXEC_prefix = "EXEC "; // fragment prefix for execute_body_batch
static PLtsql_stmt *makeChangeDbOwnerStatement(TSqlParser::Alter_authorizationContext *ctx);
static void handleFloatWithoutExponent(TSqlParser::ConstantContext *ctx);

/*
* Structure / Utility function for general purpose of query string modification
Expand Down Expand Up @@ -2010,6 +2011,7 @@ class tsqlBuilder : public tsqlCommonMutator
clear_rewritten_query_fragment();
}

// NB: similar code is in tsqlMutator
void exitChar_string(TSqlParser::Char_stringContext *ctx) override
{
std::string str = getFullText(ctx);
Expand Down Expand Up @@ -2121,6 +2123,13 @@ class tsqlBuilder : public tsqlCommonMutator
// TO-DO
}

// NB: this is copied in tsqlMutator
void exitConstant(TSqlParser::ConstantContext *ctx) override
{
// Check for floating-point number without exponent
handleFloatWithoutExponent(ctx);
}

//////////////////////////////////////////////////////////////////////////////
// Special handling of non-statement context
//////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -2419,18 +2428,19 @@ class tsqlBuilder : public tsqlCommonMutator

class tsqlMutator : public TSqlParserBaseListener
{
public:
MyInputStream &stream;
public:
const std::vector<std::string> &ruleNames;
MyInputStream &stream;
bool in_procedure_parameter = false;
bool in_procedure_parameter_id = false;

std::vector<int> double_quota_places;

explicit tsqlMutator(MyInputStream &s)
: stream(s)
explicit tsqlMutator(const std::vector<std::string> &rules, MyInputStream &s)
: ruleNames(rules), stream(s)
{
}

public:
void enterFunc_proc_name_schema(TSqlParser::Func_proc_name_schemaContext *ctx) override
{
Expand Down Expand Up @@ -2472,7 +2482,31 @@ class tsqlMutator : public TSqlParserBaseListener
stream.setText(ctx->start->getStartIndex(), " chr");
}
}

std::string
getNodeDesc(ParseTree *t)
{
std::string result = Trees::getNodeText(t, this->ruleNames);
return result;
}

// Tree listener overrides
void enterEveryRule(ParserRuleContext *ctx) override
{
std::string desc{getNodeDesc(ctx)};

if (pltsql_enable_antlr_detailed_log)
std::cout << "+entering (tsqlMutator)" << (void *) ctx << "[" << desc << "]" << std::endl;
}

void exitEveryRule(ParserRuleContext *ctx) override
{
std::string desc{getNodeDesc(ctx)};

if (pltsql_enable_antlr_detailed_log)
std::cout << "-leaving (tsqlMutator)" << (void *) ctx << "[" << desc << "]" << std::endl;
}

void enterFunc_proc_name_server_database_schema(TSqlParser::Func_proc_name_server_database_schemaContext *ctx) override
{
// We are looking at a function name; it may be a function call, or a
Expand Down Expand Up @@ -2665,6 +2699,7 @@ class tsqlMutator : public TSqlParserBaseListener
}
}

// NB: similar code is in tsqlBuilder
void exitChar_string(TSqlParser::Char_stringContext *ctx) override
{
if (in_procedure_parameter)
Expand Down Expand Up @@ -2695,6 +2730,13 @@ class tsqlMutator : public TSqlParserBaseListener
}
}
}

// NB: this is copied in tsqlBuilder
void exitConstant(TSqlParser::ConstantContext *ctx) override
{
// Check for floating-point number without exponent
handleFloatWithoutExponent(ctx);
}
};

////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -2989,7 +3031,7 @@ antlr_parse_query(const char *sourceText, bool useSLLParsing) {
unsupportedFeatureHandler->visit(tree);
}

std::unique_ptr<tsqlMutator> mutator = std::make_unique<tsqlMutator>(sourceStream);
std::unique_ptr<tsqlMutator> mutator = std::make_unique<tsqlMutator>(parser.getRuleNames(), sourceStream);
antlr4::tree::ParseTreeWalker firstPass;
firstPass.walk(mutator.get(), tree);

Expand Down Expand Up @@ -4607,6 +4649,7 @@ makeDeclareStmt(TSqlParser::Declare_statementContext *ctx, std::map<PLtsql_stmt
{
std::string typeStr = ::getFullText(local->data_type());
PLtsql_type *type = parse_datatype(typeStr.c_str(), 0); // FIXME: the second arg should be 'location'

if (is_tsql_text_ntext_or_image_datatype(type->typoid))
{
throw PGErrorWrapperException(ERROR, ERRCODE_DATATYPE_MISMATCH, "The text, ntext, and image data types are invalid for local variables.", getLineAndPos(local->data_type()));
Expand Down Expand Up @@ -6013,7 +6056,7 @@ void process_execsql_destination_select(TSqlParser::Select_statement_standaloneC

if (elem->EQUAL())
{
// in PG main parser, '@a=1' will be treaed as a boolean expression to compare @a and 1. This is different T-SQL expected.
// in PG main parser, '@a=1' will be treated as a boolean expression to compare @a and 1. This is different T-SQL expected.
// We'll remove '@a=' from the query string so that main parser will return the expected result.
removeTokenStringFromQuery(stmt->sqlstmt, elem->LOCAL_ID(), ctx);
removeTokenStringFromQuery(stmt->sqlstmt, elem->EQUAL(), ctx);
Expand Down Expand Up @@ -6126,7 +6169,7 @@ void process_execsql_destination_update(TSqlParser::Update_statementContext *uct
removeCtxStringFromQuery(stmt->sqlstmt, elem->expression(), uctx);
}

// Concetually we have to remove any nearest COMMA.
// Conceptually we have to remove any nearest COMMA.
// But code is little bit dirty to handle some corner cases (the first few elems are removed or the last few elems are removed)
if ((i==0 || comma_carry_over) && i<uctx->COMMA().size())
{
Expand Down Expand Up @@ -6383,7 +6426,7 @@ static void
post_process_column_definition(TSqlParser::Column_definitionContext *ctx, PLtsql_stmt_execsql *stmt, TSqlParser::Ddl_statementContext *baseCtx)
{
/*
* TSQL allows timestamp datatype without column name in create/alter table/type
* T-SQL allows TIMESTAMP datatype without column name in create/alter table/type
* statement and internally assumes "timestamp" as column name. So here if
* we find TIMESTAMP token then we will prepend "timestamp" as a column name
* in the column definition.
Expand Down Expand Up @@ -6757,7 +6800,7 @@ post_process_declare_table_statement(PLtsql_stmt_decl_table *stmt, TSqlParser::T
for (auto cdtctx : ctx->column_def_table_constraints()->column_def_table_constraint())
{
/*
* TSQL allows timestamp datatype without column name in declare table type
* T-SQL allows TIMESTAMP datatype without column name in declare table type
* statement and internally assumes "timestamp" as column name. So here if
* we find TIMESTAMP token then we will prepend "timestamp" as a column name
* in the column definition.
Expand Down Expand Up @@ -7305,10 +7348,10 @@ bool
is_top_level_query_specification(TSqlParser::Query_specificationContext *ctx)
{
/*
* in ANTLR t-sql grammar, top-level select statement is represented as select_statement_standalone.
* subquery, derived table, cte can contain query specification via select statement but it is just a select_statement not via select_statement_standalone.
* To figure out the query-specification is corresponding to top-level select statement,
* iterate its ancestors and check if encoutering subquery, derived_table or common_table_expression.
* In ANTLR T-SQL grammar, top-level SELECT statement is represented as select_statement_standalone.
* subquery, derived table, CTE can contain query specification via SELECT statement but it is just a select_statement not via select_statement_standalone.
* To figure out the query-specification is corresponding to top-level SELECT statement,
* iterate its ancestors and check if encountering subquery, derived_table or common_table_expression.
* if it is query specification in top-level statement, it will never meet those grammar element.
*/
Assert(ctx);
Expand Down Expand Up @@ -7615,3 +7658,81 @@ makeChangeDbOwnerStatement(TSqlParser::Alter_authorizationContext *ctx)

return (PLtsql_stmt *) result;
}

// Look for '<number>E' : T-SQL allows the exponent to be omitted (defaults to 0), but PG raises an error
// The REAL token is generated by the lexer; check the actual string to see if this is REAL notation
// Notes:
// * the mantissa may also start with a '.', i.e. '.5e'
// * the exponent may just be a + or - sign (means '0'; 1e+ ==> 1e0 )
void
handleFloatWithoutExponent(TSqlParser::ConstantContext *ctx)
{
std::string str = getFullText(ctx);

// Check for case where exponent is only a sign: 2E+ , 2E-
if ((str.back() == '+') || (str.back() == '-'))
{
// remove terminating sign
str.pop_back();

if ((str.back() == 'E') || (str.back() == 'e'))
{
// ends in 'E+' or 'E-', continue below
}
else
{
// Whatever it is, it's not the notation we're looking for
return;
}
}

if ((str.back() == 'E') || (str.back() == 'e'))
{
// remove terminating E
str.pop_back();

if ((str.front() == '+') || (str.front() == '-'))
{
// remove leading sign
str.erase(0,1);
}

// Now check if this is a valid number. Note that it may start or end with '.'
// but in both cases it must have at least one digit as well.
size_t dot = str.find(".");
if (dot != std::string::npos)
{
// remove the dot
str.erase(dot,1);
}

// What we have left now should be all digits
bool is_number = true;
if (str.length() == 0)
{
is_number = false;
}
else
{
for(size_t i = 0; i < str.length(); i++)
{
if (!isdigit(str[i]))
{
is_number = false;
break;
}
}
}

if (is_number)
{
// Rewrite the exponent by adding a '0'
std::string str = getFullText(ctx);
size_t startPosition = ctx->start->getStartIndex();
if (in_execute_body_batch_parameter) startPosition += fragment_EXEC_prefix.length(); // add length of prefix prepended internally for execute_body_batch
rewritten_query_fragment.emplace(std::make_pair(startPosition, std::make_pair(str, str+"0")));
}
}

return;
}
14 changes: 14 additions & 0 deletions test/JDBC/expected/float_exponent-vu-cleanup.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
drop procedure p1_float_exponent
go

drop procedure p2_float_exponent
go

drop function f1_float_exponent
go

drop view v1_float_exponent
go

drop table t1_float_exponent
go
15 changes: 15 additions & 0 deletions test/JDBC/expected/float_exponent-vu-prepare.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
create table t1_float_exponent(a int, b real, c float, d decimal(10,2))
go

create view v1_float_exponent as select 2e as c
go

create procedure p1_float_exponent @p float as select @p
go

create procedure p2_float_exponent as
insert t1_float_exponent values (2e+, 3.1e, -.4e-, 5.e-)
go

create function f1_float_exponent (@p float) returns float as begin return @p end
go
Loading

0 comments on commit 93471a7

Please sign in to comment.