Support floating-point notation without exponent (#2197)

T-SQL supports floating-point without specifying an exponent, or specifying the exponent only as '+' or '-'. This defaults the exponent to 0. Some examples: 2.1E, -.2e+, -2.e- . This fix adds support by rewriting the exponent in ANTLR by appending a 0 in these cases. In addition, adding entry/exitEveryRule listeners in tsqlMutator for debugging infrastucture: these floating-points cases may occur also as parameters in stored proc calls meaning we have to handle them in tsqlMutator (as well as in tsqlBuilder where this debugging infrastructure is already present). Task: BABEL-4108 Signed-off-by: Rob Verschoor [email protected]
babelfish-for-postgresql · Jan 3, 2024 · dfe9a38 · dfe9a38
1 parent 2273124
commit dfe9a38
Show file tree

Hide file tree

Showing 9 changed files with 712 additions and 15 deletions.
diff --git a/contrib/babelfishpg_tsql/antlr/TSqlLexer.g4 b/contrib/babelfishpg_tsql/antlr/TSqlLexer.g4
@@ -1137,7 +1137,7 @@ DECIMAL:             DEC_DIGIT+;
 ID:                  ( [_#] | LETTER) ( [_#$@0-9] | LETTER)*;
 BINARY:              '0' [Xx] ( HEX_DIGIT | '\\' [\r]? [\n] )*;
 FLOAT:               DEC_DOT_DEC;
-REAL:                (DECIMAL | DEC_DOT_DEC) ([Ee] ([+-]? DEC_DIGIT+)?);
+REAL:                (DECIMAL | DEC_DOT_DEC) ([Ee] ([+-]? DEC_DIGIT*)?);
 
 MONEY:               CURRENCY_SYMBOL [ ]* ('+'|'-')? (DECIMAL | DEC_DOT_DEC);
 

diff --git a/contrib/babelfishpg_tsql/src/tsqlIface.cpp b/contrib/babelfishpg_tsql/src/tsqlIface.cpp
@@ -185,6 +185,7 @@ static bool in_execute_body_batch_parameter = false;
 static const std::string fragment_SELECT_prefix = "SELECT "; // fragment prefix for expressions
 static const std::string fragment_EXEC_prefix   = "EXEC ";   // fragment prefix for execute_body_batch
 static PLtsql_stmt *makeChangeDbOwnerStatement(TSqlParser::Alter_authorizationContext *ctx);
+static void handleFloatWithoutExponent(TSqlParser::ConstantContext *ctx); 
 
 /*
  * Structure / Utility function for general purpose of query string modification
@@ -2010,6 +2011,7 @@ class tsqlBuilder : public tsqlCommonMutator
 		clear_rewritten_query_fragment();
 	}
 
+	// NB: similar code is in tsqlMutator
 	void exitChar_string(TSqlParser::Char_stringContext *ctx) override
 	{
 		std::string str = getFullText(ctx);			
@@ -2121,6 +2123,13 @@ class tsqlBuilder : public tsqlCommonMutator
 		// TO-DO
 	}
 
+	// NB: this is copied in tsqlMutator
+	void exitConstant(TSqlParser::ConstantContext *ctx) override
+	{	
+		// Check for floating-point number without exponent
+		handleFloatWithoutExponent(ctx);
+	}
+
 	//////////////////////////////////////////////////////////////////////////////
 	// Special handling of non-statement context
 	//////////////////////////////////////////////////////////////////////////////
@@ -2419,18 +2428,19 @@ class tsqlBuilder : public tsqlCommonMutator
 
 class tsqlMutator : public TSqlParserBaseListener
 {
-public:
-    MyInputStream &stream;
+public:		
+	const std::vector<std::string> &ruleNames;	   		
+	MyInputStream &stream; 
 	bool in_procedure_parameter = false;    
 	bool in_procedure_parameter_id = false;    
 
 	std::vector<int> double_quota_places;
 
-    explicit tsqlMutator(MyInputStream &s)
-        : stream(s)
+    explicit tsqlMutator(const std::vector<std::string> &rules, MyInputStream &s)
+        : ruleNames(rules), stream(s)
     {
     }
-	    
+
 public:
     void enterFunc_proc_name_schema(TSqlParser::Func_proc_name_schemaContext *ctx) override
     {	
@@ -2472,7 +2482,31 @@ class tsqlMutator : public TSqlParserBaseListener
 		stream.setText(ctx->start->getStartIndex(), " chr");
 	}
     }	
+
+	std::string
+	getNodeDesc(ParseTree *t)
+	{
+		std::string result = Trees::getNodeText(t, this->ruleNames);
+		return result;
+	}
+
+	// Tree listener overrides		
+	void enterEveryRule(ParserRuleContext *ctx) override
+	{
+		std::string desc{getNodeDesc(ctx)};
+
+		if (pltsql_enable_antlr_detailed_log)
+			std::cout << "+entering (tsqlMutator)" << (void *) ctx << "[" << desc << "]" << std::endl;
+	}
+
+	void exitEveryRule(ParserRuleContext *ctx) override
+	{
+		std::string desc{getNodeDesc(ctx)};
 
+		if (pltsql_enable_antlr_detailed_log)
+			std::cout << "-leaving (tsqlMutator)" << (void *) ctx << "[" << desc << "]" << std::endl;
+	}    
+
     void enterFunc_proc_name_server_database_schema(TSqlParser::Func_proc_name_server_database_schemaContext *ctx) override
     {
 	// We are looking at a function name; it may be a function call, or a
@@ -2665,6 +2699,7 @@ class tsqlMutator : public TSqlParserBaseListener
 		}
 	}
 
+	// NB: similar code is in tsqlBuilder
 	void exitChar_string(TSqlParser::Char_stringContext *ctx) override
 	{
 		if (in_procedure_parameter)
@@ -2695,6 +2730,13 @@ class tsqlMutator : public TSqlParserBaseListener
 			}
 		}
 	}
+
+	// NB: this is copied in tsqlBuilder
+	void exitConstant(TSqlParser::ConstantContext *ctx) override
+	{	
+		// Check for floating-point number without exponent
+		handleFloatWithoutExponent(ctx);
+	}	
 };
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -2989,7 +3031,7 @@ antlr_parse_query(const char *sourceText, bool useSLLParsing) {
 			unsupportedFeatureHandler->visit(tree);
 		}
 
-		std::unique_ptr<tsqlMutator> mutator = std::make_unique<tsqlMutator>(sourceStream);
+		std::unique_ptr<tsqlMutator> mutator = std::make_unique<tsqlMutator>(parser.getRuleNames(), sourceStream);
 		antlr4::tree::ParseTreeWalker firstPass;
 		firstPass.walk(mutator.get(), tree);
 
@@ -4607,6 +4649,7 @@ makeDeclareStmt(TSqlParser::Declare_statementContext *ctx, std::map<PLtsql_stmt
 			{
 				std::string typeStr = ::getFullText(local->data_type());
 				PLtsql_type *type = parse_datatype(typeStr.c_str(), 0);  // FIXME: the second arg should be 'location'
+
 				if (is_tsql_text_ntext_or_image_datatype(type->typoid))
 				{
 					throw PGErrorWrapperException(ERROR, ERRCODE_DATATYPE_MISMATCH, "The text, ntext, and image data types are invalid for local variables.", getLineAndPos(local->data_type()));
@@ -6013,7 +6056,7 @@ void process_execsql_destination_select(TSqlParser::Select_statement_standaloneC
 
 			if (elem->EQUAL())
 			{
-				// in PG main parser, '@a=1' will be treaed as a boolean expression to compare @a and 1. This is different T-SQL expected.
+				// in PG main parser, '@a=1' will be treated as a boolean expression to compare @a and 1. This is different T-SQL expected.
 				// We'll remove '@a=' from the query string so that main parser will return the expected result.
 				removeTokenStringFromQuery(stmt->sqlstmt, elem->LOCAL_ID(), ctx);
 				removeTokenStringFromQuery(stmt->sqlstmt, elem->EQUAL(), ctx);
@@ -6126,7 +6169,7 @@ void process_execsql_destination_update(TSqlParser::Update_statementContext *uct
 					removeCtxStringFromQuery(stmt->sqlstmt, elem->expression(), uctx);
 				}
 
-				// Concetually we have to remove any nearest COMMA.
+				// Conceptually we have to remove any nearest COMMA.
 				// But code is little bit dirty to handle some corner cases (the first few elems are removed or the last few elems are removed)
 				if ((i==0 || comma_carry_over) && i<uctx->COMMA().size())
 				{
@@ -6383,7 +6426,7 @@ static void
 post_process_column_definition(TSqlParser::Column_definitionContext *ctx, PLtsql_stmt_execsql *stmt, TSqlParser::Ddl_statementContext *baseCtx)
 {
 	/*
-	 * TSQL allows timestamp datatype without column name in create/alter table/type
+	 * T-SQL allows TIMESTAMP datatype without column name in create/alter table/type
 	 * statement and internally assumes "timestamp" as column name. So here if
 	 * we find TIMESTAMP token then we will prepend "timestamp" as a column name
 	 * in the column definition.
@@ -6757,7 +6800,7 @@ post_process_declare_table_statement(PLtsql_stmt_decl_table *stmt, TSqlParser::T
 		for (auto cdtctx : ctx->column_def_table_constraints()->column_def_table_constraint())
 		{
 			/*
-			 * TSQL allows timestamp datatype without column name in declare table type
+			 * T-SQL allows TIMESTAMP datatype without column name in declare table type
 			 * statement and internally assumes "timestamp" as column name. So here if
 			 * we find TIMESTAMP token then we will prepend "timestamp" as a column name
 			 * in the column definition.
@@ -7305,10 +7348,10 @@ bool
 is_top_level_query_specification(TSqlParser::Query_specificationContext *ctx)
 {
 	/*
-	 * in ANTLR t-sql grammar, top-level select statement is represented as select_statement_standalone.
-	 * subquery, derived table, cte can contain query specification via select statement but it is just a select_statement not via select_statement_standalone.
-	 * To figure out the query-specification is corresponding to top-level select statement,
-	 * iterate its ancestors and check if encoutering subquery, derived_table or common_table_expression.
+	 * In ANTLR T-SQL grammar, top-level SELECT statement is represented as select_statement_standalone.
+	 * subquery, derived table, CTE can contain query specification via SELECT statement but it is just a select_statement not via select_statement_standalone.
+	 * To figure out the query-specification is corresponding to top-level SELECT statement,
+	 * iterate its ancestors and check if encountering subquery, derived_table or common_table_expression.
 	 * if it is query specification in top-level statement, it will never meet those grammar element.
 	 */
 	Assert(ctx);
@@ -7615,3 +7658,81 @@ makeChangeDbOwnerStatement(TSqlParser::Alter_authorizationContext *ctx)
 
 	return (PLtsql_stmt *) result;
 }
+
+// Look for '<number>E' : T-SQL allows the exponent to be omitted (defaults to 0), but PG raises an error 
+// The REAL token is generated by the lexer; check the actual string to see if this is REAL notation		
+// Notes: 
+//  * the mantissa may also start with a '.', i.e. '.5e'
+//  * the exponent may just be a + or - sign (means '0'; 1e+ ==> 1e0 )
+void
+handleFloatWithoutExponent(TSqlParser::ConstantContext *ctx) 
+{			
+	std::string str = getFullText(ctx);		
+
+	// Check for case where exponent is only a sign: 2E+ , 2E-		
+	if ((str.back() == '+') || (str.back() == '-'))
+	{
+		// remove terminating sign	
+		str.pop_back();	
+
+		if ((str.back() == 'E') || (str.back() == 'e'))
+		{		
+			// ends in 'E+' or 'E-', continue below
+		}
+		else 
+		{
+			// Whatever it is, it's not the notation we're looking for 	
+			return;
+		}
+	}
+
+	if ((str.back() == 'E') || (str.back() == 'e'))
+	{
+		// remove terminating E		
+		str.pop_back();
+
+		if ((str.front() == '+') || (str.front() == '-'))
+		{
+			 // remove leading sign
+			 str.erase(0,1);
+		}
+
+		// Now check if this is a valid number. Note that it may start or end with '.' 
+		// but in both cases it must have at least one digit as well.  
+		size_t dot = str.find(".");
+		if (dot != std::string::npos)
+		{
+			 // remove the dot
+			 str.erase(dot,1);
+		}
+
+    	// What we have left now should be all digits
+    	bool is_number = true;
+    	if (str.length() == 0) 
+		{
+			is_number = false;
+		}
+		else        		
+    	{			
+			for(size_t i = 0; i < str.length(); i++) 
+			{
+			    if (!isdigit(str[i])) 
+			    {
+			    	is_number = false;
+			    	break;
+			    }
+			}
+		}
+
+		if (is_number)
+		{	
+			// Rewrite the exponent by adding a '0'
+			std::string str = getFullText(ctx);
+			size_t startPosition = ctx->start->getStartIndex();
+			if (in_execute_body_batch_parameter) startPosition += fragment_EXEC_prefix.length(); // add length of prefix prepended internally for execute_body_batch  		
+			rewritten_query_fragment.emplace(std::make_pair(startPosition, std::make_pair(str, str+"0")));
+		}	
+	}
+
+	return;
+}
diff --git a/test/JDBC/expected/float_exponent-vu-cleanup.out b/test/JDBC/expected/float_exponent-vu-cleanup.out
@@ -0,0 +1,14 @@
+drop procedure p1_float_exponent
+go
+
+drop procedure p2_float_exponent
+go
+
+drop function f1_float_exponent
+go
+
+drop view v1_float_exponent
+go
+
+drop table t1_float_exponent
+go
diff --git a/test/JDBC/expected/float_exponent-vu-prepare.out b/test/JDBC/expected/float_exponent-vu-prepare.out
@@ -0,0 +1,15 @@
+create table t1_float_exponent(a int, b real, c float, d decimal(10,2))
+go
+
+create view v1_float_exponent as select 2e as c
+go
+
+create procedure p1_float_exponent @p float as select @p
+go
+
+create procedure p2_float_exponent as
+insert t1_float_exponent values (2e+, 3.1e, -.4e-, 5.e-) 
+go
+
+create function f1_float_exponent (@p float) returns float as begin return @p end
+go