Support floating-point notation without exponent

babelfish-for-postgresql · Jan 2, 2024 · 93471a7 · 93471a7
1 parent 5c67087
commit 93471a7
Show file tree

Hide file tree

Showing 9 changed files with 712 additions and 15 deletions.
diff --git a/contrib/babelfishpg_tsql/antlr/TSqlLexer.g4 b/contrib/babelfishpg_tsql/antlr/TSqlLexer.g4
@@ -1136,7 +1136,7 @@ DECIMAL:             DEC_DIGIT+;
 ID:                  ( [_#] | LETTER) ( [_#$@0-9] | LETTER)*;
 BINARY:              '0' [Xx] ( HEX_DIGIT | '\\' [\r]? [\n] )*;
 FLOAT:               DEC_DOT_DEC;
-REAL:                (DECIMAL | DEC_DOT_DEC) ([Ee] ([+-]? DEC_DIGIT+)?);
+REAL:                (DECIMAL | DEC_DOT_DEC) ([Ee] ([+-]? DEC_DIGIT*)?);
 
 MONEY:               CURRENCY_SYMBOL [ ]* ('+'|'-')? (DECIMAL | DEC_DOT_DEC);
 

diff --git a/contrib/babelfishpg_tsql/src/tsqlIface.cpp b/contrib/babelfishpg_tsql/src/tsqlIface.cpp
@@ -185,6 +185,7 @@ static bool in_execute_body_batch_parameter = false;
 static const std::string fragment_SELECT_prefix = "SELECT "; // fragment prefix for expressions
 static const std::string fragment_EXEC_prefix   = "EXEC ";   // fragment prefix for execute_body_batch
 static PLtsql_stmt *makeChangeDbOwnerStatement(TSqlParser::Alter_authorizationContext *ctx);
+static void handleFloatWithoutExponent(TSqlParser::ConstantContext *ctx); 
 
 /*
  * Structure / Utility function for general purpose of query string modification
@@ -2010,6 +2011,7 @@ class tsqlBuilder : public tsqlCommonMutator
 		clear_rewritten_query_fragment();
 	}
 
+	// NB: similar code is in tsqlMutator
 	void exitChar_string(TSqlParser::Char_stringContext *ctx) override
 	{
 		std::string str = getFullText(ctx);			
@@ -2121,6 +2123,13 @@ class tsqlBuilder : public tsqlCommonMutator
 		// TO-DO
 	}
 
+	// NB: this is copied in tsqlMutator
+	void exitConstant(TSqlParser::ConstantContext *ctx) override
+	{	
+		// Check for floating-point number without exponent
+		handleFloatWithoutExponent(ctx);
+	}
+
 	//////////////////////////////////////////////////////////////////////////////
 	// Special handling of non-statement context
 	//////////////////////////////////////////////////////////////////////////////
@@ -2419,18 +2428,19 @@ class tsqlBuilder : public tsqlCommonMutator
 
 class tsqlMutator : public TSqlParserBaseListener
 {
-public:
-    MyInputStream &stream;
+public:		
+	const std::vector<std::string> &ruleNames;	   		
+	MyInputStream &stream; 
 	bool in_procedure_parameter = false;    
 	bool in_procedure_parameter_id = false;    
 
 	std::vector<int> double_quota_places;
 
-    explicit tsqlMutator(MyInputStream &s)
-        : stream(s)
+    explicit tsqlMutator(const std::vector<std::string> &rules, MyInputStream &s)
+        : ruleNames(rules), stream(s)
     {
     }
-	    
+
 public:
     void enterFunc_proc_name_schema(TSqlParser::Func_proc_name_schemaContext *ctx) override
     {	
@@ -2472,7 +2482,31 @@ class tsqlMutator : public TSqlParserBaseListener
 		stream.setText(ctx->start->getStartIndex(), " chr");
 	}
     }	
+
+	std::string
+	getNodeDesc(ParseTree *t)
+	{
+		std::string result = Trees::getNodeText(t, this->ruleNames);
+		return result;
+	}
+
+	// Tree listener overrides		
+	void enterEveryRule(ParserRuleContext *ctx) override
+	{
+		std::string desc{getNodeDesc(ctx)};
+
+		if (pltsql_enable_antlr_detailed_log)
+			std::cout << "+entering (tsqlMutator)" << (void *) ctx << "[" << desc << "]" << std::endl;
+	}
+
+	void exitEveryRule(ParserRuleContext *ctx) override
+	{
+		std::string desc{getNodeDesc(ctx)};
 
+		if (pltsql_enable_antlr_detailed_log)
+			std::cout << "-leaving (tsqlMutator)" << (void *) ctx << "[" << desc << "]" << std::endl;
+	}    
+
     void enterFunc_proc_name_server_database_schema(TSqlParser::Func_proc_name_server_database_schemaContext *ctx) override
     {
 	// We are looking at a function name; it may be a function call, or a
@@ -2665,6 +2699,7 @@ class tsqlMutator : public TSqlParserBaseListener
 		}
 	}
 
+	// NB: similar code is in tsqlBuilder
 	void exitChar_string(TSqlParser::Char_stringContext *ctx) override
 	{
 		if (in_procedure_parameter)
@@ -2695,6 +2730,13 @@ class tsqlMutator : public TSqlParserBaseListener
 			}
 		}
 	}
+
+	// NB: this is copied in tsqlBuilder
+	void exitConstant(TSqlParser::ConstantContext *ctx) override
+	{	
+		// Check for floating-point number without exponent
+		handleFloatWithoutExponent(ctx);
+	}	
 };
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -2989,7 +3031,7 @@ antlr_parse_query(const char *sourceText, bool useSLLParsing) {
 			unsupportedFeatureHandler->visit(tree);
 		}
 
-		std::unique_ptr<tsqlMutator> mutator = std::make_unique<tsqlMutator>(sourceStream);
+		std::unique_ptr<tsqlMutator> mutator = std::make_unique<tsqlMutator>(parser.getRuleNames(), sourceStream);
 		antlr4::tree::ParseTreeWalker firstPass;
 		firstPass.walk(mutator.get(), tree);
 
@@ -4607,6 +4649,7 @@ makeDeclareStmt(TSqlParser::Declare_statementContext *ctx, std::map<PLtsql_stmt
 			{
 				std::string typeStr = ::getFullText(local->data_type());
 				PLtsql_type *type = parse_datatype(typeStr.c_str(), 0);  // FIXME: the second arg should be 'location'
+
 				if (is_tsql_text_ntext_or_image_datatype(type->typoid))
 				{
 					throw PGErrorWrapperException(ERROR, ERRCODE_DATATYPE_MISMATCH, "The text, ntext, and image data types are invalid for local variables.", getLineAndPos(local->data_type()));
@@ -6013,7 +6056,7 @@ void process_execsql_destination_select(TSqlParser::Select_statement_standaloneC
 
 			if (elem->EQUAL())
 			{
-				// in PG main parser, '@a=1' will be treaed as a boolean expression to compare @a and 1. This is different T-SQL expected.
+				// in PG main parser, '@a=1' will be treated as a boolean expression to compare @a and 1. This is different T-SQL expected.
 				// We'll remove '@a=' from the query string so that main parser will return the expected result.
 				removeTokenStringFromQuery(stmt->sqlstmt, elem->LOCAL_ID(), ctx);
 				removeTokenStringFromQuery(stmt->sqlstmt, elem->EQUAL(), ctx);
@@ -6126,7 +6169,7 @@ void process_execsql_destination_update(TSqlParser::Update_statementContext *uct
 					removeCtxStringFromQuery(stmt->sqlstmt, elem->expression(), uctx);
 				}
 
-				// Concetually we have to remove any nearest COMMA.
+				// Conceptually we have to remove any nearest COMMA.
 				// But code is little bit dirty to handle some corner cases (the first few elems are removed or the last few elems are removed)
 				if ((i==0 || comma_carry_over) && i<uctx->COMMA().size())
 				{
@@ -6383,7 +6426,7 @@ static void
 post_process_column_definition(TSqlParser::Column_definitionContext *ctx, PLtsql_stmt_execsql *stmt, TSqlParser::Ddl_statementContext *baseCtx)
 {
 	/*
-	 * TSQL allows timestamp datatype without column name in create/alter table/type
+	 * T-SQL allows TIMESTAMP datatype without column name in create/alter table/type
 	 * statement and internally assumes "timestamp" as column name. So here if
 	 * we find TIMESTAMP token then we will prepend "timestamp" as a column name
 	 * in the column definition.
@@ -6757,7 +6800,7 @@ post_process_declare_table_statement(PLtsql_stmt_decl_table *stmt, TSqlParser::T
 		for (auto cdtctx : ctx->column_def_table_constraints()->column_def_table_constraint())
 		{
 			/*
-			 * TSQL allows timestamp datatype without column name in declare table type
+			 * T-SQL allows TIMESTAMP datatype without column name in declare table type
 			 * statement and internally assumes "timestamp" as column name. So here if
 			 * we find TIMESTAMP token then we will prepend "timestamp" as a column name
 			 * in the column definition.
@@ -7305,10 +7348,10 @@ bool
 is_top_level_query_specification(TSqlParser::Query_specificationContext *ctx)
 {
 	/*
-	 * in ANTLR t-sql grammar, top-level select statement is represented as select_statement_standalone.
-	 * subquery, derived table, cte can contain query specification via select statement but it is just a select_statement not via select_statement_standalone.
-	 * To figure out the query-specification is corresponding to top-level select statement,
-	 * iterate its ancestors and check if encoutering subquery, derived_table or common_table_expression.
+	 * In ANTLR T-SQL grammar, top-level SELECT statement is represented as select_statement_standalone.
+	 * subquery, derived table, CTE can contain query specification via SELECT statement but it is just a select_statement not via select_statement_standalone.
+	 * To figure out the query-specification is corresponding to top-level SELECT statement,
+	 * iterate its ancestors and check if encountering subquery, derived_table or common_table_expression.
 	 * if it is query specification in top-level statement, it will never meet those grammar element.
 	 */
 	Assert(ctx);
@@ -7615,3 +7658,81 @@ makeChangeDbOwnerStatement(TSqlParser::Alter_authorizationContext *ctx)
 
 	return (PLtsql_stmt *) result;
 }
+
+// Look for '<number>E' : T-SQL allows the exponent to be omitted (defaults to 0), but PG raises an error 
+// The REAL token is generated by the lexer; check the actual string to see if this is REAL notation		
+// Notes: 
+//  * the mantissa may also start with a '.', i.e. '.5e'
+//  * the exponent may just be a + or - sign (means '0'; 1e+ ==> 1e0 )
+void
+handleFloatWithoutExponent(TSqlParser::ConstantContext *ctx) 
+{			
+	std::string str = getFullText(ctx);		
+
+	// Check for case where exponent is only a sign: 2E+ , 2E-		
+	if ((str.back() == '+') || (str.back() == '-'))
+	{
+		// remove terminating sign	
+		str.pop_back();	
+
+		if ((str.back() == 'E') || (str.back() == 'e'))
+		{		
+			// ends in 'E+' or 'E-', continue below
+		}
+		else 
+		{
+			// Whatever it is, it's not the notation we're looking for 	
+			return;
+		}
+	}
+
+	if ((str.back() == 'E') || (str.back() == 'e'))
+	{
+		// remove terminating E		
+		str.pop_back();
+
+		if ((str.front() == '+') || (str.front() == '-'))
+		{
+			 // remove leading sign
+			 str.erase(0,1);
+		}
+
+		// Now check if this is a valid number. Note that it may start or end with '.' 
+		// but in both cases it must have at least one digit as well.  
+		size_t dot = str.find(".");
+		if (dot != std::string::npos)
+		{
+			 // remove the dot
+			 str.erase(dot,1);
+		}
+
+    	// What we have left now should be all digits
+    	bool is_number = true;
+    	if (str.length() == 0) 
+		{
+			is_number = false;
+		}
+		else        		
+    	{			
+			for(size_t i = 0; i < str.length(); i++) 
+			{
+			    if (!isdigit(str[i])) 
+			    {
+			    	is_number = false;
+			    	break;
+			    }
+			}
+		}
+
+		if (is_number)
+		{	
+			// Rewrite the exponent by adding a '0'
+			std::string str = getFullText(ctx);
+			size_t startPosition = ctx->start->getStartIndex();
+			if (in_execute_body_batch_parameter) startPosition += fragment_EXEC_prefix.length(); // add length of prefix prepended internally for execute_body_batch  		
+			rewritten_query_fragment.emplace(std::make_pair(startPosition, std::make_pair(str, str+"0")));
+		}	
+	}
+
+	return;
+}
diff --git a/test/JDBC/expected/float_exponent-vu-cleanup.out b/test/JDBC/expected/float_exponent-vu-cleanup.out
@@ -0,0 +1,14 @@
+drop procedure p1_float_exponent
+go
+
+drop procedure p2_float_exponent
+go
+
+drop function f1_float_exponent
+go
+
+drop view v1_float_exponent
+go
+
+drop table t1_float_exponent
+go
diff --git a/test/JDBC/expected/float_exponent-vu-prepare.out b/test/JDBC/expected/float_exponent-vu-prepare.out
@@ -0,0 +1,15 @@
+create table t1_float_exponent(a int, b real, c float, d decimal(10,2))
+go
+
+create view v1_float_exponent as select 2e as c
+go
+
+create procedure p1_float_exponent @p float as select @p
+go
+
+create procedure p2_float_exponent as
+insert t1_float_exponent values (2e+, 3.1e, -.4e-, 5.e-) 
+go
+
+create function f1_float_exponent (@p float) returns float as begin return @p end
+go