Skip to content

Commit

Permalink
Fix difference in behaviour for CONCAT() and CONCAT_WS() functions wi…
Browse files Browse the repository at this point in the history
…th multibyte characters (babelfish-for-postgresql#2949)

Issues

Arguments datatype and return datatype of CONCAT() and CONCAT_WS() string function is not defined correctly in
Babelfish, which is causing output difference between TSQL and Babelfish.

CONCAT() and CONCAT_WS() functions does not raise error when called with < 2 or < 3 arguments respectively.

BABEL-4299 missed to add SQL changes to upgrade script, as a result after upgrade to 15.5, 14.10 or later, the
definition will remain same as the previous one and there would be no performance improvement.

Changes made to fix the issues

Added wrapper function handling for CONCAT() and CONCAT_WS() function to ensure correct input arguments
datatype and return datatype. Also ensured to throw appropriate error when CONCAT() and CONCAT_WS() functions
are called with < 2 or < 3 arguments respectively.

Task: BABEL- 3409, BABEL- 3648
Signed-off-by: Anikait Agrawal <[email protected]>
  • Loading branch information
Anikait143 authored and staticlibs committed Oct 27, 2024
1 parent 55017e5 commit 28605cb
Show file tree
Hide file tree
Showing 54 changed files with 38,405 additions and 401 deletions.
1 change: 1 addition & 0 deletions contrib/babelfishpg_tsql/antlr/TSqlLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ COMPRESSION: C O M P R E S S I O N;
COMPUTE: C O M P U T E;
CONCAT: C O N C A T;
CONCAT_NULL_YIELDS_NULL: C O N C A T UNDERLINE N U L L UNDERLINE Y I E L D S UNDERLINE N U L L;
CONCAT_WS: C O N C A T UNDERLINE W S;
CONFIGURATION: C O N F I G U R A T I O N;
CONNECT: C O N N E C T;
CONNECTION: C O N N E C T I O N;
Expand Down
1 change: 1 addition & 0 deletions contrib/babelfishpg_tsql/antlr/TSqlParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -4288,6 +4288,7 @@ keyword
| COMPRESSION
| CONCAT
| CONCAT_NULL_YIELDS_NULL
| CONCAT_WS
| CONFIGURATION
| CONNECT
| CONNECTION
Expand Down
16 changes: 8 additions & 8 deletions contrib/babelfishpg_tsql/sql/babelfishpg_tsql.sql
Original file line number Diff line number Diff line change
Expand Up @@ -1619,9 +1619,9 @@ CAST(t6.data_type AS SMALLINT) AS DATA_TYPE,

CASE -- cases for when they are of type identity.
WHEN a.attidentity <> ''::"char" AND (t1.name = 'decimal' OR t1.name = 'numeric')
THEN CAST(CONCAT(t1.name, '() identity') AS sys.sysname)
THEN CAST(PG_CATALOG.CONCAT(t1.name, '() identity') AS sys.sysname)
WHEN a.attidentity <> ''::"char" AND (t1.name != 'decimal' AND t1.name != 'numeric')
THEN CAST(CONCAT(t1.name, ' identity') AS sys.sysname)
THEN CAST(PG_CATALOG.CONCAT(t1.name, ' identity') AS sys.sysname)
ELSE CAST(t1.name AS sys.sysname)
END AS TYPE_NAME,

Expand Down Expand Up @@ -2006,8 +2006,8 @@ CAST(d.name AS sys.sysname) COLLATE sys.database_default AS PROCEDURE_QUALIFIER,
CAST(s1.name AS sys.sysname) AS PROCEDURE_OWNER,

CASE
WHEN p.prokind = 'p' THEN CAST(concat(p.proname, ';1') AS sys.nvarchar(134))
ELSE CAST(concat(p.proname, ';0') AS sys.nvarchar(134))
WHEN p.prokind = 'p' THEN CAST(PG_CATALOG.concat(p.proname, ';1') AS sys.nvarchar(134))
ELSE CAST(PG_CATALOG.concat(p.proname, ';0') AS sys.nvarchar(134))
END AS PROCEDURE_NAME,

-1 AS NUM_INPUT_PARAMS,
Expand All @@ -2028,8 +2028,8 @@ SELECT CAST((SELECT sys.db_name()) AS sys.sysname) COLLATE sys.database_default
CAST(nspname AS sys.sysname) AS PROCEDURE_OWNER,

CASE
WHEN prokind = 'p' THEN cast(concat(proname, ';1') AS sys.nvarchar(134))
ELSE cast(concat(proname, ';0') AS sys.nvarchar(134))
WHEN prokind = 'p' THEN cast(PG_CATALOG.concat(proname, ';1') AS sys.nvarchar(134))
ELSE cast(PG_CATALOG.concat(proname, ';0') AS sys.nvarchar(134))
END AS PROCEDURE_NAME,

-1 AS NUM_INPUT_PARAMS,
Expand Down Expand Up @@ -2609,8 +2609,8 @@ CAST(sys.db_name() AS sys.sysname) AS PROCEDURE_QUALIFIER -- This will always be
, CAST(ss.schema_name AS sys.sysname) AS PROCEDURE_OWNER
, CAST(
CASE
WHEN ss.prokind = 'p' THEN CONCAT(ss.proname, ';1')
ELSE CONCAT(ss.proname, ';0')
WHEN ss.prokind = 'p' THEN PG_CATALOG.CONCAT(ss.proname, ';1')
ELSE PG_CATALOG.CONCAT(ss.proname, ';0')
END
AS sys.nvarchar(134)) AS PROCEDURE_NAME
, CAST(
Expand Down
4 changes: 2 additions & 2 deletions contrib/babelfishpg_tsql/sql/information_schema_tsql.sql
Original file line number Diff line number Diff line change
Expand Up @@ -783,14 +783,14 @@ GRANT SELECT ON information_schema_tsql.sequences TO PUBLIC;
*/
CREATE OR REPLACE VIEW information_schema_tsql.schemata AS
SELECT CAST(sys.db_name() AS sys.sysname) AS "CATALOG_NAME",
CAST(CASE WHEN np.nspname LIKE CONCAT(sys.db_name(),'%') THEN RIGHT(np.nspname, LENGTH(np.nspname) - LENGTH(sys.db_name()) - 1)
CAST(CASE WHEN np.nspname LIKE PG_CATALOG.CONCAT(sys.db_name(),'%') THEN RIGHT(np.nspname, LENGTH(np.nspname) - LENGTH(sys.db_name()) - 1)
ELSE np.nspname END AS sys.nvarchar(128)) AS "SCHEMA_NAME",
-- For system-defined schemas, schema-owner name will be same as schema_name
-- For user-defined schemas having default owner, schema-owner will be dbo
-- For user-defined schemas with explicit owners, rolname contains dbname followed
-- by owner name, so need to extract the owner name from rolname always.
CAST(CASE WHEN sys.bbf_is_shared_schema(np.nspname) = TRUE THEN np.nspname
WHEN r.rolname LIKE CONCAT(sys.db_name(),'%') THEN
WHEN r.rolname LIKE PG_CATALOG.CONCAT(sys.db_name(),'%') THEN
CASE WHEN RIGHT(r.rolname, LENGTH(r.rolname) - LENGTH(sys.db_name()) - 1) = 'db_owner' THEN 'dbo'
ELSE RIGHT(r.rolname, LENGTH(r.rolname) - LENGTH(sys.db_name()) - 1) END ELSE 'dbo' END
AS sys.nvarchar(128)) AS "SCHEMA_OWNER",
Expand Down
578 changes: 289 additions & 289 deletions contrib/babelfishpg_tsql/sql/sys_function_helpers.sql

Large diffs are not rendered by default.

116 changes: 113 additions & 3 deletions contrib/babelfishpg_tsql/sql/sys_functions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -756,7 +756,7 @@ BEGIN
ELSE '-'
END
);
v_string := CONCAT(v_resdatetime::pg_catalog.text,v_sign,abs(p_hour_offset)::SMALLINT::text,':',
v_string := PG_CATALOG.CONCAT(v_resdatetime::pg_catalog.text,v_sign,abs(p_hour_offset)::SMALLINT::text,':',
abs(p_minute_offset)::SMALLINT::text);
RETURN CAST(v_string AS sys.DATETIMEOFFSET);
EXCEPTION
Expand Down Expand Up @@ -2804,7 +2804,7 @@ BEGIN
END IF;

-- Surround with double-quotes to handle names that contain periods/spaces
qualified_name := concat('"', pg_schema, '"."', object_name, '"');
qualified_name := PG_CATALOG.concat('"', pg_schema, '"."', object_name, '"');

SELECT oid INTO namespace_id FROM pg_catalog.pg_namespace WHERE nspname = pg_schema COLLATE sys.database_default;

Expand Down Expand Up @@ -3817,6 +3817,84 @@ BEGIN
END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE;

-- wrapper functions for concat --
CREATE OR REPLACE FUNCTION sys.concat(VARIADIC args sys.VARCHAR[] DEFAULT '{}')
RETURNS sys.VARCHAR
AS $$
DECLARE
arr_len INTEGER;
BEGIN
arr_len := array_length(args, 1);

-- PG has limitation for max number of args = 100
IF arr_len IS NULL OR arr_len < 2 OR arr_len > 100 THEN
RAISE EXCEPTION 'The concat function requires 2 to 100 arguments.';
END IF;

RETURN (PG_CATALOG.ARRAY_TO_STRING(args, ''));
END;
$$ LANGUAGE plpgsql STABLE PARALLEL SAFE;

CREATE OR REPLACE FUNCTION sys.concat(VARIADIC args sys.NVARCHAR[])
RETURNS sys.NVARCHAR
AS $$
DECLARE
arr_len INTEGER;
BEGIN
arr_len := array_length(args, 1);

-- PG has limitation for max number of args = 100
IF arr_len < 2 OR arr_len > 100 THEN
RAISE EXCEPTION 'The concat function requires 2 to 100 arguments.';
END IF;

RETURN (PG_CATALOG.ARRAY_TO_STRING(args, ''));
END;
$$ LANGUAGE plpgsql STABLE PARALLEL SAFE;

-- wrapper functions for concat_ws --
CREATE OR REPLACE FUNCTION sys.concat_ws(seperator sys.VARCHAR DEFAULT '', VARIADIC args sys.VARCHAR[] DEFAULT '{}')
RETURNS sys.VARCHAR
AS $$
DECLARE
arr_len INTEGER;
BEGIN
arr_len := array_length(args, 1);

-- PG has limitation for max number of args = 100
IF arr_len IS NULL OR arr_len < 2 OR arr_len > 99 THEN
RAISE EXCEPTION 'The concat_ws function requires 3 to 100 arguments.';
END IF;

IF seperator IS NULL THEN
RETURN (PG_CATALOG.ARRAY_TO_STRING(args, ''));
END IF;

RETURN (PG_CATALOG.ARRAY_TO_STRING(args, seperator));
END;
$$ LANGUAGE plpgsql STABLE PARALLEL SAFE;

CREATE OR REPLACE FUNCTION sys.concat_ws(seperator sys.NVARCHAR, VARIADIC args sys.NVARCHAR[])
RETURNS sys.NVARCHAR
AS $$
DECLARE
arr_len INTEGER;
BEGIN
arr_len := array_length(args, 1);

-- PG has limitation for max number of args = 100
IF arr_len < 2 OR arr_len > 99 THEN
RAISE EXCEPTION 'The concat_ws function requires 3 to 100 arguments.';
END IF;

IF seperator IS NULL THEN
RETURN (PG_CATALOG.ARRAY_TO_STRING(args, ''));
END IF;

RETURN (PG_CATALOG.ARRAY_TO_STRING(args, seperator));
END;
$$ LANGUAGE plpgsql STABLE PARALLEL SAFE;

-- For getting host os from PG_VERSION_STR
CREATE OR REPLACE FUNCTION sys.get_host_os()
RETURNS sys.NVARCHAR
Expand Down Expand Up @@ -3964,7 +4042,7 @@ BEGIN
-- To convert input jsonpath to the required jsonb_path format
json_path_convert = regexp_replace(json_path, '\$\.|]|\$\[' , '' , 'ig'); -- To remove "$." and "]" sign from the string
json_path_convert = regexp_replace(json_path_convert, '\.|\[' , ',' , 'ig'); -- To replace "." and "[" with "," to change into required format
new_jsonb_path = CONCAT('{',json_path_convert,'}'); -- Final required format of path by jsonb_set
new_jsonb_path = PG_CATALOG.CONCAT('{',json_path_convert,'}'); -- Final required format of path by jsonb_set

key_exists = jsonb_path_exists(json_expression,json_path::jsonpath); -- To check if key exist in the given path

Expand Down Expand Up @@ -4477,6 +4555,38 @@ RETURNS INT AS
'babelfishpg_tsql', 'type_id'
LANGUAGE C STABLE;

-- wrapper functions for reverse
CREATE OR REPLACE FUNCTION sys.reverse(string ANYELEMENT)
RETURNS sys.VARCHAR
AS
$BODY$
DECLARE
string_arg_datatype text;
string_arg_typeid oid;
string_basetype oid;
BEGIN
string_arg_typeid := pg_typeof(string)::oid;
string_arg_datatype := sys.translate_pg_type_to_tsql(string_arg_typeid);
IF string_arg_datatype IS NULL THEN
-- for User Defined Datatype, use immediate base type to check for argument datatype validation
string_basetype := sys.bbf_get_immediate_base_type_of_UDT(string_arg_typeid);
string_arg_datatype := sys.translate_pg_type_to_tsql(string_basetype);
END IF;

-- restricting arguments with invalid datatypes for reverse function
IF string_arg_datatype IN ('image', 'sql_variant', 'xml', 'geometry', 'geography') THEN
RAISE EXCEPTION 'Argument data type % is invalid for argument 1 of reverse function.', string_arg_datatype;
END IF;

IF string IS NULL THEN
RETURN NULL;
END IF;

RETURN PG_CATALOG.reverse(string::sys.varchar);
END;
$BODY$
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;

CREATE OR REPLACE FUNCTION sys.reverse(string sys.NCHAR)
RETURNS sys.NVARCHAR
AS
Expand Down
2 changes: 1 addition & 1 deletion contrib/babelfishpg_tsql/sql/sys_procedures.sql
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ BEGIN
IF lower("@option_name") like 'babelfishpg_tsql.%' collate "C" THEN
SELECT "@option_name" INTO normalized_name;
ELSE
SELECT concat('babelfishpg_tsql.',"@option_name") INTO normalized_name;
SELECT pg_catalog.concat('babelfishpg_tsql.',"@option_name") INTO normalized_name;
END IF;

IF lower("@option_scope") = 'server' THEN
Expand Down
24 changes: 12 additions & 12 deletions contrib/babelfishpg_tsql/src/hooks.c
Original file line number Diff line number Diff line change
Expand Up @@ -2112,24 +2112,24 @@ pltsql_report_proc_not_found_error(List *names, List *fargs, List *given_argname
{
const char *arg_str = (max_nargs < 2) ? "argument" : "arguments";

if (!proc_call)
{
/* deconstruct the names list */
DeconstructQualifiedName(names, &schemaname, &funcname);

/*
* Check whether function is an special function or not, and
* report appropriate error if applicable
*/
validate_special_function(schemaname, funcname, fargs, nargs, input_typeids, found);
}

/*
* Found the proc/func having the same number of arguments. possibly
* data-type mistmatch.
*/
if (found)
{
if (!proc_call)
{
/* deconstruct the names list */
DeconstructQualifiedName(names, &schemaname, &funcname);

/*
* Check whether function is an special function or not, and
* report appropriate error if applicable
*/
validate_special_function(schemaname, funcname, fargs, nargs, input_typeids);
}

ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("The %s %s is found but cannot be used. Possibly due to datatype mismatch and implicit casting is not allowed.", obj_type, NameListToString(names))),
Expand Down
2 changes: 1 addition & 1 deletion contrib/babelfishpg_tsql/src/pltsql.h
Original file line number Diff line number Diff line change
Expand Up @@ -2132,7 +2132,7 @@ extern void exec_alter_role_cmd(char *query_str, RoleSpec *role);
/*
* Functions in pltsql_coerce.c
*/
extern bool validate_special_function(char *proc_nsname, char *proc_name, List* fargs, int nargs, Oid *input_typeids);
extern bool validate_special_function(char *proc_nsname, char *proc_name, List* fargs, int nargs, Oid *input_typeids, bool num_args_match);
extern void init_special_function_list(void);

#endif /* PLTSQL_H */
Loading

0 comments on commit 28605cb

Please sign in to comment.