From cb6592cc3058c7356557f331642090d5644baeeb Mon Sep 17 00:00:00 2001 From: Rohit bhagat <65351720+rohit01010@users.noreply.github.com> Date: Mon, 8 Jan 2024 10:56:53 +0530 Subject: [PATCH] Fixed blankspace padding in char datatype for multibyte characters (#2167) Currently, when a multi-byte character is cast to char(1), space padding is not done properly. This PR fixes the issue. Task: BABEL-4638 Signed-off-by: Rohit Bhagat --- contrib/babelfishpg_common/src/varchar.c | 1 + test/JDBC/expected/BABEL-4638-vu-cleanup.out | 16 + test/JDBC/expected/BABEL-4638-vu-prepare.out | 37 ++ test/JDBC/expected/BABEL-4638-vu-verify.out | 451 +++++++++++++++++++ test/JDBC/input/BABEL-4638-vu-cleanup.sql | 16 + test/JDBC/input/BABEL-4638-vu-prepare.sql | 27 ++ test/JDBC/input/BABEL-4638-vu-verify.sql | 144 ++++++ 7 files changed, 692 insertions(+) create mode 100644 test/JDBC/expected/BABEL-4638-vu-cleanup.out create mode 100644 test/JDBC/expected/BABEL-4638-vu-prepare.out create mode 100644 test/JDBC/expected/BABEL-4638-vu-verify.out create mode 100644 test/JDBC/input/BABEL-4638-vu-cleanup.sql create mode 100644 test/JDBC/input/BABEL-4638-vu-prepare.sql create mode 100644 test/JDBC/input/BABEL-4638-vu-verify.sql diff --git a/contrib/babelfishpg_common/src/varchar.c b/contrib/babelfishpg_common/src/varchar.c index f9e7d880c23..ecc0b55daa2 100644 --- a/contrib/babelfishpg_common/src/varchar.c +++ b/contrib/babelfishpg_common/src/varchar.c @@ -1133,6 +1133,7 @@ bpchar(PG_FUNCTION_ARGS) size_t maxmblen; maxmblen = pg_encoding_mbcliplen(collInfo.enc, tmp, byteLen, maxByteLen); + blankSpace = maxByteLen - maxmblen; if (!isExplicit && !(suppress_string_truncation_error_hook && (*suppress_string_truncation_error_hook) ())) diff --git a/test/JDBC/expected/BABEL-4638-vu-cleanup.out b/test/JDBC/expected/BABEL-4638-vu-cleanup.out new file mode 100644 index 00000000000..5a3b7dec2fc --- /dev/null +++ b/test/JDBC/expected/BABEL-4638-vu-cleanup.out @@ -0,0 +1,16 @@ +DROP TABLE babel_4638_t1 
+DROP TABLE babel_4638_t2 +DROP TABLE babel_4638_t3 +DROP TABLE babel_4638_t4 +DROP TABLE babel_4638_t5 +DROP TABLE babel_4638_char_t1 +DROP TABLE babel_4638_char_t2 +DROP TABLE babel_4638_char_t3 +DROP TABLE babel_4638_char_t4 +DROP TABLE babel_4638_char_t5 +DROP TABLE babel_4638_nchar_t1 +DROP TABLE babel_4638_nchar_t2 +DROP TABLE babel_4638_nchar_t3 +DROP TABLE babel_4638_nchar_t4 +DROP TABLE babel_4638_nchar_t5 +GO diff --git a/test/JDBC/expected/BABEL-4638-vu-prepare.out b/test/JDBC/expected/BABEL-4638-vu-prepare.out new file mode 100644 index 00000000000..7b622ef33e2 --- /dev/null +++ b/test/JDBC/expected/BABEL-4638-vu-prepare.out @@ -0,0 +1,37 @@ +CREATE TABLE babel_4638_t1(a VARCHAR(10) COLLATE arabic_ci_as); +CREATE TABLE babel_4638_t2(a VARCHAR(10) COLLATE chinese_prc_ci_as); +CREATE TABLE babel_4638_t3(a VARCHAR(10) COLLATE japanese_ci_as); +CREATE TABLE babel_4638_t4(a VARCHAR(10) COLLATE hebrew_ci_as); +CREATE TABLE babel_4638_t5(a VARCHAR(10)); +GO + +CREATE TABLE babel_4638_char_t1(a CHAR(10) COLLATE arabic_ci_as); +CREATE TABLE babel_4638_char_t2(a CHAR(10) COLLATE chinese_prc_ci_as); +CREATE TABLE babel_4638_char_t3(a CHAR(10) COLLATE japanese_ci_as); +CREATE TABLE babel_4638_char_t4(a CHAR(10) COLLATE hebrew_ci_as); +CREATE TABLE babel_4638_char_t5(a CHAR(10)); +GO + +CREATE TABLE babel_4638_nchar_t1(a NCHAR(10) COLLATE arabic_ci_as); +CREATE TABLE babel_4638_nchar_t2(a NCHAR(10) COLLATE chinese_prc_ci_as); +CREATE TABLE babel_4638_nchar_t3(a NCHAR(10) COLLATE japanese_ci_as); +CREATE TABLE babel_4638_nchar_t4(a NCHAR(10) COLLATE hebrew_ci_as); +CREATE TABLE babel_4638_nchar_t5(a NCHAR(10)); +GO + +INSERT INTO babel_4638_t1 VALUES('ح'), ('غ'), ('سسس'), ('للل'); +INSERT INTO babel_4638_t2 VALUES('五'), ('九'), ('乙乙乙'), ('魚魚魚'); +INSERT INTO babel_4638_t3 VALUES('あ'), ('九'), ('ちちち'), ('さささ'); +INSERT INTO babel_4638_t4 VALUES('ב'), ('א'), ('קקק'), ('מממ'); +INSERT INTO babel_4638_t5 VALUES('a'), ('🙂'), ('🙂🙂🙂'), ('さささ'); +GO +~~ROW COUNT: 4~~ + +~~ROW 
COUNT: 4~~ + +~~ROW COUNT: 4~~ + +~~ROW COUNT: 4~~ + +~~ROW COUNT: 4~~ + diff --git a/test/JDBC/expected/BABEL-4638-vu-verify.out b/test/JDBC/expected/BABEL-4638-vu-verify.out new file mode 100644 index 00000000000..7a95b57a9ee --- /dev/null +++ b/test/JDBC/expected/BABEL-4638-vu-verify.out @@ -0,0 +1,451 @@ +-- arabic_ci_as +SELECT * FROM babel_4638_t1 +GO +~~START~~ +varchar +ح +غ +سسس +للل +~~END~~ + + +SELECT CAST(a AS CHAR(1)) + '|' FROM babel_4638_t1 +GO +~~START~~ +varchar +ح| +غ| +س| +ل| +~~END~~ + + +SELECT CAST(a AS CHAR(5)) + '|' FROM babel_4638_t1 +GO +~~START~~ +varchar +ح | +غ | +سسس | +للل | +~~END~~ + + +SELECT CAST(a AS NCHAR(1)) + '|' FROM babel_4638_t1 +GO +~~START~~ +nvarchar +ح| +غ| +س| +ل| +~~END~~ + + +SELECT CAST(a AS NCHAR(5)) + '|' FROM babel_4638_t1 +GO +~~START~~ +nvarchar +ح | +غ | +سسس | +للل | +~~END~~ + + +INSERT INTO babel_4638_char_t1 VALUES('ح'), ('غ'), ('سسس'), ('للل') +INSERT INTO babel_4638_nchar_t1 VALUES('ح'), ('غ'), ('سسس'), ('للل') +GO +~~ROW COUNT: 4~~ + +~~ROW COUNT: 4~~ + + +-- here a is defined as CHAR(10) COLLATE arabic_ci_as +SELECT a + '|' FROM babel_4638_char_t1 +GO +~~START~~ +varchar +ح | +غ | +سسس | +للل | +~~END~~ + + +-- here a is defined as NCHAR(10) COLLATE arabic_ci_as +SELECT a + '|' FROM babel_4638_nchar_t1 +GO +~~START~~ +nvarchar +ح | +غ | +سسس | +للل | +~~END~~ + + +-- chinese_prc_ci_as +SELECT * FROM babel_4638_t2 +GO +~~START~~ +varchar +五 +九 +乙乙乙 +魚魚魚 +~~END~~ + + +SELECT CAST(a AS CHAR(1)) + '|' FROM babel_4638_t2 +GO +~~START~~ +varchar + | + | + | + | +~~END~~ + + +SELECT CAST(a AS CHAR(5)) + '|' FROM babel_4638_t2 +GO +~~START~~ +varchar +五 | +九 | +乙乙 | +魚魚 | +~~END~~ + + +SELECT CAST(a AS NCHAR(1)) + '|' FROM babel_4638_t2 +GO +~~START~~ +nvarchar +五| +九| +乙| +魚| +~~END~~ + + +SELECT CAST(a AS NCHAR(5)) + '|' FROM babel_4638_t2 +GO +~~START~~ +nvarchar +五 | +九 | +乙乙乙 | +魚魚魚 | +~~END~~ + + +INSERT INTO babel_4638_char_t2 VALUES('五'), ('九'), ('乙乙乙'), ('魚魚魚') +INSERT INTO babel_4638_nchar_t2 
VALUES('五'), ('九'), ('乙乙乙'), ('魚魚魚') +GO +~~ROW COUNT: 4~~ + +~~ROW COUNT: 4~~ + + +-- here a is defined as CHAR(10) COLLATE chinese_prc_ci_as +SELECT a + '|' FROM babel_4638_char_t2 +GO +~~START~~ +varchar +五 | +九 | +乙乙乙 | +魚魚魚 | +~~END~~ + + +-- here a is defined as NCHAR(10) COLLATE chinese_prc_ci_as +SELECT a + '|' FROM babel_4638_nchar_t2 +GO +~~START~~ +nvarchar +五 | +九 | +乙乙乙 | +魚魚魚 | +~~END~~ + + +-- japanese_ci_as +SELECT * FROM babel_4638_t3 +GO +~~START~~ +varchar +あ +九 +ちちち +さささ +~~END~~ + + +SELECT CAST(a AS CHAR(1)) + '|' FROM babel_4638_t3 +GO +~~START~~ +varchar + | + | + | + | +~~END~~ + + +SELECT CAST(a AS CHAR(5)) + '|' FROM babel_4638_t3 +GO +~~START~~ +varchar +あ | +九 | +ちち | +ささ | +~~END~~ + + +SELECT CAST(a AS NCHAR(1)) + '|' FROM babel_4638_t3 +GO +~~START~~ +nvarchar +あ| +九| +ち| +さ| +~~END~~ + + +SELECT CAST(a AS NCHAR(5)) + '|' FROM babel_4638_t3 +GO +~~START~~ +nvarchar +あ | +九 | +ちちち | +さささ | +~~END~~ + + +INSERT INTO babel_4638_char_t3 VALUES('あ'), ('九'), ('ちちち'), ('さささ') +INSERT INTO babel_4638_nchar_t3 VALUES('あ'), ('九'), ('ちちち'), ('さささ') +GO +~~ROW COUNT: 4~~ + +~~ROW COUNT: 4~~ + + +-- here a is defined as CHAR(10) COLLATE japanese_ci_as +SELECT a + '|' FROM babel_4638_char_t3 +GO +~~START~~ +varchar +あ | +九 | +ちちち | +さささ | +~~END~~ + + +-- here a is defined as NCHAR(10) COLLATE japanese_ci_as +SELECT a + '|' FROM babel_4638_nchar_t3 +GO +~~START~~ +nvarchar +あ | +九 | +ちちち | +さささ | +~~END~~ + + +-- hebrew_ci_as +SELECT * FROM babel_4638_t4 +GO +~~START~~ +varchar +ב +א +קקק +מממ +~~END~~ + + +SELECT CAST(a AS CHAR(1)) + '|' FROM babel_4638_t4 +GO +~~START~~ +varchar +ב| +א| +ק| +מ| +~~END~~ + + +SELECT CAST(a AS CHAR(5)) + '|' FROM babel_4638_t4 +GO +~~START~~ +varchar +ב | +א | +קקק | +מממ | +~~END~~ + + +SELECT CAST(a AS NCHAR(1)) + '|' FROM babel_4638_t4 +GO +~~START~~ +nvarchar +ב| +א| +ק| +מ| +~~END~~ + + +SELECT CAST(a AS NCHAR(5)) + '|' FROM babel_4638_t4 +GO +~~START~~ +nvarchar +ב | +א | +קקק | +מממ | +~~END~~ + + +INSERT 
INTO babel_4638_char_t4 VALUES('ב'), ('א'), ('קקק'), ('מממ'); +INSERT INTO babel_4638_nchar_t4 VALUES('ב'), ('א'), ('קקק'), ('מממ'); +GO +~~ROW COUNT: 4~~ + +~~ROW COUNT: 4~~ + + +-- here a is defined as CHAR(10) COLLATE hebrew_ci_as +SELECT a + '|' FROM babel_4638_char_t4 +GO +~~START~~ +varchar +ב | +א | +קקק | +מממ | +~~END~~ + + +-- here a is defined as NCHAR(10) COLLATE hebrew_ci_as +SELECT a + '|' FROM babel_4638_nchar_t4 +GO +~~START~~ +nvarchar +ב | +א | +קקק | +מממ | +~~END~~ + + + +-- Default +SELECT * FROM babel_4638_t5 +GO +~~START~~ +varchar +a +? +??? +??? +~~END~~ + + +SELECT CAST(a as NVARCHAR(10)) FROM babel_4638_t5 +GO +~~START~~ +nvarchar +a +🙂 +🙂🙂🙂 +さささ +~~END~~ + + +SELECT CAST(a AS CHAR(1)) + '|' FROM babel_4638_t5 +GO +~~START~~ +varchar +a| +?| +?| +?| +~~END~~ + + +SELECT CAST(a AS CHAR(5)) + '|' FROM babel_4638_t5 +GO +~~START~~ +varchar +a | +? | +??? | +??? | +~~END~~ + + +SELECT CAST(a AS NCHAR(1)) + '|' FROM babel_4638_t5 +GO +~~START~~ +nvarchar +a| +~~ERROR (Code: 33557097)~~ + +~~ERROR (Message: value too long for type character(1) as UTF16 output)~~ + + +SELECT CAST(a AS NCHAR(5)) + '|' FROM babel_4638_t5 +GO +~~START~~ +nvarchar +a | +🙂 | +~~ERROR (Code: 33557097)~~ + +~~ERROR (Message: value too long for type character(5) as UTF16 output)~~ + + +INSERT INTO babel_4638_char_t5 VALUES('a'), ('🙂'), ('🙂🙂🙂'), ('さささ'); +INSERT INTO babel_4638_nchar_t5 VALUES('a'), ('🙂'), ('🙂🙂🙂'), ('さささ'); +GO +~~ROW COUNT: 4~~ + +~~ROW COUNT: 4~~ + + +-- here a is defined as CHAR(10) +SELECT a + '|' FROM babel_4638_char_t5 +GO +~~START~~ +varchar +a | +? | +??? | +??? 
| +~~END~~ + + +-- here a is defined as NCHAR(10) +SELECT a + '|' FROM babel_4638_nchar_t5 +GO +~~START~~ +nvarchar +a | +🙂 | +🙂🙂🙂 | +さささ | +~~END~~ + + diff --git a/test/JDBC/input/BABEL-4638-vu-cleanup.sql b/test/JDBC/input/BABEL-4638-vu-cleanup.sql new file mode 100644 index 00000000000..089aa75c011 --- /dev/null +++ b/test/JDBC/input/BABEL-4638-vu-cleanup.sql @@ -0,0 +1,16 @@ +DROP TABLE babel_4638_t1 +DROP TABLE babel_4638_t2 +DROP TABLE babel_4638_t3 +DROP TABLE babel_4638_t4 +DROP TABLE babel_4638_t5 +DROP TABLE babel_4638_char_t1 +DROP TABLE babel_4638_char_t2 +DROP TABLE babel_4638_char_t3 +DROP TABLE babel_4638_char_t4 +DROP TABLE babel_4638_char_t5 +DROP TABLE babel_4638_nchar_t1 +DROP TABLE babel_4638_nchar_t2 +DROP TABLE babel_4638_nchar_t3 +DROP TABLE babel_4638_nchar_t4 +DROP TABLE babel_4638_nchar_t5 +GO \ No newline at end of file diff --git a/test/JDBC/input/BABEL-4638-vu-prepare.sql b/test/JDBC/input/BABEL-4638-vu-prepare.sql new file mode 100644 index 00000000000..09e485e9ecf --- /dev/null +++ b/test/JDBC/input/BABEL-4638-vu-prepare.sql @@ -0,0 +1,27 @@ +CREATE TABLE babel_4638_t1(a VARCHAR(10) COLLATE arabic_ci_as); +CREATE TABLE babel_4638_t2(a VARCHAR(10) COLLATE chinese_prc_ci_as); +CREATE TABLE babel_4638_t3(a VARCHAR(10) COLLATE japanese_ci_as); +CREATE TABLE babel_4638_t4(a VARCHAR(10) COLLATE hebrew_ci_as); +CREATE TABLE babel_4638_t5(a VARCHAR(10)); +GO + +CREATE TABLE babel_4638_char_t1(a CHAR(10) COLLATE arabic_ci_as); +CREATE TABLE babel_4638_char_t2(a CHAR(10) COLLATE chinese_prc_ci_as); +CREATE TABLE babel_4638_char_t3(a CHAR(10) COLLATE japanese_ci_as); +CREATE TABLE babel_4638_char_t4(a CHAR(10) COLLATE hebrew_ci_as); +CREATE TABLE babel_4638_char_t5(a CHAR(10)); +GO + +CREATE TABLE babel_4638_nchar_t1(a NCHAR(10) COLLATE arabic_ci_as); +CREATE TABLE babel_4638_nchar_t2(a NCHAR(10) COLLATE chinese_prc_ci_as); +CREATE TABLE babel_4638_nchar_t3(a NCHAR(10) COLLATE japanese_ci_as); +CREATE TABLE babel_4638_nchar_t4(a NCHAR(10) 
COLLATE hebrew_ci_as); +CREATE TABLE babel_4638_nchar_t5(a NCHAR(10)); +GO + +INSERT INTO babel_4638_t1 VALUES('ح'), ('غ'), ('سسس'), ('للل'); +INSERT INTO babel_4638_t2 VALUES('五'), ('九'), ('乙乙乙'), ('魚魚魚'); +INSERT INTO babel_4638_t3 VALUES('あ'), ('九'), ('ちちち'), ('さささ'); +INSERT INTO babel_4638_t4 VALUES('ב'), ('א'), ('קקק'), ('מממ'); +INSERT INTO babel_4638_t5 VALUES('a'), ('🙂'), ('🙂🙂🙂'), ('さささ'); +GO diff --git a/test/JDBC/input/BABEL-4638-vu-verify.sql b/test/JDBC/input/BABEL-4638-vu-verify.sql new file mode 100644 index 00000000000..6dd7bf778ed --- /dev/null +++ b/test/JDBC/input/BABEL-4638-vu-verify.sql @@ -0,0 +1,144 @@ +-- arabic_ci_as +SELECT * FROM babel_4638_t1 +GO + +SELECT CAST(a AS CHAR(1)) + '|' FROM babel_4638_t1 +GO + +SELECT CAST(a AS CHAR(5)) + '|' FROM babel_4638_t1 +GO + +SELECT CAST(a AS NCHAR(1)) + '|' FROM babel_4638_t1 +GO + +SELECT CAST(a AS NCHAR(5)) + '|' FROM babel_4638_t1 +GO + +INSERT INTO babel_4638_char_t1 VALUES('ح'), ('غ'), ('سسس'), ('للل') +INSERT INTO babel_4638_nchar_t1 VALUES('ح'), ('غ'), ('سسس'), ('للل') +GO + +-- here a is defined as CHAR(10) COLLATE arabic_ci_as +SELECT a + '|' FROM babel_4638_char_t1 +GO + +-- here a is defined as NCHAR(10) COLLATE arabic_ci_as +SELECT a + '|' FROM babel_4638_nchar_t1 +GO + +-- chinese_prc_ci_as +SELECT * FROM babel_4638_t2 +GO + +SELECT CAST(a AS CHAR(1)) + '|' FROM babel_4638_t2 +GO + +SELECT CAST(a AS CHAR(5)) + '|' FROM babel_4638_t2 +GO + +SELECT CAST(a AS NCHAR(1)) + '|' FROM babel_4638_t2 +GO + +SELECT CAST(a AS NCHAR(5)) + '|' FROM babel_4638_t2 +GO + +INSERT INTO babel_4638_char_t2 VALUES('五'), ('九'), ('乙乙乙'), ('魚魚魚') +INSERT INTO babel_4638_nchar_t2 VALUES('五'), ('九'), ('乙乙乙'), ('魚魚魚') +GO + +-- here a is defined as CHAR(10) COLLATE chinese_prc_ci_as +SELECT a + '|' FROM babel_4638_char_t2 +GO + +-- here a is defined as NCHAR(10) COLLATE chinese_prc_ci_as +SELECT a + '|' FROM babel_4638_nchar_t2 +GO + +-- japanese_ci_as +SELECT * FROM babel_4638_t3 +GO + +SELECT CAST(a AS CHAR(1)) 
+ '|' FROM babel_4638_t3 +GO + +SELECT CAST(a AS CHAR(5)) + '|' FROM babel_4638_t3 +GO + +SELECT CAST(a AS NCHAR(1)) + '|' FROM babel_4638_t3 +GO + +SELECT CAST(a AS NCHAR(5)) + '|' FROM babel_4638_t3 +GO + +INSERT INTO babel_4638_char_t3 VALUES('あ'), ('九'), ('ちちち'), ('さささ') +INSERT INTO babel_4638_nchar_t3 VALUES('あ'), ('九'), ('ちちち'), ('さささ') +GO + +-- here a is defined as CHAR(10) COLLATE japanese_ci_as +SELECT a + '|' FROM babel_4638_char_t3 +GO + +-- here a is defined as NCHAR(10) COLLATE japanese_ci_as +SELECT a + '|' FROM babel_4638_nchar_t3 +GO + +-- hebrew_ci_as +SELECT * FROM babel_4638_t4 +GO + +SELECT CAST(a AS CHAR(1)) + '|' FROM babel_4638_t4 +GO + +SELECT CAST(a AS CHAR(5)) + '|' FROM babel_4638_t4 +GO + +SELECT CAST(a AS NCHAR(1)) + '|' FROM babel_4638_t4 +GO + +SELECT CAST(a AS NCHAR(5)) + '|' FROM babel_4638_t4 +GO + +INSERT INTO babel_4638_char_t4 VALUES('ב'), ('א'), ('קקק'), ('מממ'); +INSERT INTO babel_4638_nchar_t4 VALUES('ב'), ('א'), ('קקק'), ('מממ'); +GO + +-- here a is defined as CHAR(10) COLLATE hebrew_ci_as +SELECT a + '|' FROM babel_4638_char_t4 +GO + +-- here a is defined as NCHAR(10) COLLATE hebrew_ci_as +SELECT a + '|' FROM babel_4638_nchar_t4 +GO + + +-- Default +SELECT * FROM babel_4638_t5 +GO + +SELECT CAST(a as NVARCHAR(10)) FROM babel_4638_t5 +GO + +SELECT CAST(a AS CHAR(1)) + '|' FROM babel_4638_t5 +GO + +SELECT CAST(a AS CHAR(5)) + '|' FROM babel_4638_t5 +GO + +SELECT CAST(a AS NCHAR(1)) + '|' FROM babel_4638_t5 +GO + +SELECT CAST(a AS NCHAR(5)) + '|' FROM babel_4638_t5 +GO + +INSERT INTO babel_4638_char_t5 VALUES('a'), ('🙂'), ('🙂🙂🙂'), ('さささ'); +INSERT INTO babel_4638_nchar_t5 VALUES('a'), ('🙂'), ('🙂🙂🙂'), ('さささ'); +GO + +-- here a is defined as CHAR(10) +SELECT a + '|' FROM babel_4638_char_t5 +GO + +-- here a is defined as NCHAR(10) +SELECT a + '|' FROM babel_4638_nchar_t5 +GO +