From 82ade618286889c8d21a73baeb94143fcce3b86c Mon Sep 17 00:00:00 2001 From: wyb Date: Tue, 6 Aug 2024 12:28:44 +0800 Subject: [PATCH] [Enhancement] Support json logical type in parquet files() (#49385) Signed-off-by: wyb (cherry picked from commit 914ca69eeb17bd275e1346c92b652cc37d23c7f0) --- be/src/exec/parquet_schema_builder.cpp | 2 + test/sql/test_files/R/json_parquet | 42 ++++++++++++++++++ test/sql/test_files/T/json_parquet | 18 ++++++++ .../test_files/parquet_format/json.parquet | Bin 0 -> 911 bytes 4 files changed, 62 insertions(+) create mode 100644 test/sql/test_files/R/json_parquet create mode 100644 test/sql/test_files/T/json_parquet create mode 100644 test/sql/test_files/parquet_format/json.parquet diff --git a/be/src/exec/parquet_schema_builder.cpp b/be/src/exec/parquet_schema_builder.cpp index 0a2186e3d9062..8a3ac6bc1f9ea 100644 --- a/be/src/exec/parquet_schema_builder.cpp +++ b/be/src/exec/parquet_schema_builder.cpp @@ -88,6 +88,8 @@ static Status get_parquet_type_from_primitive(const ::parquet::schema::NodePtr& auto decimal_logical_type = std::dynamic_pointer_cast<const ::parquet::DecimalLogicalType>(logical_type); *type_desc = TypeDescriptor::create_decimalv3_type(TYPE_DECIMAL128, decimal_logical_type->precision(), decimal_logical_type->scale()); + } else if (logical_type->is_JSON()) { + *type_desc = TypeDescriptor::create_json_type(); } else { *type_desc = TypeDescriptor::create_varbinary_type(TypeDescriptor::MAX_VARCHAR_LENGTH); } diff --git a/test/sql/test_files/R/json_parquet b/test/sql/test_files/R/json_parquet new file mode 100644 index 0000000000000..7b021596645d3 --- /dev/null +++ b/test/sql/test_files/R/json_parquet @@ -0,0 +1,42 @@ +-- name: test_json_parquet + +create database db_${uuid0}; +use db_${uuid0}; + +shell: ossutil64 mkdir oss://${oss_bucket}/test_files/parquet_format/${uuid0} >/dev/null || echo "exit 0" >/dev/null + +shell: ossutil64 cp --force ./sql/test_files/parquet_format/json.parquet oss://${oss_bucket}/test_files/parquet_format/${uuid0}/ | grep -Pv 
"(average|elapsed)" +-- result: +0 + +Succeed: Total num: 1, size: 911. OK num: 1(upload 1 files). +-- !result + +select * from files('path' = 'oss://${oss_bucket}/test_files/parquet_format/${uuid0}/*', 'format' = 'parquet'); +-- result: +1 ["a", "b"] +2 ["c", "d"] +-- !result + +create table t1 as select * from files('path' = 'oss://${oss_bucket}/test_files/parquet_format/${uuid0}/*', 'format' = 'parquet'); + +desc t1; +-- result: +id bigint YES true None +json_data json YES false None +-- !result + +create table t2 (id bigint, array_col array); + +insert into t2 select * from files('path' = 'oss://${oss_bucket}/test_files/parquet_format/${uuid0}/*', 'format' = 'parquet'); +-- result: +-- !result + +select * from t2; +-- result: +1 ["a","b"] +2 ["c","d"] +-- !result + + +shell: ossutil64 rm -rf oss://${oss_bucket}/test_files/parquet_format/${uuid0}/ > /dev/null diff --git a/test/sql/test_files/T/json_parquet b/test/sql/test_files/T/json_parquet new file mode 100644 index 0000000000000..ad1faa49adb25 --- /dev/null +++ b/test/sql/test_files/T/json_parquet @@ -0,0 +1,18 @@ +-- name: test_json_parquet + +create database db_${uuid0}; +use db_${uuid0}; + +shell: ossutil64 mkdir oss://${oss_bucket}/test_files/parquet_format/${uuid0} >/dev/null || echo "exit 0" >/dev/null +shell: ossutil64 cp --force ./sql/test_files/parquet_format/json.parquet oss://${oss_bucket}/test_files/parquet_format/${uuid0}/ | grep -Pv "(average|elapsed)" + +select * from files('path' = 'oss://${oss_bucket}/test_files/parquet_format/${uuid0}/*', 'format' = 'parquet'); + +create table t1 as select * from files('path' = 'oss://${oss_bucket}/test_files/parquet_format/${uuid0}/*', 'format' = 'parquet'); +desc t1; + +create table t2 (id bigint, array_col array); +insert into t2 select * from files('path' = 'oss://${oss_bucket}/test_files/parquet_format/${uuid0}/*', 'format' = 'parquet'); +select * from t2; + +shell: ossutil64 rm -rf oss://${oss_bucket}/test_files/parquet_format/${uuid0}/ > /dev/null 
diff --git a/test/sql/test_files/parquet_format/json.parquet b/test/sql/test_files/parquet_format/json.parquet new file mode 100644 index 0000000000000000000000000000000000000000..a720e76ace1902dd8b561f1574e471fb2001da7f GIT binary patch literal 911 zcmaJ=O>5gg5FM$)xFy#W68In%V$$LmyC0zxU3%*=l-z?Zt(iHE>*&rVaH?ey>Jvx z#>)}$LIR~JO`#I0xVRHF{SsYn=u(QDCyEsUebwYSj$8=C z>=aLteqJ?2wNA{svOTm+Z$)P8syeF2L?Q14Kb6XeYoY0Offb#C=cuXV7UI9uyL_J- zvClp}!`#KUljyJc=;MK2=IWL8DPNv;H>DQLlp2?_p}9;o(`&n^C96%v^v_1L+gb@- z%Tn3QZ2EaRr>K{!Xkm;=W7 w^uzR1w>fR3K^=#ri!B824&