From 43296460ff4d7ec1c914e27ddbba5a63c197fb00 Mon Sep 17 00:00:00 2001 From: wyb Date: Thu, 8 Aug 2024 05:45:37 +0800 Subject: [PATCH] [BugFix] Fix orc tinyint on aarch64 (#49517) Why I'm doing: https://developer.arm.com/documentation/den0013/d/Porting/Miscellaneous-C-porting-issues/unsigned-char-and-signed-char compiler treat char as unsigned on aarch64, negative tinyint will be wrong, and the query result from orc is null. What I'm doing: use int8_t not char to cast Signed-off-by: wyb (cherry picked from commit 6de3ab7f7199e5f2cf1eaa3e95088245a4cda49d) --- .../orc/apache-orc/c++/src/ColumnReader.cc | 4 ++- test/sql/test_files/R/orc_tinyint_aarch64 | 27 ++++++++++++++++++ test/sql/test_files/T/orc_tinyint_aarch64 | 14 +++++++++ test/sql/test_files/orc_format/tinyint.orc | Bin 0 -> 247 bytes 4 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 test/sql/test_files/R/orc_tinyint_aarch64 create mode 100644 test/sql/test_files/T/orc_tinyint_aarch64 create mode 100644 test/sql/test_files/orc_format/tinyint.orc diff --git a/be/src/formats/orc/apache-orc/c++/src/ColumnReader.cc b/be/src/formats/orc/apache-orc/c++/src/ColumnReader.cc index 35f35af941f05..50362043a561f 100644 --- a/be/src/formats/orc/apache-orc/c++/src/ColumnReader.cc +++ b/be/src/formats/orc/apache-orc/c++/src/ColumnReader.cc @@ -141,7 +141,9 @@ void ColumnReader::seekToRowGroup(PositionProviderMap* positions) { */ void expandBytesToLongs(int64_t* buffer, uint64_t numValues) { for (size_t i = numValues - 1; i < numValues; --i) { - buffer[i] = reinterpret_cast<char*>(buffer)[i]; + // compiler treat char as unsigned on aarch64, negative tinyint will be wrong, + // so use 'int8_t' not 'char' to cast + buffer[i] = reinterpret_cast<int8_t*>(buffer)[i]; } } diff --git a/test/sql/test_files/R/orc_tinyint_aarch64 b/test/sql/test_files/R/orc_tinyint_aarch64 new file mode 100644 index 0000000000000..21c33d70ab0a0 --- /dev/null +++ b/test/sql/test_files/R/orc_tinyint_aarch64 @@ -0,0 +1,27 @@ +-- name: 
orc_tinyint_aarch64 + +create database db_${uuid0}; +use db_${uuid0}; + +shell: ossutil64 mkdir oss://${oss_bucket}/test_files/orc_format/${uuid0} >/dev/null || echo "exit 0" >/dev/null + +shell: ossutil64 cp --force ./sql/test_files/orc_format/tinyint.orc oss://${oss_bucket}/test_files/orc_format/${uuid0}/ | grep -Pv "(average|elapsed)" +-- result: +0 + +Succeed: Total num: 1, size: 247. OK num: 1(upload 1 files). +-- !result + +select * from files('path' = 'oss://${oss_bucket}/test_files/orc_format/${uuid0}/*', 'format' = 'orc'); +-- result: +-128 +-- !result + +create table t1 as select * from files('path' = 'oss://${oss_bucket}/test_files/orc_format/${uuid0}/*', 'format' = 'orc'); + +desc t1; +-- result: +k1 tinyint YES true None +-- !result + +shell: ossutil64 rm -rf oss://${oss_bucket}/test_files/orc_format/${uuid0}/ > /dev/null diff --git a/test/sql/test_files/T/orc_tinyint_aarch64 b/test/sql/test_files/T/orc_tinyint_aarch64 new file mode 100644 index 0000000000000..c59259b8608f4 --- /dev/null +++ b/test/sql/test_files/T/orc_tinyint_aarch64 @@ -0,0 +1,14 @@ +-- name: orc_tinyint_aarch64 + +create database db_${uuid0}; +use db_${uuid0}; + +shell: ossutil64 mkdir oss://${oss_bucket}/test_files/orc_format/${uuid0} >/dev/null || echo "exit 0" >/dev/null +shell: ossutil64 cp --force ./sql/test_files/orc_format/tinyint.orc oss://${oss_bucket}/test_files/orc_format/${uuid0}/ | grep -Pv "(average|elapsed)" + +select * from files('path' = 'oss://${oss_bucket}/test_files/orc_format/${uuid0}/*', 'format' = 'orc'); + +create table t1 as select * from files('path' = 'oss://${oss_bucket}/test_files/orc_format/${uuid0}/*', 'format' = 'orc'); +desc t1; + +shell: ossutil64 rm -rf oss://${oss_bucket}/test_files/orc_format/${uuid0}/ > /dev/null diff --git a/test/sql/test_files/orc_format/tinyint.orc b/test/sql/test_files/orc_format/tinyint.orc new file mode 100644 index 0000000000000000000000000000000000000000..dc435b12d46d8264d4bea95a9cb11f4b0f95aa7f GIT binary patch literal 
247 zcmZ9Gy$ZrG6ov0iOSuNY5Rk?$N~bDVszrxRf`cOXgZKhkDvF?xisBZ00w300i``t# zcb4