From 3deca380798a44822ce940c78fac32d7c08fd522 Mon Sep 17 00:00:00 2001 From: zy-kkk Date: Fri, 22 Nov 2024 16:57:48 +0800 Subject: [PATCH] [Enhancement](jdbc catalog) Support reading some array types of PostgreSQL --- .../postgresql/init/02-create-table.sql | 88 ++++++++ .../postgresql/init/04-insert.sql | 210 ++++++++++++++++++ .../doris/jdbc/PostgreSQLJdbcExecutor.java | 85 ++++++- .../java/org/apache/doris/common/Config.java | 2 +- .../jdbc/client/JdbcPostgreSQLClient.java | 91 +++++++- .../datasource/jdbc/util/JdbcFieldSchema.java | 15 ++ .../jdbc/test_pg_jdbc_catalog.out | 11 + .../jdbc/test_pg_jdbc_catalog.groovy | 5 + 8 files changed, 503 insertions(+), 4 deletions(-) diff --git a/docker/thirdparties/docker-compose/postgresql/init/02-create-table.sql b/docker/thirdparties/docker-compose/postgresql/init/02-create-table.sql index 7983d55a018917..bafdd615839e14 100644 --- a/docker/thirdparties/docker-compose/postgresql/init/02-create-table.sql +++ b/docker/thirdparties/docker-compose/postgresql/init/02-create-table.sql @@ -263,3 +263,91 @@ CREATE TABLE catalog_pg_test.num_zero ( id varchar(20) NULL, num numeric NULL ); + +CREATE TABLE catalog_pg_test.test_all_types_array ( + id int4 NOT NULL, + char_value _bpchar NULL, + varchar_value _varchar NULL, + date_value _date NULL, + smallint_value _int2 NULL, + int_value _int4 NULL, + bigint_value _int8 NULL, + timestamp_value _timestamp NULL, + decimal_value _numeric NULL, + bit_value _bit NULL, + real_value _float4 NULL, + cidr_value _cidr NULL, + inet_value _inet NULL, + macaddr_value _macaddr NULL, + bitn_value _bit NULL, + bitnv_value _varbit NULL, + jsonb_value _jsonb NULL, + point_value _point NULL, + line_value _line NULL, + lseg_value _lseg NULL, + box_value _box NULL, + path_value _path NULL, + polygon_value _polygon NULL, + circle_value _circle NULL +); + + +CREATE TABLE catalog_pg_test.test_all_types_2d_array ( + id int4 NOT NULL, + char_value _bpchar NULL, + varchar_value _varchar NULL, + date_value _date NULL, + smallint_value _int2 NULL, + int_value _int4 NULL, + bigint_value _int8 NULL, + timestamp_value _timestamp NULL, + decimal_value _numeric NULL, + bit_value _bit NULL, + real_value _float4 NULL, + cidr_value _cidr NULL, + inet_value _inet NULL, + macaddr_value _macaddr NULL, + bitn_value _bit NULL, + bitnv_value _varbit NULL, + jsonb_value _jsonb NULL, + point_value _point NULL, + line_value _line NULL, + lseg_value _lseg NULL, + box_value _box NULL, + path_value _path NULL, + polygon_value _polygon NULL, + circle_value _circle NULL +); + +CREATE TABLE catalog_pg_test.test_all_support_types_array ( + id int4 NOT NULL, + char_value bpchar[] NULL, + varchar_value varchar[] NULL, + text_value text[] null, + date_value date[] NULL, + smallint_value int2[] NULL, + int_value int4[] NULL, + bigint_value int8[] NULL, + timestamp_value timestamp[] NULL, + timestamptz_value timestamptz[] null, + bool_value bool[] NULL, + float4_value float4[] null, + float8_value float8[] null +); + +CREATE TABLE catalog_pg_test.test_all_support_types_array_2d ( + id int4 NOT NULL, + char_value bpchar[][] NULL, + varchar_value varchar[][] NULL, + text_value text[][] NULL, + date_value date[][] NULL, + smallint_value int2[][] NULL, + int_value int4[][] NULL, + bigint_value int8[][] NULL, + timestamp_value timestamp[][] NULL, + timestamptz_value timestamptz[][] NULL, + bool_value bool[][] NULL, + float4_value float4[][] NULL, + float8_value float8[][] NULL +); + diff --git a/docker/thirdparties/docker-compose/postgresql/init/04-insert.sql b/docker/thirdparties/docker-compose/postgresql/init/04-insert.sql index ae3570dfc186ce..b2a14148ead53e 100644 --- a/docker/thirdparties/docker-compose/postgresql/init/04-insert.sql +++ b/docker/thirdparties/docker-compose/postgresql/init/04-insert.sql @@ -2730,3 +2730,213 @@ values('1','A','2020-09-02 00:00:00') INSERT INTO catalog_pg_test.num_zero (id, num) VALUES ('001', 123.45); INSERT INTO catalog_pg_test.num_zero (id, num) VALUES ('002', 678.90); INSERT INTO catalog_pg_test.num_zero (id, num) VALUES ('003', 1011.12); + +INSERT INTO catalog_pg_test.test_all_types_2d_array VALUES +(1, + ARRAY[ARRAY['abc', 'def']::text[], ARRAY['ghi', 'jkl']::text[]], + ARRAY[ARRAY['abc', 'def']::text[], ARRAY['ghi', 'jkl']::text[]], + ARRAY[ARRAY['2022-10-11'::date, '2022-11-12'::date], ARRAY['2023-01-01'::date, '2023-02-02'::date]], + ARRAY[ARRAY[1, 2], ARRAY[3, 4]], + ARRAY[ARRAY[1, 2], ARRAY[2, 3]], + ARRAY[ARRAY[3, 4], ARRAY[4, 5]], + ARRAY[ARRAY['2022-10-22 10:59:59'::timestamp, '2023-01-01 12:00:00'::timestamp], + ARRAY['2023-02-01 14:30:00'::timestamp, '2023-02-10 16:45:00'::timestamp]], + ARRAY[ARRAY[34.123, 45.678], ARRAY[56.789, 67.890]], + ARRAY[ARRAY[0::bit, 1::bit], ARRAY[1::bit, 0::bit]], + ARRAY[ARRAY[12.123456, 34.567890], ARRAY[56.789123, 45.678345]], + ARRAY[ARRAY['10.16.10.14'::cidr, '192.168.1.1'::cidr], + ARRAY['172.16.0.1'::cidr, '192.168.2.2'::cidr]], + ARRAY[ARRAY['10.16.10.14'::inet, '192.168.1.1'::inet], + ARRAY['172.16.0.1'::inet, '192.168.2.2'::inet]], + ARRAY[ARRAY['ff:ff:ff:ff:ff:AA'::macaddr, '00:14:22:01:23:45'::macaddr], + ARRAY['11:22:33:44:55:66'::macaddr, '77:88:99:00:11:22'::macaddr]], + ARRAY[ARRAY['1010101010'::bit(10), '1100101010'::bit(10)], + ARRAY['1110001111'::bit(10), '0001110000'::bit(10)]], + ARRAY[ARRAY[cast(10 as bit(5)), cast(20 as bit(5))], + ARRAY[cast(30 as bit(5)), cast(40 as bit(5))]], + ARRAY[ARRAY['{"id":1}'::jsonb, '{"id":2}'::jsonb], + ARRAY['{"id":3}'::jsonb, '{"id":4}'::jsonb]], + ARRAY[ARRAY['(1,1)'::point, '(2,2)'::point], + ARRAY['(3,3)'::point, '(4,4)'::point]], + ARRAY[ARRAY['{1,1,1}'::line, '{2,2,2}'::line], + ARRAY['{3,3,3}'::line, '{4,4,4}'::line]], + ARRAY[ARRAY['(1,1),(2,2)'::lseg, '(3,3),(4,4)'::lseg], + ARRAY['(5,5),(6,6)'::lseg, '(7,7),(8,8)'::lseg]], + ARRAY[ARRAY['(1,1),(2,2)'::box, '(5,5),(6,6)'::box], + ARRAY['(7,7),(8,8)'::box, '(9,9),(10,10)'::box]], + ARRAY[ARRAY['(1,1),(2,2),(2,1)'::path, '(3,3),(4,4),(5,5)'::path], + ARRAY['(6,6),(7,7),(8,8)'::path, '(9,9),(10,10),(11,11)'::path]], + ARRAY[ARRAY['((1,1),(2,2),(2,1))'::polygon, '((3,3),(4,4),(5,5))'::polygon], + ARRAY['((6,6),(7,7),(8,8))'::polygon, '((9,9),(10,10),(11,11))'::polygon]], + ARRAY[ARRAY['<(0,0),1>'::circle, '<(5,5),10>'::circle], + ARRAY['<(2,2),3>'::circle, '<(7,7),14>'::circle]] +); + +INSERT INTO catalog_pg_test.test_all_types_array VALUES +(1, + ARRAY['abc', 'def'], + ARRAY['ghi', 'jkl'], + ARRAY['2022-10-11'::date, '2022-11-12'::date], + ARRAY[1, 2], + ARRAY[2, 3], + ARRAY[3, 4], + ARRAY['2022-10-22 10:59:59'::timestamp, '2023-01-01 12:00:00'::timestamp], + ARRAY[34.123, 45.678], + ARRAY[0::bit, 1::bit], + ARRAY[12.123456, 34.567890], + ARRAY['10.16.10.14'::cidr, '192.168.1.1'::cidr], + ARRAY['10.16.10.14'::inet, '192.168.0.1'::inet], + ARRAY['ff:ff:ff:ff:ff:AA'::macaddr, '00:14:22:01:23:45'::macaddr], + ARRAY['1010101010'::bit(10), '1100101010'::bit(10)], + ARRAY[cast('00010' as bit(5)), cast('10100' as bit(5))], -- bit(5) values need 5 bits + ARRAY['{"id":1}'::jsonb, '{"id":2}'::jsonb], + ARRAY['(1,1)'::point, '(2,2)'::point], + ARRAY['{1,1,1}'::line, '{2,2,2}'::line], + ARRAY['(1,1),(2,2)'::lseg, '(3,3),(4,4)'::lseg], + ARRAY['(1,1),(2,2)'::box, '(5,5),(6,6)'::box], + ARRAY['(1,1),(2,2),(2,1)'::path, '(3,3),(4,4),(5,5)'::path], + ARRAY['((1,1),(2,2),(2,1))'::polygon, '((3,3),(4,4),(5,5))'::polygon], + ARRAY['<(0,0),1>'::circle, '<(5,5),10>'::circle] +); + +INSERT INTO catalog_pg_test.test_all_support_types_array ( + id, char_value, varchar_value, text_value, date_value, + smallint_value, int_value, bigint_value, timestamp_value, + timestamptz_value, bool_value, float4_value, float8_value +) VALUES ( + 1, + ARRAY['A', 'B', 'C'], + ARRAY['Hello', 'World'], + ARRAY['Text1', 'Text2'], + ARRAY['2024-01-01'::date, '2024-01-02'::date], + ARRAY[10, 20], + ARRAY[100, 200], + ARRAY[1000, 2000], + ARRAY['2024-01-01 10:00:00'::timestamp, '2024-01-02 12:00:00'::timestamp], + ARRAY['2024-01-01 10:00:00+00'::timestamptz, '2024-01-02 12:00:00+00'::timestamptz], + ARRAY[true, false], + ARRAY[1.1, 2.2], + ARRAY[3.3, 4.4] +); + +INSERT INTO catalog_pg_test.test_all_support_types_array ( + id, char_value, varchar_value, text_value, date_value, + smallint_value, int_value, bigint_value, timestamp_value, + timestamptz_value, bool_value, float4_value, float8_value +) VALUES ( + 2, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null +); + +INSERT INTO catalog_pg_test.test_all_support_types_array ( + id, char_value, varchar_value, text_value, date_value, + smallint_value, int_value, bigint_value, timestamp_value, + timestamptz_value, bool_value, float4_value, float8_value +) VALUES ( + 3, + ARRAY['A', 'B', 'C', null], + ARRAY['Hello', 'World', null], + ARRAY['Text1', 'Text2', null], + ARRAY['2024-01-01'::date, '2024-01-02'::date, null], + ARRAY[10, 20, null], + ARRAY[100, 200, null], + ARRAY[1000, 2000, null], + ARRAY['2024-01-01 10:00:00'::timestamp, '2024-01-02 12:00:00'::timestamp, null], + ARRAY['2024-01-01 10:00:00+00'::timestamptz, '2024-01-02 12:00:00+00'::timestamptz, null], + ARRAY[true, false, null], + ARRAY[1.1, 2.2, null], + ARRAY[3.3, 4.4, null] +); + +INSERT INTO catalog_pg_test.test_all_support_types_array_2d ( + id, char_value, varchar_value, text_value, date_value, + smallint_value, int_value, bigint_value, timestamp_value, + timestamptz_value, bool_value, float4_value, float8_value +) VALUES ( + 1, + ARRAY[ARRAY['A', 'B'], ARRAY['C', 'D']], + ARRAY[ARRAY['Hello', 'World'], ARRAY['Foo', 'Bar']], + ARRAY[ARRAY['Text1', 'Text2'], ARRAY['Text3', 'Text4']], + ARRAY[ARRAY['2024-01-01'::date, '2024-01-02'::date], ARRAY['2024-02-01'::date, '2024-02-02'::date]], + ARRAY[ARRAY[10, 20], ARRAY[30, 40]], + ARRAY[ARRAY[100, 200], ARRAY[300, 400]], + ARRAY[ARRAY[1000, 2000], ARRAY[3000, 4000]], + ARRAY[ARRAY['2024-01-01 10:00:00'::timestamp, '2024-01-02 12:00:00'::timestamp], ARRAY['2024-03-01 10:00:00'::timestamp, '2024-03-02 12:00:00'::timestamp]], + ARRAY[ARRAY['2024-01-01 10:00:00+00'::timestamptz, '2024-01-02 12:00:00+00'::timestamptz], ARRAY['2024-03-01 10:00:00+00'::timestamptz, '2024-03-02 12:00:00+00'::timestamptz]], + ARRAY[ARRAY[true, false], ARRAY[false, true]], + ARRAY[ARRAY[1.1, 2.2], ARRAY[3.3, 4.4]], + ARRAY[ARRAY[5.5, 6.6], ARRAY[7.7, 8.8]] +); + +INSERT INTO catalog_pg_test.test_all_support_types_array_2d ( + id, char_value, varchar_value, text_value, date_value, + smallint_value, int_value, bigint_value, timestamp_value, + timestamptz_value, bool_value, float4_value, float8_value +) VALUES ( + 2, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null +); + +INSERT INTO catalog_pg_test.test_all_support_types_array_2d ( + id, char_value, varchar_value, text_value, date_value, + smallint_value, int_value, bigint_value, timestamp_value, + timestamptz_value, bool_value, float4_value, float8_value +) VALUES ( + 3, + ARRAY[ARRAY['A', null], ARRAY['C', 'D']], + ARRAY[ARRAY['Hello', null], ARRAY['Foo', 'Bar']], + ARRAY[ARRAY['Text1', null], ARRAY['Text3', 'Text4']], + ARRAY[ARRAY['2024-01-01'::date, null], ARRAY['2024-02-01'::date, '2024-02-02'::date]], + ARRAY[ARRAY[10, null], ARRAY[30, 40]], + ARRAY[ARRAY[100, null], ARRAY[300, 400]], + ARRAY[ARRAY[1000, null], ARRAY[3000, 4000]], + ARRAY[ARRAY['2024-01-01 10:00:00'::timestamp, null], ARRAY['2024-03-01 10:00:00'::timestamp, '2024-03-02 12:00:00'::timestamp]], + ARRAY[ARRAY['2024-01-01 10:00:00+00'::timestamptz, null], ARRAY['2024-03-01 10:00:00+00'::timestamptz, '2024-03-02 12:00:00+00'::timestamptz]], + ARRAY[ARRAY[true,null], ARRAY[false, true]], + ARRAY[ARRAY[1.1, null], ARRAY[3.3, 4.4]], + ARRAY[ARRAY[5.5, null], ARRAY[7.7, 8.8]] +); + + +INSERT INTO catalog_pg_test.test_all_support_types_array_2d ( + id, char_value, varchar_value, text_value, date_value, + smallint_value, int_value, bigint_value, timestamp_value, + timestamptz_value, bool_value, float4_value, float8_value +) VALUES ( + 4, + ARRAY[ARRAY['A', 'B'], ARRAY[NULL, NULL]], + ARRAY[ARRAY['Hello', 'World'], ARRAY[NULL, NULL]], + ARRAY[ARRAY['Text1', 'Text2'], ARRAY[NULL, NULL]], + ARRAY[ARRAY['2024-01-01'::date, '2024-01-02'::date], ARRAY[NULL::date, NULL::date]], + ARRAY[ARRAY[10, 20], ARRAY[NULL::smallint, NULL::smallint]], + ARRAY[ARRAY[100, 200], ARRAY[NULL::int, NULL::int]], + ARRAY[ARRAY[1000, 2000], ARRAY[NULL::bigint, NULL::bigint]], + ARRAY[ARRAY['2024-01-01 10:00:00'::timestamp, '2024-01-02 12:00:00'::timestamp], ARRAY[NULL::timestamp, NULL::timestamp]], + ARRAY[ARRAY['2024-01-01 10:00:00+00'::timestamptz, '2024-01-02 12:00:00+00'::timestamptz], ARRAY[NULL::timestamptz, NULL::timestamptz]], + ARRAY[ARRAY[true, false], ARRAY[NULL::boolean, NULL::boolean]], + ARRAY[ARRAY[1.1, 2.2], ARRAY[NULL::real, NULL::real]], + ARRAY[ARRAY[5.5, 6.6], ARRAY[NULL::double precision, NULL::double precision]] +); + diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/PostgreSQLJdbcExecutor.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/PostgreSQLJdbcExecutor.java index 89fdb0ba997773..c25bcec9a497f1 100644 --- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/PostgreSQLJdbcExecutor.java +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/PostgreSQLJdbcExecutor.java @@ -22,14 +22,22 @@ import org.apache.doris.common.jni.vec.ColumnValueConverter; import org.apache.doris.common.jni.vec.VectorTable; +import com.google.common.collect.Lists; +import org.apache.log4j.Logger; + import java.math.BigDecimal; +import java.sql.Date; import java.sql.SQLException; import java.sql.Timestamp; import java.time.LocalDate; import java.time.LocalDateTime; import java.time.OffsetDateTime; +import java.util.ArrayList; +import java.util.List; public class PostgreSQLJdbcExecutor extends BaseJdbcExecutor { + private static final Logger LOG = Logger.getLogger(PostgreSQLJdbcExecutor.class); + public PostgreSQLJdbcExecutor(byte[] thriftParams) throws Exception { super(thriftParams); } @@ -41,7 +49,8 @@ protected void initializeBlock(int columnCount, String[] replaceStringList, int if (outputTable.getColumnType(i).getType() == Type.DATETIME || outputTable.getColumnType(i).getType() == Type.DATETIMEV2) { block.add(new Object[batchSizeNum]); - } else if (outputTable.getColumnType(i).getType() == Type.STRING) { + } else if (outputTable.getColumnType(i).getType() == Type.STRING + || outputTable.getColumnType(i).getType() == Type.ARRAY) { block.add(new Object[batchSizeNum]); } else { block.add(outputTable.getColumn(i).newObjectContainerArray(batchSizeNum)); @@ -78,6 +87,9 @@ protected Object getColumnValue(int columnIndex, ColumnType type, String[] repla case VARCHAR: case STRING: return resultSet.getObject(columnIndex + 1); + case ARRAY: + java.sql.Array array = resultSet.getArray(columnIndex + 1); + return array == null ? null : convertArrayToList(array.getArray()); default: throw new IllegalArgumentException("Unsupported column type: " + type.getType()); } @@ -110,6 +122,10 @@ protected ColumnValueConverter getOutputConverter(ColumnType columnType, String return input.toString(); } }, String.class); + case ARRAY: + return createConverter( + (Object input) -> convertArray((List) input, columnType.getChildTypes().get(0)), + List.class); default: return null; } @@ -122,4 +138,71 @@ private static String pgByteArrayToHexString(byte[] bytes) { } return hexString.toString(); } + + private List convertArrayToList(Object array) { + if (array == null) { + return null; + } + + int length = java.lang.reflect.Array.getLength(array); + List list = new ArrayList<>(length); + + for (int i = 0; i < length; i++) { + Object element = java.lang.reflect.Array.get(array, i); + list.add(element); + } + + return list; + } + + private List convertArray(List array, ColumnType type) { + if (array == null) { + return null; + } + switch (type.getType()) { + case DATE: + case DATEV2: { + List result = new ArrayList<>(); + for (Object element : array) { + if (element == null) { + result.add(null); + } else { + result.add(((Date) element).toLocalDate()); + } + } + return result; + } + case DATETIME: + case DATETIMEV2: { + List result = new ArrayList<>(); + for (Object element : array) { + if (element == null) { + result.add(null); + } else { + if (element instanceof Timestamp) { + result.add(((Timestamp) element).toLocalDateTime()); + } else if (element instanceof OffsetDateTime) { + result.add(((OffsetDateTime) element).toLocalDateTime()); + } else { + result.add((LocalDateTime) element); + } + } + } + return result; + } + case ARRAY: + List> resultArray = Lists.newArrayList(); + for (Object element : array) { + if (element == null) { + resultArray.add(null); + } else { + List nestedList = convertArrayToList(element); + resultArray.add(convertArray(nestedList, type.getChildTypes().get(0))); + } + } + return resultArray; + default: + return array; + } + } } diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 689c85940dfc39..f74600133869a3 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -164,7 +164,7 @@ public class Config extends ConfigBase { "MySQL Jdbc Catalog mysql does not support pushdown functions"}) public static String[] jdbc_mysql_unsupported_pushdown_functions = {"date_trunc", "money_format", "negative"}; - @ConfField(description = {"强制 SQLServer Jdbc Catalog 加密为 false", + @ConfField(mutable = true, masterOnly = true, description = {"强制 SQLServer Jdbc Catalog 加密为 false", "Force SQLServer Jdbc Catalog encrypt to false"}) public static boolean force_sqlserver_jdbc_encrypt_false = false; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcPostgreSQLClient.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcPostgreSQLClient.java index e6694ffdc674bb..59cd91f156a8ac 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcPostgreSQLClient.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcPostgreSQLClient.java @@ -17,17 +17,69 @@ package org.apache.doris.datasource.jdbc.client; +import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.Type; +import org.apache.doris.common.util.Util; import org.apache.doris.datasource.jdbc.util.JdbcFieldSchema; +import com.google.common.collect.Lists; + +import java.sql.Connection; +import java.sql.DatabaseMetaData; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Types; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; + public class JdbcPostgreSQLClient extends JdbcClient { protected JdbcPostgreSQLClient(JdbcClientConfig jdbcClientConfig) { super(jdbcClientConfig); } + @Override + public List getJdbcColumnsInfo(String localDbName, String localTableName) { + Connection conn = null; + ResultSet rs = null; + List tableSchema = Lists.newArrayList(); + String remoteDbName = getRemoteDatabaseName(localDbName); + String remoteTableName = getRemoteTableName(localDbName, localTableName); + try { + conn = getConnection(); + DatabaseMetaData databaseMetaData = conn.getMetaData(); + String catalogName = getCatalogName(conn); + rs = getRemoteColumns(databaseMetaData, catalogName, remoteDbName, remoteTableName); + while (rs.next()) { + int dataType = rs.getInt("DATA_TYPE"); + int arrayDimensions = 0; + if (dataType == Types.ARRAY) { + String columnName = rs.getString("COLUMN_NAME"); + try (PreparedStatement pstmt = conn.prepareStatement( + String.format("SELECT array_ndims(%s) FROM %s.%s LIMIT 1", + columnName, remoteDbName, remoteTableName))) { + try (ResultSet arrayRs = pstmt.executeQuery()) { + if (arrayRs.next()) { + arrayDimensions = arrayRs.getInt(1); + } + } + } + } + tableSchema.add(new JdbcFieldSchema(rs, arrayDimensions)); + } + } catch (SQLException e) { + throw new JdbcClientException("failed to get jdbc columns info for remote table `%s.%s`: %s", + remoteDbName, remoteTableName, Util.getRootCauseMessage(e)); + } finally { + close(rs, conn); + } + return tableSchema; + } + @Override protected String[] getTableTypes() { return new String[] {"TABLE", "PARTITIONED TABLE", "VIEW", "MATERIALIZED VIEW", "FOREIGN TABLE"}; @@ -99,8 +151,43 @@ protected Type jdbcTypeToDoris(JdbcFieldSchema fieldSchema) { case "json": case "jsonb": return ScalarType.createStringType(); - default: - return Type.UNSUPPORTED; + default: { + if (fieldSchema.getDataType() == Types.ARRAY && pgType.startsWith("_")) { + return convertArrayType(fieldSchema); + } else { + return Type.UNSUPPORTED; + } + } + } + } + + private Type convertArrayType(JdbcFieldSchema fieldSchema) { + int arrayDimensions = fieldSchema.getArrayDimensions().orElse(0); + if (arrayDimensions == 0) { + return Type.UNSUPPORTED; + } + + String innerType = fieldSchema.getDataTypeName().orElse("unknown").substring(1); + String[] supportedInnerType = new String[] { + "int2", "int4", "int8", "smallserial", "serial", + "bigserial", "float4", "float8", "timestamp", "timestamptz", + "date", "bool", "bpchar", "varchar", "text" + }; + + boolean isSupported = Arrays.asList(supportedInnerType).contains(innerType); + if (!isSupported) { + return Type.UNSUPPORTED; + } + if (innerType.equals("bpchar")) { + innerType = "text"; + } + JdbcFieldSchema innerFieldSchema = new JdbcFieldSchema(fieldSchema); + innerFieldSchema.setDataTypeName(Optional.of(innerType)); + Type arrayInnerType = jdbcTypeToDoris(innerFieldSchema); + Type arrayType = ArrayType.create(arrayInnerType, true); + for (int i = 1; i < arrayDimensions; i++) { + arrayType = ArrayType.create(arrayType, true); } + return arrayType; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/util/JdbcFieldSchema.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/util/JdbcFieldSchema.java index 7d643fac25c7e4..8cbd27ed684072 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/util/JdbcFieldSchema.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/util/JdbcFieldSchema.java @@ -37,6 +37,7 @@ public class JdbcFieldSchema { // For NUMERIC/DECIMAL, columnSize means precision. protected Optional columnSize; protected Optional decimalDigits; + protected Optional arrayDimensions; // Base number (usually 10 or 2) protected int numPrecRadix; // column description @@ -53,6 +54,7 @@ public JdbcFieldSchema(JdbcFieldSchema other) { this.dataTypeName = other.dataTypeName; this.columnSize = other.columnSize; this.decimalDigits = other.decimalDigits; + this.arrayDimensions = other.arrayDimensions; this.numPrecRadix = other.numPrecRadix; this.remarks = other.remarks; this.charOctetLength = other.charOctetLength; @@ -71,6 +73,19 @@ public JdbcFieldSchema(ResultSet rs) throws SQLException { this.charOctetLength = rs.getInt("CHAR_OCTET_LENGTH"); } + public JdbcFieldSchema(ResultSet rs, int arrayDimensions) throws SQLException { + this.columnName = rs.getString("COLUMN_NAME"); + this.dataType = getInteger(rs, "DATA_TYPE").orElseThrow(() -> new IllegalStateException("DATA_TYPE is null")); + this.dataTypeName = Optional.ofNullable(rs.getString("TYPE_NAME")); + this.columnSize = getInteger(rs, "COLUMN_SIZE"); + this.decimalDigits = getInteger(rs, "DECIMAL_DIGITS"); + this.numPrecRadix = rs.getInt("NUM_PREC_RADIX"); + this.isAllowNull = rs.getInt("NULLABLE") != DatabaseMetaData.columnNoNulls; + this.remarks = rs.getString("REMARKS"); + this.charOctetLength = rs.getInt("CHAR_OCTET_LENGTH"); + this.arrayDimensions = Optional.of(arrayDimensions); + } + public JdbcFieldSchema(ResultSet rs, Map dataTypeOverrides) throws SQLException { this.columnName = rs.getString("COLUMN_NAME"); this.dataType = getInteger(rs, "DATA_TYPE").orElseThrow(() -> new IllegalStateException("DATA_TYPE is null")); diff --git a/regression-test/data/external_table_p0/jdbc/test_pg_jdbc_catalog.out b/regression-test/data/external_table_p0/jdbc/test_pg_jdbc_catalog.out index 1950235745749e..e2993ffbaa7fcd 100644 --- a/regression-test/data/external_table_p0/jdbc/test_pg_jdbc_catalog.out +++ b/regression-test/data/external_table_p0/jdbc/test_pg_jdbc_catalog.out @@ -2234,6 +2234,17 @@ doris3 20 1 abc def 2022-10-11 1 2 3 2022-10-22T10:59:59 34.123 false 12.123456 10.16.10.14/32 10.16.10.14 ff:ff:ff:ff:ff:aa 1010101010 01010 1 {"id": 1} (1.0,1.0) {1.0,1.0,1.0} [(1.0,1.0),(2.0,2.0)] (2.0,2.0),(1.0,1.0) ((1.0,1.0),(2.0,2.0),(2.0,1.0)) ((1.0,1.0),(2.0,2.0),(2.0,1.0)) <(0.0,0.0),1.0> 2 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N 2 \N \N \N \N \N \N \N \N +-- !select_all_arr_types -- +1 ["A", "B", "C"] ["Hello", "World"] ["Text1", "Text2"] ["2024-01-01", "2024-01-02"] [10, 20] [100, 200] [1000, 2000] ["2024-01-01 10:00:00.000000", "2024-01-02 12:00:00.000000"] ["2024-01-01 18:00:00.000000", "2024-01-02 20:00:00.000000"] [1, 0] [1.1, 2.2] [3.3, 4.4] +2 \N \N \N \N \N \N \N \N \N \N \N \N +3 ["A", "B", "C", null] ["Hello", "World", null] ["Text1", "Text2", null] ["2024-01-01", "2024-01-02", null] [10, 20, null] [100, 200, null] [1000, 2000, null] ["2024-01-01 10:00:00.000000", "2024-01-02 12:00:00.000000", null] ["2024-01-01 18:00:00.000000", "2024-01-02 20:00:00.000000", null] [1, 0, null] [1.1, 2.2, null] [3.3, 4.4, null] + +-- !select_all_arr2d_types -- +1 [["A", "B"], ["C", "D"]] [["Hello", "World"], ["Foo", "Bar"]] [["Text1", "Text2"], ["Text3", "Text4"]] [["2024-01-01", "2024-01-02"], ["2024-02-01", "2024-02-02"]] [[10, 20], [30, 40]] [[100, 200], [300, 400]] [[1000, 2000], [3000, 4000]] [["2024-01-01 10:00:00.000000", "2024-01-02 12:00:00.000000"], ["2024-03-01 10:00:00.000000", "2024-03-02 12:00:00.000000"]] [["2024-01-01 18:00:00.000000", "2024-01-02 20:00:00.000000"], ["2024-03-01 18:00:00.000000", "2024-03-02 20:00:00.000000"]] [[1, 0], [0, 1]] [[1.1, 2.2], [3.3, 4.4]] [[5.5, 6.6], [7.7, 8.8]] +2 \N \N \N \N \N \N \N \N \N \N \N \N +3 [["A", null], ["C", "D"]] [["Hello", null], ["Foo", "Bar"]] [["Text1", null], ["Text3", "Text4"]] [["2024-01-01", null], ["2024-02-01", "2024-02-02"]] [[10, null], [30, 40]] [[100, null], [300, 400]] [[1000, null], [3000, 4000]] [["2024-01-01 10:00:00.000000", null], ["2024-03-01 10:00:00.000000", "2024-03-02 12:00:00.000000"]] [["2024-01-01 18:00:00.000000", null], ["2024-03-01 18:00:00.000000", "2024-03-02 20:00:00.000000"]] [[1, null], [0, 1]] [[1.1, null], [3.3, 4.4]] [[5.5, null], [7.7, 8.8]] +4 [["A", "B"], [null, null]] [["Hello", "World"], [null, null]] [["Text1", "Text2"], [null, null]] [["2024-01-01", "2024-01-02"], [null, null]] [[10, 20], [null, null]] [[100, 200], [null, null]] [[1000, 2000], [null, null]] [["2024-01-01 10:00:00.000000", "2024-01-02 12:00:00.000000"], [null, null]] [["2024-01-01 18:00:00.000000", "2024-01-02 20:00:00.000000"], [null, null]] [[1, 0], [null, null]] [[1.1, 2.2], [null, null]] [[5.5, 6.6], [null, null]] + -- !ctas -- 1 abc def 2022-10-11 1 2 3 2022-10-22T10:59:59 34.123 false 12.123456 10.16.10.14/32 10.16.10.14 ff:ff:ff:ff:ff:aa 1010101010 01010 1 {"id": 1} (1.0,1.0) {1.0,1.0,1.0} [(1.0,1.0),(2.0,2.0)] (2.0,2.0),(1.0,1.0) ((1.0,1.0),(2.0,2.0),(2.0,1.0)) ((1.0,1.0),(2.0,2.0),(2.0,1.0)) <(0.0,0.0),1.0> 2 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N 2 \N \N \N \N \N \N \N \N diff --git a/regression-test/suites/external_table_p0/jdbc/test_pg_jdbc_catalog.groovy b/regression-test/suites/external_table_p0/jdbc/test_pg_jdbc_catalog.groovy index 7f597493b9183d..c898f31ace65b3 100644 --- a/regression-test/suites/external_table_p0/jdbc/test_pg_jdbc_catalog.groovy +++ b/regression-test/suites/external_table_p0/jdbc/test_pg_jdbc_catalog.groovy @@ -146,6 +146,11 @@ suite("test_pg_jdbc_catalog", "p0,external,pg,external_docker,external_docker_pg // test select all types order_qt_select_all_types """select * from ${test_all_types}; """ + // test select all array types + order_qt_select_all_arr_types """select * from test_all_support_types_array order by 1;""" + + order_qt_select_all_arr2d_types """select * from test_all_support_types_array_2d order by 1;""" + // test test ctas sql """ drop table if exists internal.${internal_db_name}.${test_ctas} """ sql """ create table internal.${internal_db_name}.${test_ctas}