From af48b310c59593c48bc0491a0b6960d2d8a83e16 Mon Sep 17 00:00:00 2001 From: Zihan Li Date: Wed, 9 Aug 2023 12:15:48 -0700 Subject: [PATCH] [GOBBLIN-1871] Fix bug that hiveMetadataWriter may make the hive schema columns inconsistent with the Avro.schema.literal (#3734) * address comments * use connectionmanager when httpclient is not closeable * [GOBBLIN-1871] Fix bug that hiveMetadataWriter may make the hive schema columns inconsistent with the Avro.schema.literal --------- Co-authored-by: Zihan Li --- .../org/apache/gobblin/hive/orc/HiveOrcSerDeManager.java | 2 +- .../apache/gobblin/hive/writer/HiveMetadataWriter.java | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/orc/HiveOrcSerDeManager.java b/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/orc/HiveOrcSerDeManager.java index 9a7376e9b7..98adb4ca7e 100644 --- a/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/orc/HiveOrcSerDeManager.java +++ b/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/orc/HiveOrcSerDeManager.java @@ -255,7 +255,6 @@ private static boolean isORC(Path file, FileSystem fs) private void addSchemaProperties(Path path, HiveRegistrationUnit hiveUnit) throws IOException { - Preconditions.checkArgument(this.fs.getFileStatus(path).isDirectory(), path + " is not a directory."); try (Timer.Context context = metricContext.timer(HIVE_SPEC_SCHEMA_READING_TIMER).time()) { addSchemaPropertiesHelper(path, hiveUnit); } @@ -281,6 +280,7 @@ protected void addSchemaPropertiesHelper(Path path, HiveRegistrationUnit hiveUni schema = TypeInfoUtils.getTypeInfoFromObjectInspector( TypeDescriptionToObjectInspectorUtil.getObjectInspector(orcSchema)); } else { + Preconditions.checkArgument(this.fs.getFileStatus(path).isDirectory(), path + " is not a directory."); schema = getSchemaFromLatestFile(path, this.fs); } if (schema instanceof StructTypeInfo) { diff --git 
a/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/writer/HiveMetadataWriter.java b/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/writer/HiveMetadataWriter.java index 138ac3d941..e5ac2741c3 100644 --- a/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/writer/HiveMetadataWriter.java +++ b/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/writer/HiveMetadataWriter.java @@ -456,12 +456,9 @@ private void schemaUpdateHelper(GobblinMetadataChangeEvent gmce, HiveSpec spec, //Force to set the schema even there is no schema literal defined in the spec String latestSchema = latestSchemaMap.get(tableKey); if (latestSchema != null) { - String tableSchema = spec.getTable().getSerDeProps().getProp(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName()); - if (tableSchema == null || !tableSchema.equals(latestSchema)) { - spec.getTable().getSerDeProps() - .setProp(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName(), latestSchemaMap.get(tableKey)); - HiveMetaStoreUtils.updateColumnsInfoIfNeeded(spec); - } + spec.getTable().getSerDeProps() + .setProp(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName(), latestSchema); + HiveMetaStoreUtils.updateColumnsInfoIfNeeded(spec); } }