Core,Open-API: Don't expose the last-column-id

Okay, I've added this to the spec a while ago: apache#7445 But I think this was a mistake, and we should not expose this to the public APIs, as it is much better to track this internally. I noticed this while reviewing apache/iceberg-rust#587 Removing this as part of the APIs in Java, and the Open-API update makes it much more resilient, and don't require the clients to compute this value. For example. when there are two conflicting schema changes, the last-column-id must be recomputed correctly when doing the retry operation.
Fokko · Nov 11, 2024 · 23aeca3 · 23aeca3
1 parent 82a2362
commit 23aeca3
Show file tree

Hide file tree

Showing 7 changed files with 41 additions and 16 deletions.
diff --git a/core/src/main/java/org/apache/iceberg/MetadataUpdate.java b/core/src/main/java/org/apache/iceberg/MetadataUpdate.java
@@ -86,6 +86,16 @@ class AddSchema implements MetadataUpdate {
     private final Schema schema;
     private final int lastColumnId;
 
+    public AddSchema(Schema schema) {
+      this(schema, schema.highestFieldId());
+    }
+
+    /**
+     * Set the schema
+     *
+     * @deprecated since 1.8.0, will be removed 1.9.0 or 2.0.0, use AddSchema(schema).
+     */
+    @Deprecated
     public AddSchema(Schema schema, int lastColumnId) {
       this.schema = schema;
       this.lastColumnId = lastColumnId;

diff --git a/core/src/main/java/org/apache/iceberg/MetadataUpdateParser.java b/core/src/main/java/org/apache/iceberg/MetadataUpdateParser.java
@@ -30,8 +30,11 @@
 import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
 import org.apache.iceberg.util.JsonUtil;
 import org.apache.iceberg.view.ViewVersionParser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class MetadataUpdateParser {
+  private static final Logger LOG = LoggerFactory.getLogger(MetadataUpdateParser.class);
 
   private MetadataUpdateParser() {}
 
@@ -462,6 +465,8 @@ private static MetadataUpdate readAddSchema(JsonNode node) {
     Schema schema = SchemaParser.fromJson(schemaNode);
     int lastColumnId;
     if (node.has(LAST_COLUMN_ID)) {
+      LOG.warn(
+          "Field last-column-id of MetadataUpdate.AddSchema is since 1.8.0, will be removed 1.9.0 or 2.0.0");
       lastColumnId = JsonUtil.getInt(LAST_COLUMN_ID, node);
     } else {
       lastColumnId = schema.highestFieldId();

diff --git a/core/src/main/java/org/apache/iceberg/SchemaUpdate.java b/core/src/main/java/org/apache/iceberg/SchemaUpdate.java
@@ -444,7 +444,7 @@ public Schema apply() {
 
   @Override
   public void commit() {
-    TableMetadata update = applyChangesToMetadata(base.updateSchema(apply(), lastColumnId));
+    TableMetadata update = applyChangesToMetadata(base.updateSchema(apply()));
     ops.commit(base, update);
   }
 

diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java b/core/src/main/java/org/apache/iceberg/TableMetadata.java
@@ -562,10 +562,23 @@ public TableMetadata withUUID() {
     return new Builder(this).assignUUID().build();
   }
 
+  /**
+   * Updates the schema
+   *
+   * @deprecated since 1.8.0, will be removed 1.9.0 or 2.0.0, use updateSchema(schema).
+   */
+  @Deprecated
   public TableMetadata updateSchema(Schema newSchema, int newLastColumnId) {
     return new Builder(this).setCurrentSchema(newSchema, newLastColumnId).build();
   }
 
+  /** Updates the schema */
+  public TableMetadata updateSchema(Schema newSchema) {
+    return new Builder(this)
+        .setCurrentSchema(newSchema, Math.max(this.lastColumnId, newSchema.highestFieldId()))
+        .build();
+  }
+
   // The caller is responsible to pass a newPartitionSpec with correct partition field IDs
   public TableMetadata updatePartitionSpec(PartitionSpec newPartitionSpec) {
     return new Builder(this).setDefaultPartitionSpec(newPartitionSpec).build();
@@ -1081,8 +1094,18 @@ public Builder setCurrentSchema(int schemaId) {
       return this;
     }
 
+    public Builder addSchema(Schema schema) {
+      addSchemaInternal(schema, schema.highestFieldId());
+      return this;
+    }
+
+    /**
+     * Add a new schema.
+     *
+     * @deprecated since 1.8.0, will be removed 1.9.0 or 2.0.0, use AddSchema(schema).
+     */
+    @Deprecated
     public Builder addSchema(Schema schema, int newLastColumnId) {
-      // TODO: remove requirement for newLastColumnId
       addSchemaInternal(schema, newLastColumnId);
       return this;
     }

diff --git a/core/src/main/java/org/apache/iceberg/view/ViewMetadata.java b/core/src/main/java/org/apache/iceberg/view/ViewMetadata.java
@@ -372,20 +372,15 @@ private int addSchemaInternal(Schema schema) {
         newSchema = schema;
       }
 
-      int highestFieldId = Math.max(highestFieldId(), newSchema.highestFieldId());
       schemas.add(newSchema);
       schemasById.put(newSchema.schemaId(), newSchema);
-      changes.add(new MetadataUpdate.AddSchema(newSchema, highestFieldId));
+      changes.add(new MetadataUpdate.AddSchema(newSchema));
 
       this.lastAddedSchemaId = newSchemaId;
 
       return newSchemaId;
     }
 
-    private int highestFieldId() {
-      return schemas.stream().map(Schema::highestFieldId).max(Integer::compareTo).orElse(0);
-    }
-
     private int reuseOrCreateNewSchemaId(Schema newSchema) {
       // if the schema already exists, use its id; otherwise use the highest id + 1
       int newSchemaId = INITIAL_SCHEMA_ID;

diff --git a/open-api/rest-catalog-open-api.py b/open-api/rest-catalog-open-api.py
@@ -1149,11 +1149,6 @@ class ViewMetadata(BaseModel):
 class AddSchemaUpdate(BaseUpdate):
     action: Literal['add-schema']
     schema_: Schema = Field(..., alias='schema')
-    last_column_id: Optional[int] = Field(
-        None,
-        alias='last-column-id',
-        description='The highest assigned column ID for the table. This is used to ensure columns are always assigned an unused ID when evolving schemas. When omitted, it will be computed on the server side.',
-    )
 
 
 class TableUpdate(BaseModel):

diff --git a/open-api/rest-catalog-open-api.yaml b/open-api/rest-catalog-open-api.yaml
@@ -2690,9 +2690,6 @@ components:
           enum: ["add-schema"]
         schema:
           $ref: '#/components/schemas/Schema'
-        last-column-id:
-          type: integer
-          description: The highest assigned column ID for the table. This is used to ensure columns are always assigned an unused ID when evolving schemas. When omitted, it will be computed on the server side.
 
     SetCurrentSchemaUpdate:
       allOf: