Skip to content

Commit

Permalink
Core,Open-API: Don't expose the last-column-id
Browse files Browse the repository at this point in the history
Okay, I've added this to the spec a while ago:

apache#7445

But I think this was a mistake, and we should not expose this
to the public APIs, as it is much better to track this internally.

I noticed this while reviewing apache/iceberg-rust#587

Removing this as part of the APIs in Java, and the Open-API
update makes it much more resilient, and don't require the
clients to compute this value. For example. when there are two conflicting
schema changes, the last-column-id must be recomputed correctly when doing
the retry operation.
  • Loading branch information
Fokko committed Nov 11, 2024
1 parent 82a2362 commit 23aeca3
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 16 deletions.
10 changes: 10 additions & 0 deletions core/src/main/java/org/apache/iceberg/MetadataUpdate.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,16 @@ class AddSchema implements MetadataUpdate {
private final Schema schema;
private final int lastColumnId;

public AddSchema(Schema schema) {
this(schema, schema.highestFieldId());
}

/**
* Set the schema
*
* @deprecated since 1.8.0, will be removed 1.9.0 or 2.0.0, use AddSchema(schema).
*/
@Deprecated
public AddSchema(Schema schema, int lastColumnId) {
this.schema = schema;
this.lastColumnId = lastColumnId;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,11 @@
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.util.JsonUtil;
import org.apache.iceberg.view.ViewVersionParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class MetadataUpdateParser {
private static final Logger LOG = LoggerFactory.getLogger(MetadataUpdateParser.class);

private MetadataUpdateParser() {}

Expand Down Expand Up @@ -462,6 +465,8 @@ private static MetadataUpdate readAddSchema(JsonNode node) {
Schema schema = SchemaParser.fromJson(schemaNode);
int lastColumnId;
if (node.has(LAST_COLUMN_ID)) {
LOG.warn(
"Field last-column-id of MetadataUpdate.AddSchema is since 1.8.0, will be removed 1.9.0 or 2.0.0");
lastColumnId = JsonUtil.getInt(LAST_COLUMN_ID, node);
} else {
lastColumnId = schema.highestFieldId();
Expand Down
2 changes: 1 addition & 1 deletion core/src/main/java/org/apache/iceberg/SchemaUpdate.java
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ public Schema apply() {

@Override
public void commit() {
TableMetadata update = applyChangesToMetadata(base.updateSchema(apply(), lastColumnId));
TableMetadata update = applyChangesToMetadata(base.updateSchema(apply()));
ops.commit(base, update);
}

Expand Down
25 changes: 24 additions & 1 deletion core/src/main/java/org/apache/iceberg/TableMetadata.java
Original file line number Diff line number Diff line change
Expand Up @@ -562,10 +562,23 @@ public TableMetadata withUUID() {
return new Builder(this).assignUUID().build();
}

/**
* Updates the schema
*
* @deprecated since 1.8.0, will be removed 1.9.0 or 2.0.0, use updateSchema(schema).
*/
@Deprecated
public TableMetadata updateSchema(Schema newSchema, int newLastColumnId) {
return new Builder(this).setCurrentSchema(newSchema, newLastColumnId).build();
}

/** Updates the schema */
public TableMetadata updateSchema(Schema newSchema) {
return new Builder(this)
.setCurrentSchema(newSchema, Math.max(this.lastColumnId, newSchema.highestFieldId()))
.build();
}

// The caller is responsible to pass a newPartitionSpec with correct partition field IDs
public TableMetadata updatePartitionSpec(PartitionSpec newPartitionSpec) {
return new Builder(this).setDefaultPartitionSpec(newPartitionSpec).build();
Expand Down Expand Up @@ -1081,8 +1094,18 @@ public Builder setCurrentSchema(int schemaId) {
return this;
}

public Builder addSchema(Schema schema) {
addSchemaInternal(schema, schema.highestFieldId());
return this;
}

/**
* Add a new schema.
*
* @deprecated since 1.8.0, will be removed 1.9.0 or 2.0.0, use AddSchema(schema).
*/
@Deprecated
public Builder addSchema(Schema schema, int newLastColumnId) {
// TODO: remove requirement for newLastColumnId
addSchemaInternal(schema, newLastColumnId);
return this;
}
Expand Down
7 changes: 1 addition & 6 deletions core/src/main/java/org/apache/iceberg/view/ViewMetadata.java
Original file line number Diff line number Diff line change
Expand Up @@ -372,20 +372,15 @@ private int addSchemaInternal(Schema schema) {
newSchema = schema;
}

int highestFieldId = Math.max(highestFieldId(), newSchema.highestFieldId());
schemas.add(newSchema);
schemasById.put(newSchema.schemaId(), newSchema);
changes.add(new MetadataUpdate.AddSchema(newSchema, highestFieldId));
changes.add(new MetadataUpdate.AddSchema(newSchema));

this.lastAddedSchemaId = newSchemaId;

return newSchemaId;
}

private int highestFieldId() {
return schemas.stream().map(Schema::highestFieldId).max(Integer::compareTo).orElse(0);
}

private int reuseOrCreateNewSchemaId(Schema newSchema) {
// if the schema already exists, use its id; otherwise use the highest id + 1
int newSchemaId = INITIAL_SCHEMA_ID;
Expand Down
5 changes: 0 additions & 5 deletions open-api/rest-catalog-open-api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1149,11 +1149,6 @@ class ViewMetadata(BaseModel):
class AddSchemaUpdate(BaseUpdate):
action: Literal['add-schema']
schema_: Schema = Field(..., alias='schema')
last_column_id: Optional[int] = Field(
None,
alias='last-column-id',
description='The highest assigned column ID for the table. This is used to ensure columns are always assigned an unused ID when evolving schemas. When omitted, it will be computed on the server side.',
)


class TableUpdate(BaseModel):
Expand Down
3 changes: 0 additions & 3 deletions open-api/rest-catalog-open-api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2690,9 +2690,6 @@ components:
enum: ["add-schema"]
schema:
$ref: '#/components/schemas/Schema'
last-column-id:
type: integer
description: The highest assigned column ID for the table. This is used to ensure columns are always assigned an unused ID when evolving schemas. When omitted, it will be computed on the server side.

SetCurrentSchemaUpdate:
allOf:
Expand Down

0 comments on commit 23aeca3

Please sign in to comment.