diff --git a/api/src/main/java/org/apache/iceberg/Schema.java b/api/src/main/java/org/apache/iceberg/Schema.java index 44f65ff56a54..3d3f9975fad0 100644 --- a/api/src/main/java/org/apache/iceberg/Schema.java +++ b/api/src/main/java/org/apache/iceberg/Schema.java @@ -56,7 +56,7 @@ public class Schema implements Serializable { private static final int DEFAULT_SCHEMA_ID = 0; private static final int DEFAULT_VALUES_MIN_FORMAT_VERSION = 3; private static final Map MIN_FORMAT_VERSIONS = - ImmutableMap.of(Type.TypeID.TIMESTAMP_NANO, 3); + ImmutableMap.of(Type.TypeID.TIMESTAMP_NANO, 3, Type.TypeID.VARIANT, 3); private final StructType struct; private final int schemaId; diff --git a/api/src/main/java/org/apache/iceberg/VariantLike.java b/api/src/main/java/org/apache/iceberg/VariantLike.java deleted file mode 100644 index d4768dc8efda..000000000000 --- a/api/src/main/java/org/apache/iceberg/VariantLike.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg; - -import org.apache.iceberg.types.Type; - -/** Interface for accessing Variant fields. */ -public interface VariantLike { - int size(); - - /** - * Retrieves the value of the current variant element based on the provided `javaClass` type. The - * `javaClass` parameter should be the `javaClass` field value of an Iceberg data type TypeID - * {@link Type#typeId()}, such as `Boolean.class`. - * - *

If the current variant element holds a primitive value that can be extracted or promoted to - * match the specified `javaClass`, the method will return that value. The method errors out if - * the value is an object or fails to cast to the desired type. - * - * @param javaClass the Java class type to extract the value from the current variant element. - * @return the extracted value if successful, or if the value cannot be extracted or promoted. - */ - T get(Class javaClass); - - /** - * Retrieves the sub-element from the current variant based on the provided path. The path is an - * array of strings that represents the hierarchical path to access the sub-element within the - * variant, such as ["a", "b"], for the path `a.b`. - * - *

If the sub-element exists for the specified path, it will return the corresponding - * VariantLike` element. Otherwise, if the path is invalid or the sub-element is not found, this - * method will return `null`. Empty array and null are invalid inputs. - * - * @param path an array of strings representing the hierarchical path to the sub-element. - * @return the sub-element at the specified path as a `VariantLike`, or `null` if not found. - */ - VariantLike get(String[] path); - - /** - * Returns the JSON representation of the current variant. - * - *

If the variant element is an object, this method serializes it into a JSON string. For - * primitive types such as boolean, int, long, float, double, and string, it returns the exact - * value in its serialized form. For any other types, the value is serialized as a double-quoted - * string. - * - * @return a JSON string representing the current variant. - */ - String toJson(); -} diff --git a/api/src/main/java/org/apache/iceberg/transforms/Identity.java b/api/src/main/java/org/apache/iceberg/transforms/Identity.java index d4e5e532943b..e25b9520dbc4 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Identity.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Identity.java @@ -38,6 +38,8 @@ class Identity implements Transform { */ @Deprecated public static Identity get(Type type) { + Preconditions.checkArgument(!type.isVariantType(), "Unsupported type for identity: %s", type); + return new Identity<>(type); } diff --git a/api/src/main/java/org/apache/iceberg/types/Type.java b/api/src/main/java/org/apache/iceberg/types/Type.java index 2f67494a1934..9ac7fa84a1de 100644 --- a/api/src/main/java/org/apache/iceberg/types/Type.java +++ b/api/src/main/java/org/apache/iceberg/types/Type.java @@ -26,7 +26,6 @@ import java.util.Map; import java.util.Objects; import org.apache.iceberg.StructLike; -import org.apache.iceberg.VariantLike; public interface Type extends Serializable { enum TypeID { @@ -47,7 +46,7 @@ enum TypeID { STRUCT(StructLike.class), LIST(List.class), MAP(Map.class), - VARIANT(VariantLike.class); + VARIANT(Object.class); private final Class javaClass; @@ -94,6 +93,10 @@ default boolean isListType() { return false; } + default boolean isVariantType() { + return false; + } + default boolean isMapType() { return false; } diff --git a/api/src/main/java/org/apache/iceberg/types/Types.java b/api/src/main/java/org/apache/iceberg/types/Types.java index ce87aa8429a7..e0d9eccc77f3 100644 --- a/api/src/main/java/org/apache/iceberg/types/Types.java +++ b/api/src/main/java/org/apache/iceberg/types/Types.java @@ -420,6 +420,16 @@ public static VariantType get() { return INSTANCE; } + @Override + public boolean isPrimitiveType() { + return false; + } + + @Override + public boolean isVariantType() { + return true; + } + @Override public TypeID typeId() { return TypeID.VARIANT; diff --git a/api/src/test/java/org/apache/iceberg/TestAccessors.java b/api/src/test/java/org/apache/iceberg/TestAccessors.java index 4df522c5191c..7b4feb845f12 100644 --- a/api/src/test/java/org/apache/iceberg/TestAccessors.java +++ b/api/src/test/java/org/apache/iceberg/TestAccessors.java @@ -22,14 +22,9 @@ import static org.apache.iceberg.types.Types.NestedField.required; import static org.assertj.core.api.Assertions.assertThat; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; import java.math.BigDecimal; import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.util.Base64; import java.util.UUID; -import org.apache.iceberg.TestHelpers.JsonVariant; import org.apache.iceberg.TestHelpers.Row; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; @@ -252,70 +247,4 @@ public void testEmptySchema() { Schema emptySchema = new Schema(); assertThat(emptySchema.accessorForField(17)).isNull(); } - - @Test - public void testVariant() throws JsonProcessingException { - Base64.Encoder encoder = Base64.getEncoder(); - boolean expectedTrue = true; - boolean expectedFalse = false; - int expectedInt = 2147483647; - long expectedLong = 2147483648L; - float expectedFloat = 1.2345f; - double expectedDouble = 1.23456; - BigDecimal expectedDecimal = new BigDecimal(123456); - String expectedString = "abc"; - String expectedBytes = - new String(encoder.encode(expectedString.getBytes()), StandardCharsets.UTF_8); - int nestInt = 10; - - String json = - "{\"false\":" - + expectedFalse - + ", \"true\":" - + expectedTrue - + ", \"string\": \"" - + expectedString - + "\"," - + "\"int\":" - + expectedInt - + "," - + "\"long\":" - + expectedLong - + ", \"float\":" - + expectedFloat - + "," - + "\"double\":" - + expectedDouble - + ", \"bytes\":\"" - + expectedBytes - + "\", \"decimal\":" - + expectedDecimal - + "," - + "\"nest1\": {\"nest2\":" - + nestInt - + "}" - + "}"; - - VariantLike variant = JsonVariant.of(json); - assertAccessorReturns(Types.VariantType.get(), variant); - - assertThat(variant.get(new String[] {"true"}).get(Boolean.class)).isEqualTo(expectedTrue); - assertThat(variant.get(new String[] {"false"}).get(Boolean.class)).isEqualTo(expectedFalse); - assertThat(variant.get(new String[] {"string"}).get(String.class)).isEqualTo(expectedString); - assertThat(variant.get(new String[] {"int"}).get(Integer.class)).isEqualTo(expectedInt); - assertThat(variant.get(new String[] {"long"}).get(Long.class)).isEqualTo(expectedLong); - assertThat(variant.get(new String[] {"float"}).get(Float.class)).isEqualTo(expectedFloat); - assertThat(variant.get(new String[] {"double"}).get(Double.class)).isEqualTo(expectedDouble); - assertThat(variant.get(new String[] {"decimal"}).get(BigDecimal.class)) - .isEqualTo(expectedDecimal); - assertThat( - StandardCharsets.UTF_8 - .decode(variant.get(new String[] {"bytes"}).get(ByteBuffer.class)) - .toString()) - .isEqualTo(expectedString); - assertThat(variant.get(new String[] {"nest1", "nest2"}).get(Integer.class)).isEqualTo(nestInt); - assertThat(variant.get(new String[] {"nest1", "invalid"})).isNull(); - assertThat(new ObjectMapper().readTree(variant.toJson())) - .isEqualTo(new ObjectMapper().readTree(json)); - } } diff --git a/api/src/test/java/org/apache/iceberg/TestHelpers.java b/api/src/test/java/org/apache/iceberg/TestHelpers.java index 5f440a235d75..ca3b1a908ac6 100644 --- a/api/src/test/java/org/apache/iceberg/TestHelpers.java +++ b/api/src/test/java/org/apache/iceberg/TestHelpers.java @@ -25,8 +25,6 @@ import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; import com.esotericsoftware.kryo.serializers.ClosureSerializer; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -36,12 +34,10 @@ import java.io.OutputStream; import java.io.Serializable; import java.lang.invoke.SerializedLambda; -import java.math.BigDecimal; import java.nio.ByteBuffer; import java.util.Arrays; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.Set; import java.util.stream.IntStream; import org.apache.iceberg.expressions.BoundPredicate; @@ -406,101 +402,6 @@ public int hashCode() { } } - /** A VariantLike implementation for testing accepting JSON input */ - public static class JsonVariant implements VariantLike { - public static JsonVariant of(String json) { - return new JsonVariant(json); - } - - private final JsonNode node; - - private JsonVariant(String json) { - try { - ObjectMapper mapper = new ObjectMapper(); - this.node = mapper.readTree(json); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - private JsonVariant(JsonNode node) { - this.node = node; - } - - @Override - public int size() { - return node.size(); - } - - @Override - public T get(Class javaClass) { - if (javaClass.equals(Boolean.class)) { - return (T) (Boolean) node.asBoolean(); - } else if (javaClass.equals(Integer.class)) { - return (T) (Integer) node.asInt(); - } else if (javaClass.equals(Long.class)) { - return (T) (Long) node.asLong(); - } else if (javaClass.equals(Float.class)) { - return (T) Float.valueOf((float) node.asDouble()); - } else if (javaClass.equals(Double.class)) { - return (T) (Double) (node.asDouble()); - } else if (CharSequence.class.isAssignableFrom(javaClass)) { - return (T) node.asText(); - } else if (javaClass.equals(ByteBuffer.class)) { - try { - return (T) ByteBuffer.wrap(node.binaryValue()); - } catch (IOException e) { - throw new RuntimeException(e); - } - } else if (javaClass.equals(BigDecimal.class)) { - return (T) node.decimalValue(); - } - - throw new IllegalArgumentException("Unsupported type: " + javaClass); - } - - @Override - public VariantLike get(String[] path) { - Preconditions.checkState( - path != null && path.length > 0, "path must contain at least one element"); - - JsonNode childNode = node; - for (String pathElement : path) { - childNode = childNode.get(pathElement); - if (childNode == null) { - return null; - } - } - - return new JsonVariant(childNode); - } - - @Override - public String toJson() { - return node.toString(); - } - - @Override - public boolean equals(Object other) { - if (this == other) { - return true; - } - - if (other == null || getClass() != other.getClass()) { - return false; - } - - JsonVariant that = (JsonVariant) other; - - return Objects.equals(node, that.node); - } - - @Override - public int hashCode() { - return Objects.hashCode(node); - } - } - public static class TestFieldSummary implements ManifestFile.PartitionFieldSummary { private final boolean containsNull; private final Boolean containsNaN; diff --git a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java index 263db427aa16..971f5a9e4510 100644 --- a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java +++ b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java @@ -21,6 +21,7 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; +import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.transforms.Transforms; import org.apache.iceberg.types.Types; import org.apache.iceberg.types.Types.NestedField; @@ -34,7 +35,8 @@ public class TestPartitionSpecValidation { NestedField.required(3, "another_ts", Types.TimestampType.withZone()), NestedField.required(4, "d", Types.TimestampType.withZone()), NestedField.required(5, "another_d", Types.TimestampType.withZone()), - NestedField.required(6, "s", Types.StringType.get())); + NestedField.required(6, "s", Types.StringType.get()), + NestedField.required(7, "v", Types.VariantType.get())); @Test public void testMultipleTimestampPartitions() { @@ -312,4 +314,15 @@ public void testAddPartitionFieldsWithAndWithoutFieldIds() { assertThat(spec.fields().get(2).fieldId()).isEqualTo(1006); assertThat(spec.lastAssignedFieldId()).isEqualTo(1006); } + + @Test + public void testVariantUnsupported() { + assertThatThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA) + .add(7, 1005, "variant_partition1", Transforms.bucket(5)) + .build()) + .isInstanceOf(ValidationException.class) + .hasMessage("Cannot partition by non-primitive source field: variant"); + } } diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java index fc4333d7c6c5..5f0cac2b5e8c 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java @@ -417,6 +417,20 @@ public void testVerifiedIllegalNumBuckets() { .hasMessage("Invalid number of buckets: 0 (must be > 0)"); } + @Test + public void testVariantUnsupported() { + assertThatThrownBy(() -> Transforms.bucket(Types.VariantType.get(), 3)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot bucket by type: variant"); + + Transform bucket = Transforms.bucket(3); + assertThatThrownBy(() -> bucket.bind(Types.VariantType.get())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot bucket by type: variant"); + + assertThat(bucket.canTransform(Types.VariantType.get())).isFalse(); + } + private byte[] randomBytes(int length) { byte[] bytes = new byte[length]; testRandom.nextBytes(bytes); diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java b/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java index 93d3281411f3..b5076e08a947 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.math.BigDecimal; import java.nio.ByteBuffer; @@ -155,4 +156,21 @@ public void testBigDecimalToHumanString() { .as("Should not modify Strings") .isEqualTo(decimalString); } + + @Test + public void testVariantUnsupported() { + assertThatThrownBy(() -> Transforms.identity().bind(Types.VariantType.get())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot bind to unsupported type: variant"); + + assertThatThrownBy(() -> Transforms.fromString(Types.VariantType.get(), "identity")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Unsupported type for identity: variant"); + + assertThatThrownBy(() -> Transforms.identity(Types.VariantType.get())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Unsupported type for identity: variant"); + + assertThat(Transforms.identity().canTransform(Types.VariantType.get())).isFalse(); + } } diff --git a/core/src/test/java/org/apache/iceberg/TestSortOrder.java b/core/src/test/java/org/apache/iceberg/TestSortOrder.java index ad773192b417..1044ed83fb38 100644 --- a/core/src/test/java/org/apache/iceberg/TestSortOrder.java +++ b/core/src/test/java/org/apache/iceberg/TestSortOrder.java @@ -337,6 +337,22 @@ public void testSortedColumnNames() { assertThat(sortedCols).containsExactly("s.id", "data"); } + @TestTemplate + public void testVariantUnsupported() { + Schema v3Schema = + new Schema( + Types.NestedField.required(3, "id", Types.LongType.get()), + Types.NestedField.required(4, "data", Types.StringType.get()), + Types.NestedField.required( + 5, + "struct", + Types.StructType.of(Types.NestedField.optional(6, "v", Types.VariantType.get())))); + + assertThatThrownBy(() -> SortOrder.builderFor(v3Schema).withOrderId(10).asc("struct.v").build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Unsupported type for identity: variant"); + } + @TestTemplate public void testPreservingOrderSortedColumnNames() { SortOrder order = diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java index 71254b3abb1b..cb0fa5bcafab 100644 --- a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java +++ b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java @@ -1687,6 +1687,44 @@ public void testV3TimestampNanoTypeSupport() { 3); } + @Test + public void testV3VariantTypeSupport() { + Schema v3Schema = + new Schema( + Types.NestedField.required(3, "id", Types.LongType.get()), + Types.NestedField.required(4, "data", Types.StringType.get()), + Types.NestedField.required( + 5, + "struct", + Types.StructType.of(Types.NestedField.optional(6, "v", Types.VariantType.get())))); + + for (int unsupportedFormatVersion : ImmutableList.of(1, 2)) { + assertThatThrownBy( + () -> + TableMetadata.newTableMetadata( + v3Schema, + PartitionSpec.unpartitioned(), + SortOrder.unsorted(), + TEST_LOCATION, + ImmutableMap.of(), + unsupportedFormatVersion)) + .isInstanceOf(IllegalStateException.class) + .hasMessage( + "Invalid schema for v%s:\n" + + "- Invalid type for struct.v: variant is not supported until v3", + unsupportedFormatVersion); + } + + // should be allowed in v3 + TableMetadata.newTableMetadata( + v3Schema, + PartitionSpec.unpartitioned(), + SortOrder.unsorted(), + TEST_LOCATION, + ImmutableMap.of(), + 3); + } + @Test public void onlyMetadataLocationIsUpdatedWithoutTimestampAndMetadataLogEntry() { String uuid = "386b9f01-002b-4d8c-b77f-42c3fd3b7c9b";