Skip to content

Commit

Permalink
API: Add Variant data type
Browse files Browse the repository at this point in the history
  • Loading branch information
aihuaxu committed Oct 29, 2024
1 parent 1e3ee1e commit 5e5a53e
Show file tree
Hide file tree
Showing 10 changed files with 209 additions and 4 deletions.
52 changes: 52 additions & 0 deletions api/src/main/java/org/apache/iceberg/VariantLike.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;

import java.math.BigDecimal;

/**
* Interface for accessing Variant fields.
*
* <p>This interface supports accessing data in top-level fields, not in nested fields.
*
* <p>NOTE(review): this interface does not state what happens when a typed getter is called on a
* value of a different kind, or when a key or index does not exist. Confirm against the
* implementation in use before relying on either behavior.
*/
public interface VariantLike {
/**
* Returns the size of this value.
*
* <p>Presumably the number of child fields or elements; TODO confirm.
*/
int size();

/**
* Returns the child value stored under the given key.
*
* @param key name of the child field to look up
* @return the child as a {@code VariantLike}; result for a missing key is unspecified here
*/
VariantLike getFieldByKey(String key);

/**
* Returns the child value at the given position.
*
* @param index position of the child to look up; presumably zero-based, TODO confirm
* @return the child as a {@code VariantLike}; result for an invalid index is unspecified here
*/
VariantLike getFieldAtIndex(int index);

/** Returns this value as a {@code boolean}. */
boolean getBoolean();

/** Returns this value as an {@code int}. */
int getInt();

/** Returns this value as a {@code long}. */
long getLong();

/** Returns this value as a {@code float}. */
float getFloat();

/** Returns this value as a {@code double}. */
double getDouble();

/** Returns this value as a {@link BigDecimal}. */
BigDecimal getDecimal();

/** Returns this value as a {@link String}. */
String getString();

/** Returns this value as a byte array. */
byte[] getBinary();

/** Returns a JSON string for this value. */
String toJson();
}
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,8 @@ private static String sanitize(Type type, Object value, long now, int today) {
case DECIMAL:
case FIXED:
case BINARY:
// for boolean, uuid, decimal, fixed, and binary, match the string result
case VARIANT:
// for boolean, uuid, decimal, fixed, variant, and binary, match the string result
return sanitizeSimpleString(value.toString());
}
throw new UnsupportedOperationException(
Expand Down Expand Up @@ -562,7 +563,7 @@ private static String sanitize(Literal<?> literal, long now, int today) {
} else if (literal instanceof Literals.DoubleLiteral) {
return sanitizeNumber(((Literals.DoubleLiteral) literal).value(), "float");
} else {
// for uuid, decimal, fixed, and binary, match the string result
// for uuid, decimal, fixed, variant, and binary, match the string result
return sanitizeSimpleString(literal.value().toString());
}
}
Expand Down
4 changes: 3 additions & 1 deletion api/src/main/java/org/apache/iceberg/types/Type.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import java.util.Map;
import java.util.Objects;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.VariantLike;

public interface Type extends Serializable {
enum TypeID {
Expand All @@ -45,7 +46,8 @@ enum TypeID {
DECIMAL(BigDecimal.class),
STRUCT(StructLike.class),
LIST(List.class),
MAP(Map.class);
MAP(Map.class),
VARIANT(VariantLike.class);

private final Class<?> javaClass;

Expand Down
1 change: 1 addition & 0 deletions api/src/main/java/org/apache/iceberg/types/TypeUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,7 @@ private static int estimateSize(Type type) {
case FIXED:
return ((Types.FixedType) type).length();
case BINARY:
case VARIANT:
return 80;
case DECIMAL:
// 12 (header) + (12 + 12 + 4) (BigInteger) + 4 (scale) = 44 bytes
Expand Down
19 changes: 19 additions & 0 deletions api/src/main/java/org/apache/iceberg/types/Types.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ private Types() {}
.put(StringType.get().toString(), StringType.get())
.put(UUIDType.get().toString(), UUIDType.get())
.put(BinaryType.get().toString(), BinaryType.get())
.put(VariantType.get().toString(), VariantType.get())
.buildOrThrow();

private static final Pattern FIXED = Pattern.compile("fixed\\[\\s*(\\d+)\\s*\\]");
Expand Down Expand Up @@ -412,6 +413,24 @@ public String toString() {
}
}

/** Type object for variant values; reports {@code TypeID.VARIANT} and prints as "variant". */
public static class VariantType extends PrimitiveType {
  private static final VariantType INSTANCE = new VariantType();

  /** Returns the shared {@code VariantType} instance held by this class. */
  public static VariantType get() {
    return VariantType.INSTANCE;
  }

  /** Returns the textual name of this type, {@code "variant"}. */
  @Override
  public String toString() {
    return "variant";
  }

  /** Returns the type id of this type, {@code TypeID.VARIANT}. */
  @Override
  public TypeID typeId() {
    return TypeID.VARIANT;
  }
}

public static class DecimalType extends PrimitiveType {
public static DecimalType of(int precision, int scale) {
return new DecimalType(precision, scale);
Expand Down
7 changes: 7 additions & 0 deletions api/src/test/java/org/apache/iceberg/TestAccessors.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.util.UUID;
import org.apache.iceberg.TestHelpers.JsonVariant;
import org.apache.iceberg.TestHelpers.Row;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
Expand Down Expand Up @@ -247,4 +248,10 @@ public void testEmptySchema() {
Schema emptySchema = new Schema();
assertThat(emptySchema.accessorForField(17)).isNull();
}

@Test
public void testVariant() {
  // Hand a variant type and a JSON-backed variant value to the shared accessor assertion.
  Types.VariantType variantType = Types.VariantType.get();
  VariantLike person = JsonVariant.of("{\"name\":\"John\",\"age\":30}");

  assertAccessorReturns(variantType, person);
}
}
112 changes: 112 additions & 0 deletions api/src/test/java/org/apache/iceberg/TestHelpers.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;
import com.esotericsoftware.kryo.serializers.ClosureSerializer;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
Expand All @@ -34,10 +36,12 @@
import java.io.OutputStream;
import java.io.Serializable;
import java.lang.invoke.SerializedLambda;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.IntStream;
import org.apache.iceberg.expressions.BoundPredicate;
Expand Down Expand Up @@ -402,6 +406,114 @@ public int hashCode() {
}
}

/**
 * A {@link VariantLike} implementation for testing that accepts JSON input.
 *
 * <p>The JSON text is parsed once into a Jackson {@link JsonNode} tree; every accessor delegates
 * to that node. Two instances are equal when their underlying nodes are equal.
 */
public static class JsonVariant implements VariantLike {
  // ObjectMapper is documented as thread-safe after configuration and is costly to construct,
  // so a single instance is shared instead of building a new one for every parsed value.
  private static final ObjectMapper MAPPER = new ObjectMapper();

  /**
   * Parses the given JSON text into a variant.
   *
   * @param json JSON document to parse
   * @return a variant backed by the parsed tree
   * @throws java.io.UncheckedIOException if the text cannot be parsed as JSON
   */
  public static JsonVariant of(String json) {
    return new JsonVariant(json);
  }

  private final JsonNode node;

  private JsonVariant(String json) {
    try {
      this.node = MAPPER.readTree(json);
    } catch (IOException e) {
      // Still a RuntimeException, as before, but now names the input that failed to parse.
      throw new java.io.UncheckedIOException("Failed to parse JSON: " + json, e);
    }
  }

  private JsonVariant(JsonNode node) {
    this.node = node;
  }

  /** Wraps a child node, or returns null when the child is absent. */
  private static VariantLike wrap(JsonNode child) {
    return child == null ? null : new JsonVariant(child);
  }

  /** Returns the size reported by the underlying JSON node. */
  @Override
  public int size() {
    return node.size();
  }

  /**
   * Looks up a child by field name.
   *
   * @param key field name to look up
   * @return the child variant, or null when the underlying node has no such field
   */
  @Override
  public VariantLike getFieldByKey(String key) {
    // JsonNode.get returns null for a missing field; surface that directly instead of wrapping
    // a null node that would only fail later with a NullPointerException in an accessor.
    return wrap(node.get(key));
  }

  /**
   * Looks up a child by position.
   *
   * @param index position to look up
   * @return the child variant, or null when the underlying node has no element at that index
   */
  @Override
  public VariantLike getFieldAtIndex(int index) {
    return wrap(node.get(index));
  }

  /** Returns the node's value coerced to a boolean by {@code JsonNode.asBoolean()}. */
  @Override
  public boolean getBoolean() {
    return node.asBoolean();
  }

  /** Returns the node's value coerced to an int by {@code JsonNode.asInt()}. */
  @Override
  public int getInt() {
    return node.asInt();
  }

  /** Returns the node's value coerced to a long by {@code JsonNode.asLong()}. */
  @Override
  public long getLong() {
    return node.asLong();
  }

  /** Returns the node's value coerced to a double and then narrowed to a float. */
  @Override
  public float getFloat() {
    return (float) node.asDouble();
  }

  /** Returns the node's value coerced to a double by {@code JsonNode.asDouble()}. */
  @Override
  public double getDouble() {
    return node.asDouble();
  }

  /**
   * Returns the node's text form parsed as a {@link BigDecimal}.
   *
   * @throws NumberFormatException if the text form is not a valid decimal number
   */
  @Override
  public BigDecimal getDecimal() {
    return new BigDecimal(node.asText());
  }

  /** Returns the node's text form from {@code JsonNode.asText()}. */
  @Override
  public String getString() {
    return node.asText();
  }

  /**
   * Returns the node's binary content as produced by {@code JsonNode.binaryValue()}.
   *
   * @throws java.io.UncheckedIOException if the node's content cannot be decoded as binary
   */
  @Override
  public byte[] getBinary() {
    try {
      return node.binaryValue();
    } catch (IOException e) {
      // Previously swallowed and reported as null, which hid decoding failures from tests.
      throw new java.io.UncheckedIOException("Failed to read binary value from JSON node", e);
    }
  }

  /** Returns the JSON text of the underlying node. */
  @Override
  public String toJson() {
    return node.toString();
  }

  @Override
  public boolean equals(Object other) {
    if (this == other) {
      return true;
    }

    if (other == null || getClass() != other.getClass()) {
      return false;
    }

    JsonVariant that = (JsonVariant) other;

    return Objects.equals(node, that.node);
  }

  @Override
  public int hashCode() {
    return Objects.hashCode(node);
  }
}

public static class TestFieldSummary implements ManifestFile.PartitionFieldSummary {
private final boolean containsNull;
private final Boolean containsNaN;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ public void testIdentityTypes() throws Exception {
Types.TimestampNanoType.withZone(),
Types.StringType.get(),
Types.UUIDType.get(),
Types.BinaryType.get()
Types.BinaryType.get(),
Types.VariantType.get()
};

for (Type type : identityPrimitives) {
Expand Down
2 changes: 2 additions & 0 deletions api/src/test/java/org/apache/iceberg/types/TestTypes.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ public void fromPrimitiveString() {

assertThat(Types.fromPrimitiveString("Decimal(2,3)")).isEqualTo(Types.DecimalType.of(2, 3));

assertThat(Types.fromPrimitiveString("Variant")).isEqualTo(Types.VariantType.get());

assertThatExceptionOfType(IllegalArgumentException.class)
.isThrownBy(() -> Types.fromPrimitiveString("Unknown"))
.withMessageContaining("Unknown");
Expand Down
8 changes: 8 additions & 0 deletions api/src/test/java/org/apache/iceberg/util/RandomUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.math.BigInteger;
import java.util.Arrays;
import java.util.Random;
import org.apache.iceberg.TestHelpers.JsonVariant;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;

Expand Down Expand Up @@ -144,6 +145,9 @@ public static Object generatePrimitive(Type.PrimitiveType primitive, Random rand
BigDecimal bigDecimal = new BigDecimal(unscaled, type.scale());
return negate(choice) ? bigDecimal.negate() : bigDecimal;

case VARIANT:
return randomVariant();

default:
throw new IllegalArgumentException(
"Cannot generate random value for unknown type: " + primitive);
Expand Down Expand Up @@ -225,4 +229,8 @@ private static BigInteger randomUnscaled(int precision, Random random) {

return new BigInteger(sb.toString());
}

/**
 * Supplies the value returned by generatePrimitive for the VARIANT case.
 *
 * <p>NOTE(review): despite the name, this returns the same fixed JSON object on every call and
 * does not consult any Random.
 */
private static JsonVariant randomVariant() {
  String fixedJson = "{\"name\": \"John\"}";
  return JsonVariant.of(fixedJson);
}
}

0 comments on commit 5e5a53e

Please sign in to comment.