diff --git a/docs/wiki/v1/streams.md b/docs/wiki/v1/streams.md new file mode 100644 index 000000000..4fac790da --- /dev/null +++ b/docs/wiki/v1/streams.md @@ -0,0 +1,97 @@ +# PartiQL Data Streams + +*This document defines the PSink and PSource in relation to Datum and Java primitives* + +* * * + +### Background + +We have defined +an [encoding of PartiQL values using the Ion data format](https://quip-amazon.com/5Su8AQhKG5xA/PartiQL-Values-in-Ion), +but how does this fit in? Let’s look at two questions. + +1. How does PartiQL represent values in memory? +2. How does PartiQL read values from a stream into memory? + +An in-memory PartiQL value has a layer of indirection between the Java primitive and its view to the rest of the +program. This is called the “Datum” and is a fat interface which allows the partiql-lang-kotlin engine to not worry +about a value’s Java type, and instead switch on an int tag (ptype) to then pull out a value. Effectively, the fat +interface replaces class checks and casts with a tag check followed by an access that needs no cast. It’s effectively a +unifying interface over the old values, so how does the variant fit in? + +A variant is an implementation of a Datum whose value is opaque to the rest of the system. When the system checks the +tag, it simply gets back “`variant<T>`” where T might tell us a set of capabilities (or traits) this type system / value +has. This value is not lowered into a PartiQL value such as an INT or ARRAY, but is kept in its “container” or “box”. +Think of the variant types of other engines or jsonb of PostgreSQL. + +So how does PartiQL read values from a stream into Datums, and how does it handle variants? It depends because an +encoding may include a data type or it may not. Also, the reader itself may expect a type (or not). Consider that a +PartiQL value carries a type with it along with the value itself. + +## Writing Data + +### PSink + +The PSink interface is used to write PartiQL data.
It has APIs just like the IonWriter, and similarly, it has different +implementations for the actual encoding like how Ion has both a text and a binary encoding. A PSink is used without any +assumptions about the actual encoding. + +### DatumWriter + +The DatumWriter is a class which facilitates writing datums via a PSink implementation; it handles materializing a +datum and calling the appropriate sink methods. + +**Example** + +```kotlin +val writer = DatumWriter(sink) +writer.write(datum1) +writer.write(datum2) +writer.write(datum3) +writer.close() +``` + +### IonSink + +This example shows how to encode a datum as Ion; type decorations are omitted where possible. + +```kotlin +val sink = IonSink.text(System.out) // printing +val writer = DatumWriter(sink) + +// bool +writer.write(Datum.bool(true)) // >> true + +// ints +writer.write(Datum.smallint(1)) // >> smallint::1 +writer.write(Datum.int(2)) // >> int::2 +writer.write(Datum.bigint(3)) // >> 3 + +// exact and approx numeric +writer.write(Datum.decimal(BigDecimal("3.14"), 3, 2)) // >> ((decimal 3 2) 3.14) +writer.write(Datum.real(3.14f)) // >> real::3.14e0 +writer.write(Datum.doublePrecision(3.14)) // >> 3.14e0 + +// char strings +writer.write(Datum.char("abc", 3)) // >> ((char 3) "abc") +writer.write(Datum.varchar("abc", 3)) // >> ((varchar 3) "abc") +writer.write(Datum.string("abc")) // >> "abc" + +// lobs +writer.write(Datum.clob("hello".toByteArray(), 5)) // >> {{ "hello" }} +writer.write(Datum.blob("hello".toByteArray(), 5)) // >> {{ aGVsbG8= }} + +// datetime +// TODO blocked on https://github.com/partiql/partiql-lang-kotlin/pull/1656 + +// ion + +``` + +## Reading Data + +### DatumReader + +### PSource + +PLACEHOLDER diff --git a/partiql-spi/api/partiql-spi.api b/partiql-spi/api/partiql-spi.api index 4744da603..ef15a2698 100644 --- a/partiql-spi/api/partiql-spi.api +++ b/partiql-spi/api/partiql-spi.api @@ -464,8 +464,67 @@ public final class org/partiql/spi/function/Routine$DefaultImpls { public
static fun getParameters (Lorg/partiql/spi/function/Routine;)[Lorg/partiql/spi/function/Parameter; } +public abstract interface class org/partiql/spi/stream/PSink { + public fun close ()V + public fun finish ()V + public fun flush ()V + public abstract fun setType (Lorg/partiql/types/PType;)V + public abstract fun stepIn (I)V + public abstract fun stepOut ()V + public abstract fun writeBigint (J)V + public abstract fun writeBlob ([B)V + public abstract fun writeBool (Z)V + public abstract fun writeChar (Ljava/lang/String;)V + public abstract fun writeClob ([B)V + public abstract fun writeDate (Lorg/partiql/value/datetime/Date;)V + public abstract fun writeDecimal (Ljava/math/BigDecimal;)V + public abstract fun writeDouble (D)V + public abstract fun writeField (Ljava/lang/String;)V + public abstract fun writeInt (I)V + public abstract fun writeMissing ()V + public abstract fun writeNull ()V + public abstract fun writeNumeric (Ljava/math/BigDecimal;)V + public abstract fun writeReal (F)V + public abstract fun writeSmallint (S)V + public abstract fun writeString (Ljava/lang/String;)V + public abstract fun writeTime (Lorg/partiql/value/datetime/Time;)V + public abstract fun writeTimestamp (Lorg/partiql/value/datetime/Timestamp;)V + public abstract fun writeTimestampz (Lorg/partiql/value/datetime/Timestamp;)V + public abstract fun writeTimez (Lorg/partiql/value/datetime/Time;)V + public abstract fun writeTinyint (B)V + public abstract fun writeVarchar (Ljava/lang/String;)V + public abstract fun writeVariant (Ljava/lang/Object;)V +} + +public abstract interface class org/partiql/spi/stream/PSource { + public fun close ()V + public abstract fun next ()Lorg/partiql/types/PType; + public abstract fun readBigint ()J + public abstract fun readBlob ()[B + public abstract fun readBool ()Z + public abstract fun readChar ()Ljava/lang/String; + public abstract fun readClob ()[B + public abstract fun readDate ()Lorg/partiql/value/datetime/Date; + public abstract fun readDecimal 
()Ljava/math/BigDecimal; + public abstract fun readDouble ()D + public abstract fun readField (Ljava/lang/String;)Ljava/lang/String; + public abstract fun readInt ()I + public abstract fun readReal ()F + public abstract fun readSmallint ()S + public abstract fun readString ()Ljava/lang/String; + public abstract fun readTime ()Lorg/partiql/value/datetime/Time; + public abstract fun readTimestamp ()Lorg/partiql/value/datetime/Timestamp; + public abstract fun readTimestampz ()Lorg/partiql/value/datetime/Timestamp; + public abstract fun readTimez ()Lorg/partiql/value/datetime/Time; + public abstract fun readTinyint ()B + public abstract fun readVarchar ()Ljava/lang/String; + public abstract fun stepIn ()V + public abstract fun stepOut ()V +} + public abstract interface class org/partiql/spi/value/Datum : java/lang/Iterable { public static fun array (Ljava/lang/Iterable;)Lorg/partiql/spi/value/Datum; + public static fun array (Ljava/lang/Iterable;Lorg/partiql/types/PType;)Lorg/partiql/spi/value/Datum; public static fun bag (Ljava/lang/Iterable;)Lorg/partiql/spi/value/Datum; public static fun bigint (J)Lorg/partiql/spi/value/Datum; public static fun blob ([B)Lorg/partiql/spi/value/Datum; @@ -538,8 +597,10 @@ public class org/partiql/spi/value/DatumReader$Builder { public fun register (Lorg/partiql/spi/value/Encoding;Lorg/partiql/spi/value/DatumReader;)Lorg/partiql/spi/value/DatumReader$Builder; } -public abstract interface class org/partiql/spi/value/DatumWriter : java/lang/AutoCloseable { - public abstract fun write (Lorg/partiql/spi/value/Datum;)Lorg/partiql/spi/value/DatumWriter; +public final class org/partiql/spi/value/DatumWriter : java/lang/AutoCloseable { + public fun (Lorg/partiql/spi/stream/PSink;)V + public fun close ()V + public final fun write (Lorg/partiql/spi/value/Datum;)V } public class org/partiql/spi/value/Encoding : org/partiql/spi/Enum { @@ -555,6 +616,72 @@ public abstract interface class org/partiql/spi/value/Field { public static fun of 
(Ljava/lang/String;Lorg/partiql/spi/value/Datum;)Lorg/partiql/spi/value/Field; } +public final class org/partiql/spi/value/ion/IonSink : org/partiql/spi/stream/PSink { + public static final field Companion Lorg/partiql/spi/value/ion/IonSink$Companion; + public fun (Lcom/amazon/ion/IonWriter;Ljava/util/BitSet;)V + public static final fun binary (Ljava/io/OutputStream;)Lorg/partiql/spi/value/ion/IonSink; + public static final fun binary (Ljava/io/OutputStream;[I)Lorg/partiql/spi/value/ion/IonSink; + public fun close ()V + public static final fun decorated ()Lorg/partiql/spi/value/ion/IonSink$Builder; + public static final fun elided ()Lorg/partiql/spi/value/ion/IonSink$Builder; + public fun finish ()V + public fun flush ()V + public static final fun pretty (Ljava/lang/Appendable;)Lorg/partiql/spi/value/ion/IonSink; + public static final fun pretty (Ljava/lang/Appendable;[I)Lorg/partiql/spi/value/ion/IonSink; + public fun setType (Lorg/partiql/types/PType;)V + public static final fun standard ()Lorg/partiql/spi/value/ion/IonSink$Builder; + public static final fun standard (Lcom/amazon/ion/IonWriter;)Lorg/partiql/spi/value/ion/IonSink; + public fun stepIn (I)V + public fun stepOut ()V + public static final fun text (Ljava/lang/Appendable;)Lorg/partiql/spi/value/ion/IonSink; + public static final fun text (Ljava/lang/Appendable;[I)Lorg/partiql/spi/value/ion/IonSink; + public fun writeBigint (J)V + public fun writeBlob ([B)V + public fun writeBool (Z)V + public fun writeChar (Ljava/lang/String;)V + public fun writeClob ([B)V + public fun writeDate (Lorg/partiql/value/datetime/Date;)V + public fun writeDecimal (Ljava/math/BigDecimal;)V + public fun writeDouble (D)V + public fun writeField (Ljava/lang/String;)V + public fun writeInt (I)V + public fun writeMissing ()V + public fun writeNull ()V + public fun writeNumeric (Ljava/math/BigDecimal;)V + public fun writeReal (F)V + public fun writeSmallint (S)V + public fun writeString (Ljava/lang/String;)V + public fun writeTime 
(Lorg/partiql/value/datetime/Time;)V + public fun writeTimestamp (Lorg/partiql/value/datetime/Timestamp;)V + public fun writeTimestampz (Lorg/partiql/value/datetime/Timestamp;)V + public fun writeTimez (Lorg/partiql/value/datetime/Time;)V + public fun writeTinyint (B)V + public fun writeVarchar (Ljava/lang/String;)V + public fun writeVariant (Ljava/lang/Object;)V +} + +public final class org/partiql/spi/value/ion/IonSink$Builder { + public final fun build (Lcom/amazon/ion/IonWriter;)Lorg/partiql/spi/value/ion/IonSink; + public final fun decorate (I)Lorg/partiql/spi/value/ion/IonSink$Builder; + public final fun elide (I)Lorg/partiql/spi/value/ion/IonSink$Builder; +} + +public final class org/partiql/spi/value/ion/IonSink$Companion { + public final fun binary (Ljava/io/OutputStream;)Lorg/partiql/spi/value/ion/IonSink; + public final fun binary (Ljava/io/OutputStream;[I)Lorg/partiql/spi/value/ion/IonSink; + public static synthetic fun binary$default (Lorg/partiql/spi/value/ion/IonSink$Companion;Ljava/io/OutputStream;[IILjava/lang/Object;)Lorg/partiql/spi/value/ion/IonSink; + public final fun decorated ()Lorg/partiql/spi/value/ion/IonSink$Builder; + public final fun elided ()Lorg/partiql/spi/value/ion/IonSink$Builder; + public final fun pretty (Ljava/lang/Appendable;)Lorg/partiql/spi/value/ion/IonSink; + public final fun pretty (Ljava/lang/Appendable;[I)Lorg/partiql/spi/value/ion/IonSink; + public static synthetic fun pretty$default (Lorg/partiql/spi/value/ion/IonSink$Companion;Ljava/lang/Appendable;[IILjava/lang/Object;)Lorg/partiql/spi/value/ion/IonSink; + public final fun standard ()Lorg/partiql/spi/value/ion/IonSink$Builder; + public final fun standard (Lcom/amazon/ion/IonWriter;)Lorg/partiql/spi/value/ion/IonSink; + public final fun text (Ljava/lang/Appendable;)Lorg/partiql/spi/value/ion/IonSink; + public final fun text (Ljava/lang/Appendable;[I)Lorg/partiql/spi/value/ion/IonSink; + public static synthetic fun text$default 
(Lorg/partiql/spi/value/ion/IonSink$Companion;Ljava/lang/Appendable;[IILjava/lang/Object;)Lorg/partiql/spi/value/ion/IonSink; +} + public abstract class org/partiql/value/BagValue : org/partiql/value/CollectionValue { public fun ()V public abstract fun copy (Ljava/util/List;)Lorg/partiql/value/BagValue; diff --git a/partiql-spi/src/main/java/org/partiql/spi/stream/PSink.java b/partiql-spi/src/main/java/org/partiql/spi/stream/PSink.java new file mode 100644 index 000000000..0e083d583 --- /dev/null +++ b/partiql-spi/src/main/java/org/partiql/spi/stream/PSink.java @@ -0,0 +1,159 @@ +package org.partiql.spi.stream; + +import org.jetbrains.annotations.NotNull; +import org.partiql.types.PType; +import org.partiql.value.datetime.Date; +import org.partiql.value.datetime.Time; +import org.partiql.value.datetime.Timestamp; + +import java.math.BigDecimal; + +/** + * This is a PartiQL value stream sink. + *
+ * Each value can be written with or without type decoration based upon the actual encoding. + */ +public interface PSink { + + default void close() { + // no-op by default; implementations may override + } + + default void finish() { + // no-op by default; implementations may override + } + + default void flush() { + // no-op by default; implementations may override + } + + /** + * Set the PType for the next written value; cleared after write. + */ + void setType(@NotNull PType type); + + /** + * Write NULL value. + */ + void writeNull(); + + /** + * Write MISSING value. + */ + void writeMissing(); + + /** + * Write BOOL value. + */ + void writeBool(boolean value); + + /** + * Write TINYINT value. + */ + void writeTinyint(byte value); + + /** + * Write SMALLINT value. + */ + void writeSmallint(short value); + + /** + * Write INT value. + */ + void writeInt(int value); + + /** + * Write BIGINT value. + */ + void writeBigint(long value); + + /** + * Write NUMERIC value. + */ + void writeNumeric(@NotNull BigDecimal value); + + /** + * Write DECIMAL value. + */ + void writeDecimal(@NotNull BigDecimal value); + + /** + * Write REAL value. + */ + void writeReal(float value); + + /** + * Write DOUBLE PRECISION value. + */ + void writeDouble(double value); + + /** + * Write CHAR value. + */ + void writeChar(@NotNull String value); + + /** + * Write VARCHAR value. + */ + void writeVarchar(@NotNull String value); + + /** + * Write STRING value. + */ + void writeString(@NotNull String value); + + /** + * Write BLOB value. + */ + void writeBlob(@NotNull byte[] value); + + /** + * Write CLOB value. + */ + void writeClob(@NotNull byte[] value); + + /** + * Write DATE value. + */ + void writeDate(@NotNull Date value); + + /** + * Write TIME value. + */ + void writeTime(@NotNull Time value); + + /** + * Write TIMEZ value. + */ + void writeTimez(@NotNull Time value); + + /** + * Write TIMESTAMP value. + */ + void writeTimestamp(@NotNull Timestamp value); + + /** + * Write TIMESTAMPZ value. + */ + void writeTimestampz(@NotNull Timestamp value); + + /** + * Write a VARIANT value.
+ */ + void writeVariant(@NotNull T value); + + /** + * Write a STRUCT or ROW field name. + */ + void writeField(@NotNull String name); + + /** + * Step into a container, given as its PType code. + */ + void stepIn(@NotNull int container); + + /** + * Step out of the current container. + */ + void stepOut(); +} diff --git a/partiql-spi/src/main/java/org/partiql/spi/stream/PSource.java b/partiql-spi/src/main/java/org/partiql/spi/stream/PSource.java new file mode 100644 index 000000000..f45e8ae4a --- /dev/null +++ b/partiql-spi/src/main/java/org/partiql/spi/stream/PSource.java @@ -0,0 +1,83 @@ +package org.partiql.spi.stream; + +import org.jetbrains.annotations.NotNull; +import org.partiql.types.PType; +import org.partiql.value.datetime.Date; +import org.partiql.value.datetime.Time; +import org.partiql.value.datetime.Timestamp; + +import java.math.BigDecimal; + +/** + * This is a PartiQL value stream source. + *
+ * Developer Note: + * - There should be a method for every Datum *java* value and all PType arguments. + * - Method names are `read` followed by the PType.Kind name in pascal case (e.g. readBigint). + */ +public interface PSource { + + default void close() { + // no-op by default; implementations may override + } + + /** + * Positions the source's internal pointer at the next value and returns its type. + */ + @NotNull + PType next(); + + boolean readBool(); + + byte readTinyint(); + + short readSmallint(); + + int readInt(); + + long readBigint(); + + @NotNull + BigDecimal readDecimal(); + + float readReal(); + + double readDouble(); + + @NotNull + String readChar(); + + @NotNull + String readVarchar(); + + @NotNull + String readString(); + + @NotNull + byte[] readBlob(); + + @NotNull + byte[] readClob(); + + @NotNull + Date readDate(); + + @NotNull + Time readTime(); + + @NotNull + Time readTimez(); + + @NotNull + Timestamp readTimestamp(); + + @NotNull + Timestamp readTimestampz(); + + @NotNull + String readField(@NotNull String name); + + void stepIn(); + + void stepOut(); +} diff --git a/partiql-spi/src/main/java/org/partiql/spi/value/Datum.java b/partiql-spi/src/main/java/org/partiql/spi/value/Datum.java index 0f3647a8c..6c31d8a92 100644 --- a/partiql-spi/src/main/java/org/partiql/spi/value/Datum.java +++ b/partiql-spi/src/main/java/org/partiql/spi/value/Datum.java @@ -23,30 +23,7 @@ import java.util.Iterator; import java.util.Objects; -import static org.partiql.types.PType.DYNAMIC; -import static org.partiql.types.PType.BOOL; -import static org.partiql.types.PType.TINYINT; -import static org.partiql.types.PType.SMALLINT; -import static org.partiql.types.PType.INTEGER; -import static org.partiql.types.PType.BIGINT; -import static org.partiql.types.PType.NUMERIC; -import static org.partiql.types.PType.DECIMAL; -import static org.partiql.types.PType.REAL; -import static org.partiql.types.PType.DOUBLE; -import static org.partiql.types.PType.CHAR; -import static org.partiql.types.PType.STRING; -import static org.partiql.types.PType.BLOB;
-import static org.partiql.types.PType.CLOB; -import static org.partiql.types.PType.DATE; -import static org.partiql.types.PType.TIME; -import static org.partiql.types.PType.TIMEZ; -import static org.partiql.types.PType.TIMESTAMP; -import static org.partiql.types.PType.TIMESTAMPZ; -import static org.partiql.types.PType.ARRAY; -import static org.partiql.types.PType.BAG; -import static org.partiql.types.PType.ROW; -import static org.partiql.types.PType.STRUCT; -import static org.partiql.types.PType.UNKNOWN; +import static org.partiql.types.PType.*; /** * This is an EXPERIMENTAL representation of a value in PartiQL's type system. The intention of this modeling is to @@ -129,6 +106,7 @@ default boolean getBoolean() { *

* ! ! ! EXPERIMENTAL ! ! ! This is an experimental API under development by the PartiQL maintainers. *

+ * * @return the underlying value applicable to the types: * {@link PType#BLOB}, * {@link PType#CLOB} @@ -137,7 +115,7 @@ default boolean getBoolean() { * will throw this exception upon invocation. * @throws NullPointerException if this instance also returns true on {@link #isNull()}; callers should check that * {@link #isNull()} returns false before attempting to invoke this method. - * Please abstain from using this API until given notice otherwise. This may break between iterations without prior notice. + * Please abstain from using this API until given notice otherwise. This may break between iterations without prior notice. * @deprecated BINARY doesn't exist in SQL or Ion. This is subject to deletion. BLOB and CLOB are typically represented * in a fashion that can support much larger values -- this may be modified at any time. */ @@ -148,6 +126,7 @@ default byte[] getBytes() { /** * ! ! ! EXPERIMENTAL ! ! ! This is an experimental API under development by the PartiQL maintainers. + * * @return the underlying value applicable to the types: * {@link PType#TINYINT} * @throws UnsupportedOperationException if the operation is not applicable to the type returned from @@ -155,7 +134,7 @@ default byte[] getBytes() { * will throw this exception upon invocation. * @throws NullPointerException if this instance also returns true on {@link #isNull()}; callers should check that * {@link #isNull()} returns false before attempting to invoke this method. - * Please abstain from using this API until given notice otherwise. This may break between iterations without prior notice. + * Please abstain from using this API until given notice otherwise. This may break between iterations without prior notice. * @deprecated BYTE is not present in SQL or Ion. This is subject to deletion. */ @Deprecated @@ -207,6 +186,7 @@ default Timestamp getTimestamp() { /** * ! ! ! EXPERIMENTAL ! ! ! This is an experimental API under development by the PartiQL maintainers. 
+ * * @return the underlying value applicable to the types: * TODO * @throws UnsupportedOperationException if the operation is not applicable to the type returned from @@ -214,7 +194,7 @@ default Timestamp getTimestamp() { * will throw this exception upon invocation. * @throws NullPointerException if this instance also returns true on {@link #isNull()}; callers should check that * {@link #isNull()} returns false before attempting to invoke this method. - * Please abstain from using this API until given notice otherwise. This may break between iterations without prior notice. + * Please abstain from using this API until given notice otherwise. This may break between iterations without prior notice. * @deprecated This implementation is likely wrong and is not recommended for use. */ @Deprecated @@ -573,6 +553,7 @@ static Datum nullValue(@NotNull PType type) { /** * Returns a typed missing value * ! EXPERIMENTAL ! This is subject to breaking changes and/or removal without prior notice. + * * @param type the type of the value * @return a typed missing value * @deprecated this may not be required. This is subject to removal. @@ -610,6 +591,9 @@ static Datum bigint(long value) { return new DatumLong(value); } + /** + * TODO implement or remove NUMERIC. + */ @Deprecated @NotNull static Datum numeric(@NotNull BigInteger value) { @@ -649,7 +633,6 @@ static Datum string(@NotNull String value) { } /** - * * @param value the string to place in the varchar * @return a varchar value with a default length of 255 */ @@ -659,7 +642,6 @@ static Datum varchar(@NotNull String value) { } /** - * * @param value the string to place in the varchar * @return a varchar value * TODO: Error or coerce here? Right now coerce, though I think this should likely error. 
@@ -681,7 +663,6 @@ static Datum varchar(@NotNull String value, int length) { } /** - * * @param value the string to place in the char * @return a char value with a default length of 255 */ @@ -691,7 +672,6 @@ static Datum character(@NotNull String value) { } /** - * * @param value the string to place in the char * @return a char value */ @@ -762,6 +742,11 @@ static Datum array(@NotNull Iterable values) { return new DatumCollection(values, PType.array()); } + @NotNull + static Datum array(@NotNull Iterable values, @NotNull PType typeParam) { + return new DatumCollection(values, PType.array(typeParam)); + } + // STRUCTURAL @NotNull @@ -784,6 +769,7 @@ static Datum struct(@NotNull Iterable values) { * {@link java.util.TreeSet} in combination with this {@link Comparator} to implement the before-mentioned * operations. *

+ * * @return the default comparator for {@link Datum}. The comparator orders null values first. * @see Datum * @see java.util.TreeSet @@ -804,6 +790,7 @@ static Comparator comparator() { * {@link java.util.TreeSet} in combination with this {@link Comparator} to implement the before-mentioned * operations. *

+ * * @param nullsFirst if true, nulls are ordered before non-null values, otherwise after. * @return the default comparator for {@link Datum}. * @see Datum diff --git a/partiql-spi/src/main/java/org/partiql/spi/value/DatumWriter.java b/partiql-spi/src/main/java/org/partiql/spi/value/DatumWriter.java deleted file mode 100644 index f68b45f2b..000000000 --- a/partiql-spi/src/main/java/org/partiql/spi/value/DatumWriter.java +++ /dev/null @@ -1,16 +0,0 @@ -package org.partiql.spi.value; - -/** - * The {@link DatumWriter} interface is a low-level writer interface for writing streams of PartiQL data. - *
- * @see java.io.Writer - */ -public interface DatumWriter extends AutoCloseable { - - /** - * Like java.io.Reader with combined `append` and `write` since this does not implement Appendable. - * - * @param datum to write. - */ - public DatumWriter write(Datum datum); -} diff --git a/partiql-spi/src/main/kotlin/org/partiql/spi/value/DatumWriter.kt b/partiql-spi/src/main/kotlin/org/partiql/spi/value/DatumWriter.kt new file mode 100644 index 000000000..2d9d4a5a4 --- /dev/null +++ b/partiql-spi/src/main/kotlin/org/partiql/spi/value/DatumWriter.kt @@ -0,0 +1,98 @@ +package org.partiql.spi.value + +import org.partiql.spi.stream.PSink +import org.partiql.types.PType + +/** + * The [DatumWriter] provides a high-level interface for writing [Datum] values to a [PSink] implementation. + */ +public class DatumWriter : AutoCloseable { + + /** + * The underlying sink which every value is written to. + */ + private val sink: PSink + + /** + * Create a DatumWriter which writes to the given sink. + */ + @Suppress("ConvertSecondaryConstructorToPrimary") + public constructor(sink: PSink) { + this.sink = sink + } + + /** + * Writes the datum together with its type; like java.io.Writer with combined `append` and `write` since this does not implement Appendable.
+ */ + public fun write(datum: Datum) { + write(datum, true) + } + + /** + * Writes a single datum to the sink, recursing into the elements of ARRAY and BAG values. + * + * @param datum the value to write + * @param typed when true, the datum's type is passed to the sink via setType before the value is written + */ + private fun write(datum: Datum, typed: Boolean) { + val type = datum.getType() + val code = type.code() + // MISSING is written before any type is set on the sink + if (datum.isMissing) { + sink.writeMissing() + return + } + // the type is skipped for elements of homogeneous (non-dynamic) collections + if (typed) { + sink.setType(type) + } + // NULL is written after the type (if any) has been set + if (datum.isNull) { + sink.writeNull() + return + } + // delegate to the sink method matching the type code + when (code) { + PType.DYNAMIC -> error("Unexpected runtime dynamic") + PType.BOOL -> sink.writeBool(datum.boolean) + PType.TINYINT -> sink.writeTinyint(datum.byte) + PType.SMALLINT -> sink.writeSmallint(datum.short) + PType.INTEGER -> sink.writeInt(datum.int) + PType.BIGINT -> sink.writeBigint(datum.long) + PType.NUMERIC -> sink.writeNumeric(datum.bigDecimal) + PType.DECIMAL -> sink.writeDecimal(datum.bigDecimal) + PType.REAL -> sink.writeReal(datum.float) + PType.DOUBLE -> sink.writeDouble(datum.double) + PType.CHAR -> sink.writeChar(datum.string) + PType.VARCHAR -> sink.writeVarchar(datum.string) + PType.STRING -> sink.writeString(datum.string) + PType.BLOB -> sink.writeBlob(datum.bytes) + PType.CLOB -> sink.writeClob(datum.bytes) + PType.DATE, + PType.TIME, + PType.TIMEZ, + PType.TIMESTAMP, + PType.TIMESTAMPZ, + -> { + TODO("datetime blocked on https://github.com/partiql/partiql-lang-kotlin/pull/1656") + } + PType.ARRAY, + PType.BAG, + -> { + sink.stepIn(code) + val dynamic = type.typeParameter.code() == PType.DYNAMIC // elements are typed only when the element type is dynamic + for (child in datum.iterator()) { + write(child, dynamic) + } + sink.stepOut() + } + else -> { // any code not listed above (e.g. ROW, STRUCT) is not handled yet + TODO("unsupported PTYPE") + } + } + } + + public override fun close() { + sink.close() + } +} diff --git a/partiql-spi/src/main/kotlin/org/partiql/spi/value/ion/IonDatumWriter.kt b/partiql-spi/src/main/kotlin/org/partiql/spi/value/ion/IonDatumWriter.kt deleted file mode 100644 index 92b6374a7..000000000 ---
a/partiql-spi/src/main/kotlin/org/partiql/spi/value/ion/IonDatumWriter.kt +++ /dev/null @@ -1,15 +0,0 @@ -package org.partiql.spi.value.ion - -import org.partiql.spi.value.Datum -import org.partiql.spi.value.DatumWriter - -internal class IonDatumWriter : DatumWriter { - - override fun close() { - TODO("Not yet implemented") - } - - override fun write(datum: Datum?): DatumWriter { - TODO("Not yet implemented") - } -} diff --git a/partiql-spi/src/main/kotlin/org/partiql/spi/value/ion/IonSink.kt b/partiql-spi/src/main/kotlin/org/partiql/spi/value/ion/IonSink.kt new file mode 100644 index 000000000..eb0eb2e7d --- /dev/null +++ b/partiql-spi/src/main/kotlin/org/partiql/spi/value/ion/IonSink.kt @@ -0,0 +1,364 @@ +package org.partiql.spi.value.ion + +import com.amazon.ion.IonType +import com.amazon.ion.IonWriter +import com.amazon.ion.system.IonBinaryWriterBuilder +import com.amazon.ion.system.IonTextWriterBuilder +import org.partiql.spi.stream.PSink +import org.partiql.types.PType +import org.partiql.value.datetime.Date +import org.partiql.value.datetime.Time +import org.partiql.value.datetime.Timestamp +import java.io.OutputStream +import java.lang.Double.parseDouble +import java.math.BigDecimal +import java.util.BitSet + +/** + * IonSink is an encoder for PartiQL values using an IonWriter. + */ +public class IonSink : PSink { + + /** + * The underlying IonWriter + */ + private val writer: IonWriter + + /** + * The type elisions. + */ + private val elisions: BitSet + + /** + * Create an IonSink from an IonWriter + */ + @Suppress("ConvertSecondaryConstructorToPrimary") + public constructor(writer: IonWriter, elisions: BitSet) { + this.writer = writer + this.elisions = elisions + } + + public companion object { + + /** + * The standard Ion elisions. 
+ */ + @JvmStatic + private val elisions = intArrayOf( + PType.BOOL, + PType.BIGINT, + PType.DOUBLE, + PType.STRING, + PType.CLOB, + PType.BLOB, + PType.ARRAY, + PType.TIMESTAMP, + PType.TIMESTAMPZ, + ) + + /** + * Create a standard IonSink backed by an Ion text writer. + */ + @JvmStatic + @JvmOverloads + public fun text(out: Appendable, elisions: IntArray? = null): IonSink { + return Builder(elisions ?: this.elisions).build((IonTextWriterBuilder.standard().build(out))) + } + + /** + * Create an IonSink backed by an Ion pretty text writer. + */ + @JvmStatic + @JvmOverloads + public fun pretty(out: Appendable, elisions: IntArray? = null): IonSink { + return Builder(elisions ?: this.elisions).build((IonTextWriterBuilder.pretty().build(out))) + } + + /** + * Create an IonSink backed by an Ion binary writer. + */ + @JvmStatic + @JvmOverloads + public fun binary(out: OutputStream, elisions: IntArray? = null): IonSink { + return Builder(elisions ?: this.elisions).build((IonBinaryWriterBuilder.standard().build(out))) + } + + /** + * Create an IonSink backed by the given IonWriter with standard type decorators. + */ + @JvmStatic + public fun standard(writer: IonWriter): IonSink { + return standard().build(writer) + } + + /** + * @return a new IonSink.Builder with standard type decorators. + */ + @JvmStatic + public fun standard(): Builder = Builder(elisions) + + /** + * @return a new IonSink.Builder with all type decorators. + */ + @JvmStatic + public fun decorated(): Builder { + return Builder(intArrayOf()) + } + + /** + * @return a new IonSink.Builder with all type elisions. 
+ */ + @JvmStatic + public fun elided(): Builder { + return Builder(PType.codes()) + } + } + + override fun close() { + this.writer.close() + } + + override fun finish() { + this.writer.finish() + } + + override fun flush() { + this.writer.flush() + } + + override fun setType(type: PType) { + if (type.code() == PType.UNKNOWN) { + return // skip + } + this.writer.setTypeAnnotations(symbol(type)) + } + + override fun writeNull() { + this.writer.writeNull() + } + + override fun writeMissing() { + this.writer.writeSymbol("missing") + } + + override fun writeBool(value: Boolean) { + if (elisions[PType.BOOL]) { + this.writer.setTypeAnnotations() + } + this.writer.writeBool(value) + } + + override fun writeTinyint(value: Byte) { + if (elisions[PType.TINYINT]) { + this.writer.setTypeAnnotations() + } + this.writer.writeInt(value.toLong()) + } + + override fun writeSmallint(value: Short) { + if (elisions[PType.SMALLINT]) { + this.writer.setTypeAnnotations() + } + this.writer.writeInt(value.toLong()) + } + + override fun writeInt(value: Int) { + if (elisions[PType.INTEGER]) { + this.writer.setTypeAnnotations() + } + this.writer.writeInt(value.toLong()) + } + + override fun writeBigint(value: Long) { + if (elisions[PType.BIGINT]) { + this.writer.setTypeAnnotations() + } + this.writer.writeInt(value) + } + + override fun writeNumeric(value: BigDecimal) { + if (elisions[PType.NUMERIC]) { + this.writer.setTypeAnnotations() + } + this.writer.writeDecimal(value) + } + + override fun writeDecimal(value: BigDecimal) { + if (elisions[PType.DECIMAL]) { + this.writer.setTypeAnnotations() + } + this.writer.writeDecimal(value) + } + + override fun writeReal(value: Float) { + if (elisions[PType.REAL]) { + this.writer.setTypeAnnotations() + } + // IonWriter expects a double, + // 1. parseDouble((3.14f).toString()) -> PASS: real:3.14e0 + // 2. 
(3.14f).toDouble() -> FAIL: Expected: real::3.14e0, Actual: real::3.140000104904175e0 + val v = parseDouble(value.toString()) + this.writer.writeFloat(v) + } + + override fun writeDouble(value: Double) { + if (elisions[PType.DOUBLE]) { + this.writer.setTypeAnnotations() + } + this.writer.writeFloat(value) + } + + override fun writeChar(value: String) { + if (elisions[PType.CHAR]) { + this.writer.setTypeAnnotations() + } + this.writer.writeString(value) + } + + override fun writeVarchar(value: String) { + if (elisions[PType.VARCHAR]) { + this.writer.setTypeAnnotations() + } + this.writer.writeString(value) + } + + override fun writeString(value: String) { + if (elisions[PType.STRING]) { + this.writer.setTypeAnnotations() + } + this.writer.writeString(value) + } + + override fun writeBlob(value: ByteArray) { + if (elisions[PType.BLOB]) { + this.writer.setTypeAnnotations() + } + this.writer.writeBlob(value) + } + + override fun writeClob(value: ByteArray) { + if (elisions[PType.CLOB]) { + this.writer.setTypeAnnotations() + } + this.writer.writeClob(value) + } + + override fun writeDate(value: Date) { + TODO("datetime blocked on https://github.com/partiql/partiql-lang-kotlin/pull/1656") + } + + override fun writeTime(value: Time) { + TODO("datetime blocked on https://github.com/partiql/partiql-lang-kotlin/pull/1656") + } + + override fun writeTimez(value: Time) { + TODO("datetime blocked on https://github.com/partiql/partiql-lang-kotlin/pull/1656") + } + + override fun writeTimestamp(value: Timestamp) { + TODO("datetime blocked on https://github.com/partiql/partiql-lang-kotlin/pull/1656") + } + + override fun writeTimestampz(value: Timestamp) { + TODO("datetime blocked on https://github.com/partiql/partiql-lang-kotlin/pull/1656") + } + + override fun writeVariant(value: T) { + TODO("Not yet implemented") + } + + override fun writeField(name: String) { + this.writer.setFieldName(name) + } + + override fun stepIn(container: Int) { + when (container) { + PType.ARRAY -> 
this.writer.stepIn(IonType.LIST) + PType.BAG -> this.writer.stepIn(IonType.LIST) + PType.ROW -> this.writer.stepIn(IonType.STRUCT) + PType.STRUCT -> this.writer.stepIn(IonType.STRUCT) + else -> error("Expected ARRAY, BAG, ROW, or STRUCT, found code: $container") + } + } + + override fun stepOut() { + this.writer.stepOut() + } + + /** + * Writes a PartiQL type as an Ion symbol. + */ + private fun symbol(type: PType): String = when (type.code()) { + PType.BOOL -> "bool" + PType.TINYINT -> "tinyint" + PType.SMALLINT -> "smallint" + PType.INTEGER -> "int" + PType.BIGINT -> "bigint" + PType.NUMERIC -> "numeric(${type.precision},${type.scale})" + PType.DECIMAL -> "decimal(${type.precision},${type.scale})" + PType.REAL -> "real" + PType.DOUBLE -> "double" + PType.CHAR -> "char(${type.length})" + PType.VARCHAR -> "varchar(${type.length})" + PType.STRING -> "string" + PType.BLOB -> "blob(${type.length})" + PType.CLOB -> "clob(${type.length})" + PType.DATE -> "date" + PType.TIME -> "time(${type.precision})" + PType.TIMEZ -> "timez(${type.precision})" + PType.TIMESTAMP -> "timestamp(${type.precision})" + PType.TIMESTAMPZ -> "timestampz(${type.precision})" + PType.ARRAY -> "array<${symbol(type.typeParameter)}>" + PType.BAG -> "bag" + PType.ROW -> "row" + PType.STRUCT -> "struct" + PType.DYNAMIC -> "dynamic" + PType.UNKNOWN -> error("Unexpected UNKNOWN type") + else -> error("Unexpected ptype code: ${type.code()}") + } + + /** + * A builder to configure an IonSink. + */ + public class Builder internal constructor(elisions: IntArray) { + + /** + * You could make this a bit flag for some throughput gains (maybe). + */ + private val elisions = BitSet() + + init { + for (code in elisions) { + this.elide(code) + } + } + + /** + * Adds a type elision (removes the type decorator if it exists). + * + * @return this builder + */ + public fun elide(code: Int): Builder { + elisions[code] = true + return this + } + + /** + * Adds a type decorator (removes the type elision if it exists). 
+ * + * @return this builder + */ + public fun decorate(code: Int): Builder { + elisions[code] = false + return this + } + + /** + * @return a new IonSink instance. + */ + public fun build(writer: IonWriter): IonSink { + // impls could be smart about which direction to put branches or omit them altogether + return IonSink(writer, elisions) + } + } +} diff --git a/partiql-spi/src/test/kotlin/org/partiql/spi/value/ion/IonStreamTest.kt b/partiql-spi/src/test/kotlin/org/partiql/spi/value/ion/IonStreamTest.kt new file mode 100644 index 000000000..0b5f01050 --- /dev/null +++ b/partiql-spi/src/test/kotlin/org/partiql/spi/value/ion/IonStreamTest.kt @@ -0,0 +1,168 @@ +package org.partiql.spi.value.ion + +import com.amazon.ionelement.api.loadSingleElement +import org.junit.jupiter.api.Test +import org.partiql.spi.value.Datum +import org.partiql.spi.value.DatumWriter +import org.partiql.types.PType +import java.math.BigDecimal + +/** + * Round-trip tests for encoding PartiQL values in Ion; this currently uses all decorators. + */ +class IonStreamTest { + + /** + * Apply all directions and round-trip. 
+ */ + private fun case(ion: String, datum: Datum) { + // assertRead(ion, datum) + assertWrite(ion, datum) + // assertRoundTrip(datum) + } + + @Test + fun testNull() { + case("null", Datum.nullValue()) + case("'bool'::null", Datum.nullValue(PType.bool())) + case("'decimal(2,0)'::null", Datum.nullValue(PType.decimal(2, 0))) + } + + @Test + fun testBool() { + case("'bool'::null", Datum.nullValue(PType.bool())) + case("'bool'::true", Datum.bool(true)) + case("'bool'::false", Datum.bool(false)) + } + + @Test + fun testNumbers() { + // tinyint + case("'tinyint'::42", Datum.tinyint(42)) + case("'tinyint'::-42", Datum.tinyint(-42)) + // smallint + case("'smallint'::42", Datum.smallint(42)) + case("'smallint'::-42", Datum.smallint(-42)) + // int + case("'int'::42", Datum.integer(42)) + case("'int'::-42", Datum.integer(-42)) + // bigint + case("'bigint'::42", Datum.bigint(42)) + case("'bigint'::-42", Datum.bigint(-42)) + // decimal + case("'decimal(3,1)'::10.5", Datum.decimal(BigDecimal("10.5"), 3, 1)) + case("'decimal(3,1)'::-10.5", Datum.decimal(BigDecimal("-10.5"), 3, 1)) + // real + case("'real'::3.14e0", Datum.real(3.14f)) + case("'real'::-3.14e0", Datum.real(-3.14f)) + // double + case("'double'::3.1415e0", Datum.doublePrecision(3.1415)) + case("'double'::-3.1415e0", Datum.doublePrecision(-3.1415)) + } + + @Test + fun testText() { + // char + case("'char(1)'::\"a\"", Datum.character("a", 1)) + case("'char(3)'::\"abc\"", Datum.character("abc", 3)) + // varchar + case("'varchar(3)'::\"abc\"", Datum.varchar("abc", 3)) + case("'varchar(5)'::\"abc \"", Datum.varchar("abc ", 5)) + // string + case("'string'::\"hello\"", Datum.string("hello")) + } + + @Test + fun testLob() { + // clob + case("'clob(7)'::{{\"goodbye\"}}", Datum.clob("goodbye".toByteArray(), 7)) + // blob + case("'blob(5)'::{{aGVsbG8=}}", Datum.blob("hello".toByteArray(), 5)) + } + + @Test + fun testDatetime() { + // TODO blocked on https://github.com/partiql/partiql-lang-kotlin/pull/1656 + } + + @Test + fun 
testArray() { + // DYNAMIC ARRAY + case( + "'array'::[int::1, int::2, int::3]", + Datum.array(listOf(Datum.integer(1), Datum.integer(2), Datum.integer(3))) + ) + // INT ARRAY (should omit element types) + case( + "'array'::[1,2,3]", + Datum.array(listOf(Datum.integer(1), Datum.integer(2), Datum.integer(3)), PType.integer()) + ) + // ARRAY> + } + + /** + * Assert ion -> datum via IonSource (PSource). + */ + private fun assertRead(ion: String, datum: Datum) { + TODO() + } + + /** + * Assert datum -> ion via IonSink (PSink). + * + * @param ion + * @param datum + */ + private fun assertWrite(ion: String, datum: Datum) { + assertEquals(ion, write(datum)) + } + + /** + * Assert round-trip datum->ion->datum with no loss of information. + * + * @param datum + */ + private fun assertRoundTrip(datum: Datum) { + val e: Datum = datum + val a: Datum = read(write(e)) + assertEquals(e, a) + } + + private fun write(datum: Datum): String { + val sb = StringBuilder() + val sink = IonSink.text(sb, elisions = IntArray(0)) + val writer = DatumWriter(sink) + writer.write(datum) + return sb.toString() + } + + private fun read(ion: String): Datum { + // val source = IonSource.decorated().build(ion) + // val reader = DatumReader(source) + // return reader.read() + return Datum.nullValue() + } + + /** + * Assert ion elements are equal. + */ + private fun assertEquals(expected: String, actual: String) { + val e = loadSingleElement(expected) + val a = loadSingleElement(actual) + if (e != a) { + throw AssertionError("Expected: $expected, Actual: $actual") + } + } + + /** + * Assert ion elements are equal. 
+ */ + private fun assertEquals(expected: Datum, actual: Datum) { + val comparator = Datum.comparator() + if (comparator.compare(expected, actual) != 0) { + val e = write(expected) + val a = write(actual) + throw AssertionError("Expected: $e, Actual: $a") + } + } +} diff --git a/partiql-types/src/main/java/org/partiql/types/PType.java b/partiql-types/src/main/java/org/partiql/types/PType.java index afc74138a..3de85921b 100644 --- a/partiql-types/src/main/java/org/partiql/types/PType.java +++ b/partiql-types/src/main/java/org/partiql/types/PType.java @@ -659,6 +659,7 @@ public static PType array() { */ @NotNull public static PType array(@NotNull PType typeParam) { + // TODO optional length https://github.com/partiql/partiql-lang-kotlin/issues/1686 return new PTypeCollection(ARRAY, typeParam); } @@ -676,6 +677,7 @@ public static PType bag() { */ @NotNull public static PType bag(@NotNull PType typeParam) { + // TODO optional length https://github.com/partiql/partiql-lang-kotlin/issues/1686 return new PTypeCollection(BAG, typeParam); }