Commit
Showing 9 changed files with 316 additions and 61 deletions.
123 changes: 123 additions & 0 deletions
...re/src/main/scala/me/mnedokushev/zio/apache/parquet/core/hadoop/GroupValueConverter.scala
@@ -0,0 +1,123 @@
package me.mnedokushev.zio.apache.parquet.core.hadoop

import me.mnedokushev.zio.apache.parquet.core.Value
import me.mnedokushev.zio.apache.parquet.core.Value.{ GroupValue, PrimitiveValue }
import org.apache.parquet.io.api.{ Binary, Converter, GroupConverter, PrimitiveConverter }
import org.apache.parquet.schema.{ GroupType, LogicalTypeAnnotation, Type }
import zio.Chunk

import scala.jdk.CollectionConverters._

abstract class GroupValueConverter[V <: GroupValue[V]](schema: GroupType) extends GroupConverter { parent =>

  def get: V =
    this.groupValue

  def put(name: String, value: Value): Unit =
    this.groupValue = this.groupValue.put(name, value)

  protected var groupValue: V = _

  // One child converter per field of the group, built eagerly from the schema.
  private val converters: Chunk[Converter] =
    Chunk.fromIterable(schema.getFields.asScala.toList.map(fromSchema))

  // Primitive fields get a PrimitiveConverter; groups annotated as LIST or MAP
  // get dedicated converters; any other group is treated as a nested record.
  private def fromSchema(schema0: Type) = {
    val name = schema0.getName

    schema0.getLogicalTypeAnnotation match {
      case _ if schema0.isPrimitive =>
        primitive(name)
      case _: LogicalTypeAnnotation.ListLogicalTypeAnnotation =>
        GroupValueConverter.list(schema0.asGroupType(), name, parent)
      case _: LogicalTypeAnnotation.MapLogicalTypeAnnotation =>
        GroupValueConverter.map(schema0.asGroupType(), name, parent)
      case _ =>
        GroupValueConverter.record(schema0.asGroupType(), name, parent)
    }
  }

  override def getConverter(fieldIndex: Int): Converter =
    converters(fieldIndex)

  // Decoded primitive values are written straight into the enclosing group's current value.
  private def primitive(name: String) =
    new PrimitiveConverter {

      override def addBinary(value: Binary): Unit =
        parent.groupValue = parent.groupValue.put(name, PrimitiveValue.BinaryValue(value))

      override def addBoolean(value: Boolean): Unit =
        parent.groupValue = parent.groupValue.put(name, PrimitiveValue.BooleanValue(value))

      override def addDouble(value: Double): Unit =
        parent.groupValue = parent.groupValue.put(name, PrimitiveValue.DoubleValue(value))

      override def addFloat(value: Float): Unit =
        parent.groupValue = parent.groupValue.put(name, PrimitiveValue.FloatValue(value))

      override def addInt(value: Int): Unit =
        parent.groupValue = parent.groupValue.put(name, PrimitiveValue.Int32Value(value))

      override def addLong(value: Long): Unit =
        parent.groupValue = parent.groupValue.put(name, PrimitiveValue.Int64Value(value))

    }

}

object GroupValueConverter {

  // Root converter: start() seeds the record with every field set to Value.nil,
  // so fields that receive no value for a given row still appear in the result.
  def root(schema: GroupType): GroupValueConverter[GroupValue.RecordValue] =
    new GroupValueConverter[GroupValue.RecordValue](schema) {

      override def start(): Unit =
        this.groupValue = Value.record(
          schema.getFields.asScala.toList.map(_.getName -> Value.nil).toMap
        )

      override def end(): Unit = ()
    }

  // Nested converters publish their completed value into the parent on end().
  def record[V <: GroupValue[V]](
    schema: GroupType,
    name: String,
    parent: GroupValueConverter[V]
  ): GroupValueConverter[GroupValue.RecordValue] =
    new GroupValueConverter[GroupValue.RecordValue](schema) {

      override def start(): Unit =
        this.groupValue = Value.record(Map.empty)

      override def end(): Unit =
        parent.put(name, this.groupValue)

    }

  def list[V <: GroupValue[V]](
    schema: GroupType,
    name: String,
    parent: GroupValueConverter[V]
  ): GroupValueConverter[GroupValue.ListValue] =
    new GroupValueConverter[GroupValue.ListValue](schema) {

      override def start(): Unit =
        this.groupValue = Value.list(Chunk.empty)

      override def end(): Unit =
        parent.put(name, this.groupValue)
    }

  def map[V <: GroupValue[V]](
    schema: GroupType,
    name: String,
    parent: GroupValueConverter[V]
  ): GroupValueConverter[GroupValue.MapValue] =
    new GroupValueConverter[GroupValue.MapValue](schema) {

      override def start(): Unit =
        this.groupValue = Value.map(Map.empty)

      override def end(): Unit =
        parent.put(name, this.groupValue)
    }

}
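For orientation, here is a minimal walkthrough sketch (not part of the commit) of how parquet-mr drives this converter tree while materializing a single row. The one-field schema, the ConverterWalkthrough object, and the literal values are illustrative assumptions; only GroupValueConverter.root, start, getConverter, end, and get come from the file above.

// Illustrative walkthrough (assumption: not part of this commit).
import me.mnedokushev.zio.apache.parquet.core.hadoop.GroupValueConverter
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName
import org.apache.parquet.schema.Types

object ConverterWalkthrough {
  def main(args: Array[String]): Unit = {
    // Hypothetical schema: message my_record { required int32 id; }
    val schema = Types
      .buildMessage()
      .required(PrimitiveTypeName.INT32)
      .named("id")
      .named("my_record")

    val root = GroupValueConverter.root(schema)

    // parquet-mr invokes these hooks once per row while reading:
    root.start()                                           // record initialized with id -> Value.nil
    root.getConverter(0).asPrimitiveConverter().addInt(42) // the primitive converter fills the id field
    root.end()

    println(root.get) // the assembled RecordValue with 42 stored under "id"
  }
}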
56 changes: 56 additions & 0 deletions
...les/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/hadoop/ParquetReader.scala
@@ -0,0 +1,56 @@
package me.mnedokushev.zio.apache.parquet.core.hadoop

import me.mnedokushev.zio.apache.parquet.core.Value.GroupValue.RecordValue
import me.mnedokushev.zio.apache.parquet.core.codec.ValueDecoder
import org.apache.hadoop.conf.Configuration
import org.apache.parquet.hadoop.{ ParquetReader => HadoopParquetReader }
import org.apache.parquet.hadoop.api.{ ReadSupport => HadoopReadSupport }
import org.apache.parquet.io.InputFile
import zio._
import zio.stream._

trait ParquetReader[A <: Product] {

  def read(path: Path): ZStream[Scope, Throwable, A]

}

final class ParquetReaderLive[A <: Product](conf: Configuration)(implicit decoder: ValueDecoder[A])
    extends ParquetReader[A] {

  override def read(path: Path): ZStream[Scope, Throwable, A] =
    for {
      inputFile <- ZStream.fromZIO(ZIO.attemptBlockingIO(path.toInputFile(conf)))
      reader    <- ZStream.fromZIO(
                     ZIO.fromAutoCloseable(
                       ZIO.attemptBlockingIO(
                         new ParquetReader.Builder(inputFile).withConf(conf).build()
                       )
                     )
                   )
      // Read rows until the underlying reader returns null (end of input); the
      // null sentinel is translated into the None failure that ends the stream.
      value     <- ZStream.repeatZIOOption(
                     ZIO
                       .attemptBlockingIO(reader.read())
                       .asSomeError
                       .filterOrFail(_ != null)(None)
                       .flatMap(decoder.decodeZIO(_).asSomeError)
                   )
    } yield value

}

object ParquetReader {

  final class Builder(file: InputFile) extends HadoopParquetReader.Builder[RecordValue](file) {

    override def getReadSupport: HadoopReadSupport[RecordValue] =
      new ReadSupport

  }

  def configured[A <: Product: ValueDecoder: Tag](
    hadoopConf: Configuration = new Configuration()
  ): ULayer[ParquetReader[A]] =
    ZLayer.succeed(new ParquetReaderLive[A](hadoopConf))

}
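The read loop above ends the stream by turning the Hadoop reader's null return into a None failure, which ZStream.repeatZIOOption treats as end-of-stream, while real errors stay wrapped in Some and fail the stream. A stripped-down sketch of just that pattern, with readAll and readOne as assumed names standing in for the reader call:

// Sketch of the stream-termination pattern used in ParquetReaderLive.read
// (illustrative; readAll and readOne are assumed names, not part of the commit).
import java.io.IOException
import zio._
import zio.stream._

object ReadLoopSketch {
  def readAll[A <: AnyRef](readOne: => A): ZStream[Any, IOException, A] =
    ZStream.repeatZIOOption(
      ZIO
        .attemptBlockingIO(readOne)      // run the blocking read on the blocking executor
        .asSomeError                     // real failures become Some(error) and fail the stream
        .filterOrFail(_ != null)(None)   // a null read means end-of-file: fail with None to end the stream
    )
}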
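Finally, a hypothetical end-to-end usage sketch: wiring the ParquetReader layer into a ZIO application and collecting the decoded records. The MyRecord case class, the file path, the Path construction, and the ??? placeholder for the ValueDecoder instance (normally obtained via the library's codec derivation, which is outside this diff) are assumptions for illustration; only the ParquetReader API and the import paths come from the file above.

// Hypothetical usage sketch (not part of the commit).
import me.mnedokushev.zio.apache.parquet.core.codec.ValueDecoder
import me.mnedokushev.zio.apache.parquet.core.hadoop.{ ParquetReader, Path }
import zio._

object ReadExample extends ZIOAppDefault {

  final case class MyRecord(id: Int, name: String)

  object MyRecord {
    // Assumption: obtain a decoder through the library's derivation API (not shown in this diff).
    implicit val decoder: ValueDecoder[MyRecord] = ???
  }

  override def run =
    ZIO
      .serviceWithZIO[ParquetReader[MyRecord]] { reader =>
        reader
          .read(Path("/tmp/records.parquet")) // Path construction here is an assumption
          .runCollect
          .flatMap(records => Console.printLine(s"read ${records.size} records"))
      }
      .provideSome[Scope](ParquetReader.configured[MyRecord]())

}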