Skip to content

Commit

Permalink
Fix Array HSerializers
Browse files: browse the repository at this point in the history
  • Loading branch information
pomadchin committed May 26, 2022
1 parent 7aad837 commit 788f16a
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@

package com.azavea.hiveless.serializers

import com.azavea.hiveless.serializers.syntax._
import com.azavea.hiveless.spark.encoders.syntax._

import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
Expand Down Expand Up @@ -106,6 +108,6 @@ object HSerializer extends Serializable {

/**
 * Derives an [[HSerializer]] for any container `C[T]` that can be viewed as a `Seq[T]`.
 *
 * The Spark type is an `ArrayType` whose element type is the element serializer's `dataType`.
 *
 * @tparam T element type; requires an [[HSerializer]], a `ClassTag`, and an implicit view `C[T] => Seq[T]`
 * @tparam C container type constructor
 * @return a serializer that packs the serialized elements of `C[T]` into Catalyst `ArrayData`
 */
implicit def arraySerializer[T: HSerializer: ClassTag: λ[τ => C[τ] => Seq[τ]], C[_]]: HSerializer[C[T]] = new HSerializer[C[T]] {
  def dataType: DataType = ArrayType(HSerializer[T].dataType)

  // Every element goes through its own serializer before being packed, so the array holds the
  // serialized representation rather than raw JVM values. Passing `seq.toArray` straight to
  // `ArrayData` skipped that step.
  // NOTE(review): `_.serialize` presumably comes from `com.azavea.hiveless.serializers.syntax._` — confirm.
  def serialize: C[T] => Any = seq => ArrayData.toArrayData(seq.map(_.serialize).toArray)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Copyright 2022 Azavea
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.azavea.hiveless.serializers

import com.azavea.hiveless.HUDF

/**
 * UDF over a `(String, Int)` argument pair that splits the string into consecutive
 * chunks of the given size and returns them as an array.
 *
 * Chunking is delegated to `grouped`, so the final chunk may be shorter than the
 * requested size when the string length is not a multiple of it.
 */
class GroupString extends HUDF[(String, Int), Array[String]] {
  def function = {
    case (text, chunkSize) =>
      val chunks = text.grouped(chunkSize)
      chunks.toArray
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright 2022 Azavea
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.azavea.hiveless.serializers

import com.azavea.hiveless.{SpatialHiveTestEnvironment, SpatialTestTables}
import org.apache.spark.sql.SparkSession
import org.scalatest.funspec.AnyFunSpec

/**
 * Checks that an array-returning UDF round-trips through Spark SQL: registers
 * `GroupString` as the `groupString` function, invokes it from a query, and compares
 * the first column of the first result row with a locally computed expectation.
 */
class HSerializerSpec extends AnyFunSpec with SpatialHiveTestEnvironment with SpatialTestTables {

  /** Registers the inherited UDFs, then adds the `groupString` function used by this spec. */
  override def registerHiveUDFs(ssc: SparkSession): Unit = {
    super.registerHiveUDFs(ssc)
    ssc.sql("CREATE OR REPLACE FUNCTION groupString as 'com.azavea.hiveless.serializers.GroupString';")
  }

  describe("HSerializerSpec") {
    it("should correctly serialize arrays of strings") {
      val str = "HSerializerSpecString"
      val n   = 3

      // Reference result computed locally with the same chunking the UDF applies.
      val expected = str.grouped(n).toArray

      val firstRow = ssc.sql(s"SELECT groupString('$str', $n);").collect().head

      firstRow.getAs[Array[String]](0) shouldBe expected
    }
  }
}

0 comments on commit 788f16a

Please sign in to comment.