diff --git a/README.md b/README.md index 5f902a7..293cc5d 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ This library contains several APIs to read data from various sources of differen This library supports below source systems: * Text +* Excel ## text @@ -18,8 +19,6 @@ Supported text formats are: Please see the detailed documentation [here](text/README.md). -* Excel - ## excel User can use this library to read the data from an excel file and parse it to the spark dataframe. diff --git a/build.sbt b/build.sbt index 62e0c6c..db9b2a0 100644 --- a/build.sbt +++ b/build.sbt @@ -50,6 +50,8 @@ val scalaParserCombinatorsVersion = "2.3.0" val sparkVersion = "3.4.1" val sparkXMLVersion = "0.16.0" val zioConfigVersion = "4.0.0-RC16" +val crealyticsVersion = "3.4.1_0.19.0" +val poiVersion = "5.2.5" // ----- TOOL DEPENDENCIES ----- // @@ -81,11 +83,11 @@ val zioConfigDependencies = Seq( ).map(_ excludeAll ("org.scala-lang.modules", "scala-collection-compat")) val crealyticsDependencies = Seq( - "com.crealytics" %% "spark-excel" % "3.4.1_0.19.0" + "com.crealytics" %% "spark-excel" % crealyticsVersion ).map(_.cross(CrossVersion.for3Use2_13)) val poiDependencies = Seq( - "org.apache.poi" % "poi" % "5.2.5" + "org.apache.poi" % "poi" % poiVersion ) // ----- MODULE DEPENDENCIES ----- // @@ -111,8 +113,7 @@ lazy val `data-scalaxy-reader` = (project in file(".")) publish / skip := true, publishLocal / skip := true ) - .aggregate(`reader-text`) - .aggregate(`reader-excel`) + .aggregate(`reader-text`, `reader-excel`) lazy val `reader-text` = (project in file("text")) .settings( @@ -124,4 +125,4 @@ lazy val `reader-excel` = (project in file("excel")) .settings( version := "1.0.0", libraryDependencies ++= excelDependencies - ) \ No newline at end of file + ) diff --git a/excel/src/main/scala/com/clairvoyant/data/scalaxy/reader/excel/ExcelToDataFrameReader.scala b/excel/src/main/scala/com/clairvoyant/data/scalaxy/reader/excel/ExcelToDataFrameReader.scala index 62e8e71..1a58d70 100644 --- a/excel/src/main/scala/com/clairvoyant/data/scalaxy/reader/excel/ExcelToDataFrameReader.scala +++ b/excel/src/main/scala/com/clairvoyant/data/scalaxy/reader/excel/ExcelToDataFrameReader.scala @@ -6,7 +6,7 @@ import org.apache.spark.sql.{DataFrame, SparkSession} import java.io.{ByteArrayInputStream, File, FileOutputStream, PrintWriter} -implicit object ExcelToDataFrameReader { +object ExcelToDataFrameReader { def read( bytes: Array[Byte], diff --git a/excel/src/test/resources/sample_data.xlsx b/excel/src/test/resources/sample_data.xlsx index 7929b4f..862ff58 100644 Binary files a/excel/src/test/resources/sample_data.xlsx and b/excel/src/test/resources/sample_data.xlsx differ diff --git a/excel/src/test/scala/com/clairvoyant/data/scalaxy/reader/excel/ExcelToDataFrameReaderSpec.scala b/excel/src/test/scala/com/clairvoyant/data/scalaxy/reader/excel/ExcelToDataFrameReaderSpec.scala index 7d28111..4c0307d 100644 --- a/excel/src/test/scala/com/clairvoyant/data/scalaxy/reader/excel/ExcelToDataFrameReaderSpec.scala +++ b/excel/src/test/scala/com/clairvoyant/data/scalaxy/reader/excel/ExcelToDataFrameReaderSpec.scala @@ -8,9 +8,33 @@ import scala.util.Using class ExcelToDataFrameReaderSpec extends DataFrameReader with DataFrameMatcher { - val excelToDataFrameReader: ExcelToDataFrameReader.type = ExcelToDataFrameReader - "read() - with excel filepath" should "return a dataframe with correct count and schema" in { + + val expectedDF = readJSONFromText( + """ + | [ + | { + | "Created": "2021-07-29 10:35:12", + | "Advertiser": "Zola", + | "Transaction ID": "1210730000580100000", + | "Earnings": "$0.68", + | "SID": "wlus9", + | "Status": "CONFIRMED", + | "ClickPage": "https://www.zola.com/" + | }, + | { + | "Created": "2022-04-18 07:23:54", + | "Advertiser": "TradeInn", + | "Transaction ID": "1220419021230020000", + | "Earnings": "$12.48", + | "SID": "wles7", + | "Status": "CONFIRMED", + | "ClickPage": "https://www.tradeinn.com/" + | } + | ] + |""".stripMargin + ) + val file = new java.io.File("excel/src/test/resources/sample_data.xlsx") val byteArray: Array[Byte] = Using(new FileInputStream(file)) { fis => @@ -19,11 +43,9 @@ class ExcelToDataFrameReaderSpec extends DataFrameReader with DataFrameMatcher { byteArray }.get - val df = excelToDataFrameReader.read( + ExcelToDataFrameReader.read( byteArray, ExcelFormat(dataAddress = "'Transactions Report'!A2:G4") - ) - df.count() shouldBe 2 + ) should matchExpectedDataFrame(expectedDF) } - }