Skip to content

Commit

Permalink
REST-159 : Incorporated the review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
mandarmarathe123 committed Feb 8, 2024
1 parent 26da76c commit 32e18b6
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 14 deletions.
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ This library contains several APIs to read data from various sources of differen
This library supports the following source systems:

* Text
* Excel

## text

Expand All @@ -18,8 +19,6 @@ Supported text formats are:

Please see the detailed documentation [here](text/README.md).

* Excel

## excel

Users can use this library to read data from an Excel file and parse it into a Spark DataFrame.
Expand Down
11 changes: 6 additions & 5 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ val scalaParserCombinatorsVersion = "2.3.0"
val sparkVersion = "3.4.1"
val sparkXMLVersion = "0.16.0"
val zioConfigVersion = "4.0.0-RC16"
val crealyticsVersion = "3.4.1_0.19.0"
val poiVersion = "5.2.5"

// ----- TOOL DEPENDENCIES ----- //

Expand Down Expand Up @@ -81,11 +83,11 @@ val zioConfigDependencies = Seq(
).map(_ excludeAll ("org.scala-lang.modules", "scala-collection-compat"))

val crealyticsDependencies = Seq(
"com.crealytics" %% "spark-excel" % "3.4.1_0.19.0"
"com.crealytics" %% "spark-excel" % crealyticsVersion
).map(_.cross(CrossVersion.for3Use2_13))

val poiDependencies = Seq(
"org.apache.poi" % "poi" % "5.2.5"
"org.apache.poi" % "poi" % poiVersion
)

// ----- MODULE DEPENDENCIES ----- //
Expand All @@ -111,8 +113,7 @@ lazy val `data-scalaxy-reader` = (project in file("."))
publish / skip := true,
publishLocal / skip := true
)
.aggregate(`reader-text`)
.aggregate(`reader-excel`)
.aggregate(`reader-text`, `reader-excel`)

lazy val `reader-text` = (project in file("text"))
.settings(
Expand All @@ -124,4 +125,4 @@ lazy val `reader-excel` = (project in file("excel"))
.settings(
version := "1.0.0",
libraryDependencies ++= excelDependencies
)
)
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import org.apache.spark.sql.{DataFrame, SparkSession}

import java.io.{ByteArrayInputStream, File, FileOutputStream, PrintWriter}

implicit object ExcelToDataFrameReader {
object ExcelToDataFrameReader {

def read(
bytes: Array[Byte],
Expand Down
Binary file modified excel/src/test/resources/sample_data.xlsx
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,33 @@ import scala.util.Using

class ExcelToDataFrameReaderSpec extends DataFrameReader with DataFrameMatcher {

val excelToDataFrameReader: ExcelToDataFrameReader.type = ExcelToDataFrameReader

"read() - with excel filepath" should "return a dataframe with correct count and schema" in {

val expectedDF = readJSONFromText(
"""
| [
| {
| "Created": "2021-07-29 10:35:12",
| "Advertiser": "Zola",
| "Transaction ID": "1210730000580100000",
| "Earnings": "$0.68",
| "SID": "wlus9",
| "Status": "CONFIRMED",
| "ClickPage": "https://www.zola.com/"
| },
| {
| "Created": "2022-04-18 07:23:54",
| "Advertiser": "TradeInn",
| "Transaction ID": "1220419021230020000",
| "Earnings": "$12.48",
| "SID": "wles7",
| "Status": "CONFIRMED",
| "ClickPage": "https://www.tradeinn.com/"
| }
| ]
|""".stripMargin
)

val file = new java.io.File("excel/src/test/resources/sample_data.xlsx")
val byteArray: Array[Byte] =
Using(new FileInputStream(file)) { fis =>
Expand All @@ -19,11 +43,9 @@ class ExcelToDataFrameReaderSpec extends DataFrameReader with DataFrameMatcher {
byteArray
}.get

val df = excelToDataFrameReader.read(
ExcelToDataFrameReader.read(
byteArray,
ExcelFormat(dataAddress = "'Transactions Report'!A2:G4")
)
df.count() shouldBe 2
) should matchExpectedDataFrame(expectedDF)
}

}

0 comments on commit 32e18b6

Please sign in to comment.