diff --git a/.run/ClosedIntervalTest.run.xml b/.run/ClosedIntervalTest.run.xml new file mode 100644 index 0000000..ff69e51 --- /dev/null +++ b/.run/ClosedIntervalTest.run.xml @@ -0,0 +1,17 @@ + + + + + + \ No newline at end of file diff --git a/pom.xml b/pom.xml index f789ea4..ac9ecc2 100644 --- a/pom.xml +++ b/pom.xml @@ -1,254 +1,299 @@ + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - 4.0.0 - ch.zzeekk.spark - spark-temporalquery_${scala.minor.version} - 2.0.1-SNAPSHOT - jar + 4.0.0 + ch.zzeekk.spark + spark-temporalquery_${scala.minor.version} + 2.0.2-SNAPSHOT + jar - Spark Temporal Queries Library - Implicit functions for querying interval data with Apache Spark/Scala - 2018 - https://github.com/zzeekk/spark-temporalquery + Spark Temporal Queries Library + Implicit functions for querying interval data with Apache Spark/Scala + 2018 + https://github.com/zzeekk/spark-temporalquery - - - MIT License - http://www.opensource.org/licenses/mit-license.php - - + + + MIT License + http://www.opensource.org/licenses/mit-license.php + + - - https://github.com/zzeekk/spark-temporalquery.git - + + https://github.com/zzeekk/spark-temporalquery.git + - - - zzeekk@gmx.net - Zach Kull - https://github.com/zzeekk - zzeekk - - + + + zzeekk@gmx.net + Zach Kull + https://github.com/zzeekk + zzeekk + + + klt@fsfe.org + Nikolaus Thiel + https://github.com/kaelte + kaelte + + - - - scala-2.12 - - 2.12 - ${scala.minor.version}.12 - 3.2.1 - - - - scala-2.11 - true - - 2.11 - ${scala.minor.version}.12 - 2.4.7 - - - - release-sonatype - - - ossrh - Central Repository OSSRH - https://s01.oss.sonatype.org/content/repositories/snapshots - - - ossrh - Central Repository OSSRH - https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/ - - - - - - - net.alchim31.maven - scala-maven-plugin - 4.3.1 - - - scala-doc - - doc-jar - - - - -no-link-warnings - - - - - - - - 
org.apache.maven.plugins - maven-gpg-plugin - 1.6 - - - sign-artifacts - verify - - sign - - - - - --pinentry-mode - loopback - - - - - - - - org.sonatype.plugins - nexus-staging-maven-plugin - 1.6.7 - true - - ossrh - https://s01.oss.sonatype.org/ - true - 10 - - - - - - + + + scala-2.12 + + 2.12 + ${scala.minor.version}.15 + 3.3.3 + + + + scala-2.11 + + true + + + 2.11 + ${scala.minor.version}.12 + 2.4.7 + + + + + com.fasterxml.jackson.core + jackson-databind + 2.13.4.2 + + + com.fasterxml.jackson.core + jackson-core + 2.13.4 + + + com.fasterxml.jackson.core + jackson-core-asl + 2.13.4 + + + com.fasterxml.jackson.core + jackson-annotations + 2.13.4 + + + com.fasterxml.jackson.module + jackson-module-scala_${scala.minor.version} + 2.13.4 + + + + + + release-sonatype + + + ossrh + Central Repository OSSRH + https://s01.oss.sonatype.org/content/repositories/snapshots + + + ossrh + Central Repository OSSRH + https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/ + + + + + + + net.alchim31.maven + scala-maven-plugin + 4.3.1 + + + scala-doc + + doc-jar + + + + -no-link-warnings + + + + + + + + org.apache.maven.plugins + maven-gpg-plugin + 3.1.0 + + + sign-artifacts + verify + + sign + + + + + --pinentry-mode + loopback + + + + + + + + org.sonatype.plugins + nexus-staging-maven-plugin + 1.6.13 + true + + ossrh + https://s01.oss.sonatype.org/ + true + 10 + + + + + + - - UTF-8 - 1.8 - 1.8 - 8 - + + UTF-8 + + 1.8 + 1.8 + 8 + - - - org.scala-lang - scala-library - ${scala.version} - provided - - - org.scala-lang - scala-reflect - ${scala.version} - provided - - - org.apache.spark - spark-sql_${scala.minor.version} - ${spark.version} - provided - - - org.scalatest - scalatest_${scala.minor.version} - 3.0.8 - test - - + + + org.scala-lang + scala-library + ${scala.version} + provided + + + org.scala-lang + scala-reflect + ${scala.version} + provided + + + org.apache.spark + spark-sql_${scala.minor.version} + ${spark.version} + provided + + + + org.codehaus.jackson 
+ jackson-mapper-asl + + + + + org.scalatest + scalatest_${scala.minor.version} + 3.0.9 + test + - - ${basedir}/src/main/scala - - - org.scala-tools - maven-scala-plugin - 2.15.2 - - - compile - - compile - - compile - - - test-compile - - testCompile - - test-compile - - - process-resources - - compile - - - - - - org.apache.maven.plugins - maven-source-plugin - 3.1.0 - - - attach-sources - - jar - - - - - - - org.spurint.maven.plugins - scala-cross-maven-plugin - 0.2.1 - - - rewrite-pom - - rewrite-pom - - - - - - - org.apache.maven.plugins - maven-surefire-plugin - 2.22.2 - - true - - - - - org.scalatest - scalatest-maven-plugin - 2.0.0 - - ${project.build.directory}/surefire-reports - . - WDF TestSuite.txt - - - - test - - test - - - - + - - + + ${basedir}/src/main/scala + + + org.scala-tools + maven-scala-plugin + 2.15.2 + + + compile + + compile + + compile + + + test-compile + + testCompile + + test-compile + + + process-resources + + compile + + + + + + org.apache.maven.plugins + maven-source-plugin + 3.1.0 + + + attach-sources + + jar + + + + + + + org.spurint.maven.plugins + scala-cross-maven-plugin + 0.2.1 + + + rewrite-pom + + rewrite-pom + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.22.2 + + true + + + + + org.scalatest + scalatest-maven-plugin + 2.0.0 + + ${project.build.directory}/surefire-reports + . 
/**
 * Mixin providing an slf4j logger named after the concrete class, together with a helper
 * that writes the runtime environment (Java, Scala and Spark versions) to that logger.
 */
trait Logging {

  /** Logger looked up by the runtime class name; created on first access and marked transient. */
  @transient
  protected lazy val logger: Logger = LoggerFactory.getLogger(getClass.getName)

  /** Flag set once [[loggEnv]] has written its output for this instance. */
  protected var _loggEnvDone: Boolean = false

  /**
   * Logs the logger's debug state and the Java, Scala and Spark versions at INFO level.
   * Subsequent calls are no-ops while `_loggEnvDone` is true.
   *
   * @param session Spark session whose context supplies the Spark version
   */
  protected def loggEnv(implicit session: SparkSession): Unit =
    if (!_loggEnvDone) {
      // Resolve every version first so nothing is logged if one of the lookups fails.
      val jvm: String = System.getProperty("java.version")
      val scalaLib: String = scala.util.Properties.versionString
      val spark = session.sparkContext.version

      val report = Seq(
        s"logger.isDebugEnabled ? ${logger.isDebugEnabled()}",
        s"Java Version : $jvm",
        s"Scala Version : $scalaLib",
        s"Spark Version : $spark"
      )
      report.foreach(line => logger.info(line))

      _loggEnvDone = true
    }

}
src/test/scala/ch/zzeekk/spark/temporalquery/ClosedIntervalTest.scala index 6a9a0c8..c40d1f9 100644 --- a/src/test/scala/ch.zzeekk.spark.temporalquery/ClosedIntervalTest.scala +++ b/src/test/scala/ch/zzeekk/spark/temporalquery/ClosedIntervalTest.scala @@ -1,13 +1,13 @@ package ch.zzeekk.spark.temporalquery -import java.sql.Timestamp -import java.time.temporal.ChronoUnit - import ch.zzeekk.spark.temporalquery.TemporalHelpers.intervalComplement -import ch.zzeekk.spark.temporalquery.TemporalQueryUtil.{TemporalClosedIntervalQueryConfig, TemporalQueryConfig} +import ch.zzeekk.spark.temporalquery.TemporalQueryUtil.TemporalClosedIntervalQueryConfig import org.apache.spark.sql.Row import org.scalatest.FunSuite +import java.sql.Timestamp +import java.time.temporal.ChronoUnit + class ClosedIntervalTest extends FunSuite with TestUtils { implicit private val timestampOrdering: Ordering[Timestamp] = Ordering.fromLessThan[Timestamp]((a,b) => a.before(b)) diff --git a/src/test/scala/ch.zzeekk.spark.temporalquery/HalfOpenIntervalTest.scala b/src/test/scala/ch/zzeekk/spark/temporalquery/HalfOpenIntervalTest.scala similarity index 100% rename from src/test/scala/ch.zzeekk.spark.temporalquery/HalfOpenIntervalTest.scala rename to src/test/scala/ch/zzeekk/spark/temporalquery/HalfOpenIntervalTest.scala diff --git a/src/test/scala/ch.zzeekk.spark.temporalquery/LinearDoubleQueryUtilTest.scala b/src/test/scala/ch/zzeekk/spark/temporalquery/LinearDoubleQueryUtilTest.scala similarity index 100% rename from src/test/scala/ch.zzeekk.spark.temporalquery/LinearDoubleQueryUtilTest.scala rename to src/test/scala/ch/zzeekk/spark/temporalquery/LinearDoubleQueryUtilTest.scala diff --git a/src/test/scala/ch.zzeekk.spark.temporalquery/LinearDoubleTestUtils.scala b/src/test/scala/ch/zzeekk/spark/temporalquery/LinearDoubleTestUtils.scala similarity index 100% rename from src/test/scala/ch.zzeekk.spark.temporalquery/LinearDoubleTestUtils.scala rename to 
src/test/scala/ch/zzeekk/spark/temporalquery/LinearDoubleTestUtils.scala diff --git a/src/test/scala/ch.zzeekk.spark.temporalquery/TemporalHelpersTest.scala b/src/test/scala/ch/zzeekk/spark/temporalquery/TemporalHelpersTest.scala similarity index 100% rename from src/test/scala/ch.zzeekk.spark.temporalquery/TemporalHelpersTest.scala rename to src/test/scala/ch/zzeekk/spark/temporalquery/TemporalHelpersTest.scala diff --git a/src/test/scala/ch.zzeekk.spark.temporalquery/TemporalQueryUtilTest.scala b/src/test/scala/ch/zzeekk/spark/temporalquery/TemporalQueryUtilTest.scala similarity index 100% rename from src/test/scala/ch.zzeekk.spark.temporalquery/TemporalQueryUtilTest.scala rename to src/test/scala/ch/zzeekk/spark/temporalquery/TemporalQueryUtilTest.scala diff --git a/src/test/scala/ch.zzeekk.spark.temporalquery/TemporalTestUtils.scala b/src/test/scala/ch/zzeekk/spark/temporalquery/TemporalTestUtils.scala similarity index 100% rename from src/test/scala/ch.zzeekk.spark.temporalquery/TemporalTestUtils.scala rename to src/test/scala/ch/zzeekk/spark/temporalquery/TemporalTestUtils.scala diff --git a/src/test/scala/ch.zzeekk.spark.temporalquery/TestUtils.scala b/src/test/scala/ch/zzeekk/spark/temporalquery/TestUtils.scala similarity index 97% rename from src/test/scala/ch.zzeekk.spark.temporalquery/TestUtils.scala rename to src/test/scala/ch/zzeekk/spark/temporalquery/TestUtils.scala index 2368a12..2c350bb 100644 --- a/src/test/scala/ch.zzeekk.spark.temporalquery/TestUtils.scala +++ b/src/test/scala/ch/zzeekk/spark/temporalquery/TestUtils.scala @@ -9,15 +9,18 @@ trait TestUtils extends Logging { implicit val session: SparkSession = SparkSession.builder .config("spark.port.maxRetries", 100) - .config("spark.ui.enabled", false) + .config("spark.ui.enabled", value = false) .config("spark.sql.shuffle.partitions", 1) .config("spark.task.maxFailures", 1) .master("local").appName("TemporalQueryUtilTest").getOrCreate() + import session.implicits._ + loggEnv + def 
symmetricDifference(df1: DataFrame, df2: DataFrame): DataFrame = { // attention, "except" works on Dataset and not on DataFrame. We need to check that schema is equal. - require(df1.columns.toSeq==df2.columns.toSeq, + require(df1.columns.toSeq == df2.columns.toSeq, s"""Cannot calculate symmetric difference for DataFrames with different schema. |schema of df1: ${df1.columns.toSeq.mkString(",")} |${df1.schema.treeString}