TIBCOSoftware · ymahajan · Apr 17, 2016 · Apr 27, 2016 · Nov 21, 2015 · Dec 30, 2015
diff --git a/.gitignore b/.gitignore
@@ -72,6 +72,7 @@ spark-tests.log
 src_managed/
 streaming-tests.log
 target/
+build-artifacts/
 unit-tests.log
 work/
 
@@ -91,3 +92,6 @@ spark-warehouse/
 *.Rproj.*
 
 .Rproj.user
+
+# gradle specific
+.gradle/
diff --git a/README.md b/README.md
@@ -1,3 +1,19 @@
+## SnappyData's extensions to Spark
+
+- SnappyData collocates Spark executors with its in-memory data store in the same JVM. To achieve this, support for an external cluster manager in Spark 2.0 is used to add a SnappyData cluster manager.
+- SnappyData's MemoryManager was needed to generate and handle memory events. A property spark.memory.manager is now used to specify a memory manager other than Spark's own.
+- To display the consumption of memory in an external embedded store, Spark's storage UI was updated.
+- Support for getting length of type (for VARCHAR) was added in the JDBCDialect class.
+- SnappyData will enable dynamic continuous queries on streams in the future. To allow that, support for registering DStreams after the streaming context has started is added.
+- For partitioning, sequence of expressions can be provided. SnappyData adds OrderlessHashPartitioning that does not take into account order of expressions while partitioning.
+- Hive client thread-local configuration changed to be instance specific.
+- Hive client added support for dropTable and listing tables for all databases.
+- RDD partitions with executor specific preferred locations will be forced to be routed to one of those executors if alive.
+- An "unsecure" version of random UUID added in DiskBlockManager for temporary file names.
+- Added a fix for SPARK-13116.
+- Increased visibility of some classes/methods.
+
+
 # Apache Spark
 
 Spark is a fast and general cluster computing system for Big Data. It provides

diff --git a/assembly/build.gradle b/assembly/build.gradle
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2017 SnappyData, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying
+ * LICENSE file.
+ */
+
+description = 'Spark Project Assembly'
+
+dependencies { // Spark modules bundled into the assembly; the last two are opt-in via -P flags
+  compile project(subprojectBase + 'snappy-spark-core_' + scalaBinaryVersion)
+  compile project(subprojectBase + 'snappy-spark-catalyst_' + scalaBinaryVersion)
+  compile project(subprojectBase + 'snappy-spark-sql_' + scalaBinaryVersion)
+  compile project(subprojectBase + 'snappy-spark-repl_' + scalaBinaryVersion)
+  compile project(subprojectBase + 'snappy-spark-streaming_' + scalaBinaryVersion)
+  compile project(subprojectBase + 'snappy-spark-streaming-kafka-0.8_' + scalaBinaryVersion)
+  compile project(subprojectBase + 'snappy-spark-streaming-kafka-0.10_' + scalaBinaryVersion)
+  compile project(subprojectBase + 'snappy-spark-sql-kafka-0.10_' + scalaBinaryVersion)
+  compile project(subprojectBase + 'snappy-spark-mllib_' + scalaBinaryVersion)
+  compile project(subprojectBase + 'snappy-spark-graphx_' + scalaBinaryVersion)
+  compile project(subprojectBase + 'snappy-spark-yarn_' + scalaBinaryVersion)
+  compile project(subprojectBase + 'snappy-spark-mesos_' + scalaBinaryVersion)
+  compile project(subprojectBase + 'snappy-spark-hive_' + scalaBinaryVersion)
+  compile project(subprojectBase + 'snappy-spark-hive-thriftserver_' + scalaBinaryVersion)
+  if (rootProject.hasProperty('kubernetes')) {
+    compile project(subprojectBase + 'snappy-spark-kubernetes_' + scalaBinaryVersion)
+  }
+  if (rootProject.hasProperty('ganglia')) { // same flag the product task records as -Pganglia
+    compile project(subprojectBase + 'snappy-spark-ganglia-lgpl_' + scalaBinaryVersion)
+  }
+}
+
+def cleanProduct() { // removes the pyspark.zip under the Spark root and the product directory
+  delete("${sparkProjectRootDir}/python/lib/pyspark.zip",
+      snappyProductDir)
+}
+clean { // hook the product cleanup onto the end of the clean task's actions
+  doLast { cleanProduct() }
+}
+
+task product(type: Zip) { // zips pyspark, then lays out the distribution under snappyProductDir
+  def examplesProject = project(subprojectBase + 'snappy-spark-examples_' + scalaBinaryVersion)
+  String yarnShuffleProject = subprojectBase + 'snappy-spark-network-yarn_' + scalaBinaryVersion
+  dependsOn jar, examplesProject.jar, "${yarnShuffleProject}:shadowJar"
+  // the Zip action itself only packs python/pyspark/** into <product>/python/lib/pyspark.zip
+  destinationDir = file("${snappyProductDir}/python/lib")
+  archiveName = 'pyspark.zip'
+  from("${sparkProjectRootDir}/python") {
+    include 'pyspark/**/*'
+  }
+
+  doFirst { // runs before the zip is written: drop any previous product output
+    cleanProduct()
+  }
+  doLast {
+    // copy all runtime dependencies (skip for top-level snappydata builds)
+    if (rootProject.name == 'snappy-spark') {
+      copy {
+        from(configurations.runtime) {
+          // exclude antlr4 explicitly (runtime is still included)
+          // that gets pulled by antlr gradle plugin
+          exclude '**antlr4-4*.jar'
+          // exclude scalatest included by spark-tags
+          exclude '**scalatest*.jar'
+        }
+        into "${snappyProductDir}/jars"
+      }
+    }
+    // copy scripts, data and other files that are part of distribution
+    copy {
+      from(sparkProjectRootDir) {
+        include 'bin/**'
+        include 'sbin/**'
+        include 'conf/**'
+        include 'data/**'
+        include 'licenses/**'
+        include 'python/**'
+        include 'examples/src/**'
+      }
+      into snappyProductDir
+    }
+    def sparkR = "${sparkProjectRootDir}/R/lib/SparkR" // GString: must resolve under the Spark root
+    if (file(sparkR).exists()) {
+      copy {
+        from sparkR
+        into "${snappyProductDir}/R/lib"
+      }
+    }
+
+    // copy yarn shuffle shadow jar
+    copy {
+      from "${project(yarnShuffleProject).buildDir}/jars"
+      into "${snappyProductDir}/yarn"
+    }
+    // copy examples jars
+    copy {
+      from "${examplesProject.buildDir}/jars"
+      into "${snappyProductDir}/examples/jars"
+    }
+    // create RELEASE file, copy README etc for top-level snappy-spark project
+    if (rootProject.name == 'snappy-spark') {
+      copy {
+        from(sparkProjectRootDir) {
+          include 'LICENSE'
+          include 'NOTICE'
+          include 'README.md'
+        }
+        into snappyProductDir
+      }
+      def releaseFile = file("${snappyProductDir}/RELEASE")
+      String buildFlags = ''
+      if (rootProject.hasProperty('docker')) {
+        buildFlags += ' -Pdocker'
+      }
+      if (rootProject.hasProperty('ganglia')) {
+        buildFlags += ' -Pganglia'
+      }
+      String gitRevision = "${gitCmd} rev-parse --short HEAD".execute().text.trim()
+      if (gitRevision.length() > 0) {
+        gitRevision = " (git revision ${gitRevision})"
+      }
+
+      releaseFile.append("Spark ${version}${gitRevision} built for Hadoop ${hadoopVersion}\n")
+      releaseFile.append("Build flags:${buildFlags}\n")
+    }
+  }
+}