
Commit

Merge branch 'release/0.2.0'
chuwy committed May 24, 2017
2 parents 91f8080 + cd5e212 commit e5a12cf
Showing 20 changed files with 1,662 additions and 418 deletions.
16 changes: 16 additions & 0 deletions .travis.yml
@@ -7,3 +7,19 @@ jdk:
- oraclejdk8
script:
- sbt test
deploy:
skip_cleanup: true
provider: script
script: ./.travis/deploy.sh $TRAVIS_TAG
on:
condition: '"${TRAVIS_SCALA_VERSION}" == "2.10.6" && "${TRAVIS_JDK_VERSION}" == "oraclejdk8"'
tags: true
env:
global:
# BINTRAY_SNOWPLOW_MAVEN_USER
- secure: qETDQd6Z9R/LAV3Kijz3bZlyYJT8RV60PsfquhAk1714ztnBDXkVspPyGdrupmUOfE87ieUoZGx2h3kQvvgNPnx5AKcJZQZGQiHm817TcmdUOeiECPAOk5u3I3t/03qv7J/9yckss7degLEwX5mnJtKYE2Ao+2DZ18xUArTXMzh2k+DFsBSDqZoIyEeUAss2ZE+IQXsMxdniTccXwtAjrPPE56c9H2EXEX6mgpcc5xm80n5m5YhFUMT7hu5Amrgmn4KzfYVCoaqFFCky78I3W+hlhNsOtrcN6TC+seV5mcIbvu679G0mKsaVCQp4kJKFF9sevDoPC8RUyDUhWvlALxO+5++fr0brx3Dy+/x5zVgU+VbraKtq30qhNe8ssHnUrz9w77uwHAeKPM9o1VNN12fOejlJiFtvNYoDKTYKm/pQRGH3nDuLAmSsKcKBqCYUAR4kIgW4m75pAy/M5jmMUgAKRKvVaUhi5yc+gQunmTRdFOm9iEleOutERJQGGN+3/97KcaaMDCDw1qv+X901PPkgOKoc+c0TvIGG+pw+K1FcBzpFd1UPxVo8EgWN9WcYNQX1Y8IKCEJ5nt7EV1rTl+NZy7eF1beuhGzMfFpGTpmGLNdV5APRn23kVGBGbuN9oQmsqqNp6w2BXISrT7wI2jBXiLbTEb9Fm4dn6zwj5ao=
# BINTRAY_SNOWPLOW_MAVEN_API_KEY
- secure: a3IR33P9048TbBZ4DXnNXRv6J5Ugb6JIfc51ixFKpR7IBuuAPsSmkaJCbIgkgxkaMBCCmP6UTMoY3RRe449IrUH/x+WHLD5P7hNo0D0rGBh96Hu8EVIqSnPmpuYLmm3llEkXgKhje5spLw9af1VKzVe89V7vo4o2OFkv16eymDgs8ftC7NHTH9bPuBVKFZgmbfXtWTHwBMDp1t/zWlFTcfDXVf2IkXlFYTk9BioRsunk8MRk1gRAbhWibgZeGDo8N90ZK0pvNaU5oPzjblWaPCYmBsXkZT27wkUOBEmv78Fh9Y1LUmQNBs203zezGTSOa5zIZkpvmzY42jd8bQY9LSfArPH3Pe0NbpGjUJoySItd3ONcYngf7hY5Z9eqjWVbvkeRSbU0ysWLJtxrw2Uy34oxbOfrkh86KZhihd0iyfaKQUr5ldag+T5pqHlWrC21h/QevD2rhx5uPgAkyLdy4Mqmoeoe1p/aOFi8jK9mvET6Ie4yMzdwwEnZBL3EjuzSpBYSghMQrJntwc1Jx5PSpCNcK9CTjO/dn5nTKaq4VhABihCwezlH47+vhgChOff3cMDzLSOjvWzX7EpaMylKd0GvlUCzdezZgtqLfj0ue3zuWs/VEPUYmp7ukQWKxhMQcLTKnR/2xu4gZvdqO17jPIwRQz1EzV3pqJgKmHCD6x4=
# SONA_PASS
- secure: oDu2dXzektYr/7K5nw7EL2qDUR5AhO4Uz6XGHoOQsN1gJiovdsa5nJeDHgo2YFGpJljyTo+lABbxpGIFQpcnKGIG9eAaXIyYpRlEiksTUnZdwIlCXkRMg2l9cUr30ZDOoVS8QpQbCDdogOSqJ+RUShSuiXR8Qi2e0RfrsVucgkNogQ6w1IoB9kV8CAYsnJVzi/oenTJZjEh5qrKiUALpkiHGjB9WSIHQ80sAO/rwnr88w++HcOIqgnvhJ3/Ig3N6201Slud5pF2yVz4MxzY8bedetqNil5ffosYiU7dladOiKTVj8efZPx0cGq0dhpAZFVhehlXyu4EA24NRgKYvAIc0xWVVm49IBaMpDDI/nh24uF9fBPt2+Apj5BY/ETpKS5tFqFaGkBjlL9KFL3l2DfnWC8AfTHlBXFlkH8tKPSN4so612QAmWuULtrVuQpV8DF40HNwJoR2Lyyy5aHrZtpdjHsp3OJI83QfCxH2yTYhes4eHAxi4ynZDSDolt6mrjx651mmlQCsJWJ5KdWHQwjqzgRP8q1/bCaDYdODhrz0K1JPl6YYA+dzwRP+rFeSQbzG0yGo12p7FZGpq36/Hq9C/HSw6WVDN3Lr8CUxZr1rDhtmAvaMJG5EyYDXpNGn9j2DJX76A1Ifu7KXCp8h+FTLPa1CIxJruNxEA6vFSdqA=
- SONA_USER=snowplow
24 changes: 24 additions & 0 deletions .travis/deploy.sh
@@ -0,0 +1,24 @@
#!/bin/bash

tag_version=$1

mkdir ~/.bintray/
FILE=$HOME/.bintray/.credentials
cat <<EOF >$FILE
realm = Bintray API Realm
host = api.bintray.com
user = $BINTRAY_SNOWPLOW_MAVEN_USER
password = $BINTRAY_SNOWPLOW_MAVEN_API_KEY
EOF

cd $TRAVIS_BUILD_DIR
pwd

project_version=$(sbt version -Dsbt.log.noformat=true | perl -ne 'print $1 if /(\d+\.\d+\.\d+[^\r\n]*)/')
if [ "${project_version}" == "${tag_version}" ]; then
sbt +publish
sbt +bintraySyncMavenCentral
else
echo "Tag version '${tag_version}' doesn't match version in scala project ('${project_version}'). Aborting!"
exit 1
fi
17 changes: 17 additions & 0 deletions CHANGELOG
@@ -1,3 +1,20 @@
Version 0.2.0 (2017-05-24)
--------------------------
Bump SBT to 0.13.15 (#32)
Bump specs2 to 3.8.9 (#33)
Add support for checking and setting a DynamoDB-backed run manifest (#31)
Add transformWithInventory to the JSON EventTransformer (#34)
JSON Event Transformer: don't add null unstruct_event and contexts fields to output (#11)
Replace Scalaz Validation with Scala Either (#20)
Use standard regular expression for schema URIs (#22)
Allow empty custom contexts (#27)
Add CI/CD to project (#18)
Add Sonatype credentials to .travis.yml (#39)
Add Bintray credentials to .travis.yml (#17)
Update README markdown in accordance with CommonMark (#28)
Migrate setup guide from README to dedicated snowplow/snowplow wiki page (#29)
Migrate usage guide from README to dedicated snowplow/snowplow wiki page (#30)

Version 0.1.1 (2016-07-27)
--------------------------
Allow organisations in Iglu schema URIs to contain hyphens (#12)
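
For context on the Validation-to-Either change listed above (#20), here is a minimal, hypothetical sketch of how downstream code might consume `EventTransformer.transform` after this release. It assumes the new return type puts failure messages on the `Left` and the transformed JSON string on the `Right`; the exact signature is documented in the wiki user guide, not in this diff.

```scala
import com.snowplowanalytics.snowplow.analytics.scalasdk.json.EventTransformer

// Hypothetical consumer code (not part of this commit); assumes transform now returns
// an Either with failure messages on the Left and the event JSON string on the Right.
val lines: List[String] = List.empty // enriched-event TSV lines from your own source

val jsons: List[String] = lines
  .map(line => EventTransformer.transform(line))
  .collect { case Right(json) => json } // keep successes; drop or log Left failures
```
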
126 changes: 14 additions & 112 deletions README.md
@@ -1,123 +1,26 @@
# Snowplow Scala Analytics SDK

[![Build Status][travis-image]][travis]
[![Release] [release-image]][releases]
[![Release][release-image]][releases]
[![License][license-image]][license]

## 1. Overview
## Overview

The **[Snowplow] [snowplow]** Analytics SDK for Scala lets you work with **[Snowplow enriched events] [enriched-events]** in your Scala event processing and data modeling jobs.
The **[Snowplow][snowplow]** Analytics SDK for Scala lets you work with **[Snowplow enriched events][enriched-events]** in your Scala event processing and data modeling jobs.

Use this SDK with **[Apache Spark] [spark]**, **[AWS Lambda] [lambda]**, **[Apache Flink] [flink]**, **[Scalding] [scalding]**, **[Apache Samza] [samza]** and other Scala-compatible data processing frameworks.
Use this SDK with **[Apache Spark][spark]**, **[AWS Lambda][lambda]**, **[Apache Flink][flink]**, **[Scalding][scalding]**, **[Apache Samza][samza]** and other Scala/JVM-compatible data processing frameworks.

## 2. Functionality

The Snowplow enriched event is a relatively complex TSV string containing self-describing JSONs. Rather than work with this structure directly in Scala, use this Analytics SDK to interact with the enriched event format:
## Documentation

![sdk-usage-img] [sdk-usage-img]
[Setup guide][setup-guide] and [User guide][user-guide] are available at [Snowplow wiki][snowplow-wiki].

As the Snowplow enriched event format evolves towards a cleaner **[Apache Avro] [avro]**-based structure, we will be updating this Analytics SDK to maintain compatibility across different enriched event versions.

Currently the Analytics SDK for Scala ships with a single Event Transformer:
## Copyright and license

* The JSON Event Transformer takes a Snowplow enriched event and converts it into a JSON ready for further processing
The Snowplow Scala Analytics SDK is copyright 2016-2017 Snowplow Analytics Ltd.

### 2.1 JSON Event Transformer

The JSON Event Transformer is adapted from the code used to load Snowplow events into Elasticsearch in the Kinesis real-time pipeline.

It converts a Snowplow enriched event into a single JSON like so:

```json
{ "app_id":"demo","platform":"web","etl_tstamp":"2015-12-01T08:32:35.048Z",
"collector_tstamp":"2015-12-01T04:00:54.000Z","dvce_tstamp":"2015-12-01T03:57:08.986Z",
"event":"page_view","event_id":"f4b8dd3c-85ef-4c42-9207-11ef61b2a46e","txn_id":null,
"name_tracker":"co","v_tracker":"js-2.5.0","v_collector":"clj-1.0.0-tom-0.2.0",...
```

The most complex piece of processing is the handling of the self-describing JSONs found in the enriched event's `unstruct_event`, `contexts` and `derived_contexts` fields. All self-describing JSONs found in the event are flattened into top-level plain (i.e. not self-describing) objects within the enriched event JSON.

For example, if an enriched event contained a `com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1`, then the final JSON would contain:

```json
{ "app_id":"demo","platform":"web","etl_tstamp":"2015-12-01T08:32:35.048Z",
"unstruct_event_com_snowplowanalytics_snowplow_link_click_1": {
"targetUrl":"http://www.example.com",
"elementClasses":["foreground"],
"elementId":"exampleLink"
},...
```

## 3. Usage

### 3.1 Installation

The latest version of Snowplow Scala Analytics SDK is 0.1.0, which is cross-built against Scala 2.10.x and 2.11.x.

If you're using SBT, add the following lines to your build file:

```scala
// Resolvers
val snowplowRepo = "Snowplow Analytics" at "http://maven.snplow.com/releases/"

// Dependency
val analyticsSdk = "com.snowplowanalytics" %% "snowplow-scala-analytics-sdk" % "0.1.0"
```

Note the double percent (`%%`) between the group and artifactId. This will ensure that you get the right package for your Scala version.

### 3.2 Using from Apache Spark

The Scala Analytics SDK is a great fit for performing Snowplow **[event data modeling] [event-data-modeling]** in Apache Spark and Spark Streaming.

Here's the code we use internally for our own data modeling jobs:

```scala
import com.snowplowanalytics.snowplow.analytics.scalasdk.json.EventTransformer

val events = input
.map(line => EventTransformer.transform(line))
.filter(_.isSuccess)
.flatMap(_.toOption)

val dataframe = ctx.read.json(events)
```

### 3.3 Using from AWS Lambda

The Scala Analytics SDK is a great fit for performing **analytics-on-write** on Snowplow event streams using AWS Lambda.

Here's some sample code for transforming enriched events into JSON inside a Scala Lambda:

```scala
import com.snowplowanalytics.snowplow.analytics.scalasdk.json.EventTransformer

def recordHandler(event: KinesisEvent) {

val events = for {
rec <- event.getRecords
line = new String(rec.getKinesis.getData.array())
json = EventTransformer.transform(line)
} yield json
```

## 4. For contributors

Assuming git, **[Vagrant] [vagrant-install]** and **[VirtualBox] [virtualbox-install]** installed:

```bash
host$ git clone https://github.com/snowplow/snowplow-scala-analytics-sdk.git
host$ cd snowplow-scala-analytics-sdk
host$ vagrant up && vagrant ssh
guest$ cd /vagrant
guest$ sbt test
```

## 5. Copyright and license

The Snowplow Scala Analytics SDK is copyright 2016 Snowplow Analytics Ltd.

Licensed under the **[Apache License, Version 2.0] [license]** (the "License");
Licensed under the **[Apache License, Version 2.0][license]** (the "License");
you may not use this software except in compliance with the License.

Unless required by applicable law or agreed to in writing, software
@@ -126,16 +29,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


[travis-image]: https://travis-ci.org/snowplow/snowplow-scala-analytics-sdk.png?branch=master
[travis]: http://travis-ci.org/snowplow/snowplow-scala-analytics-sdk

[license-image]: http://img.shields.io/badge/license-Apache--2-blue.svg?style=flat
[license]: http://www.apache.org/licenses/LICENSE-2.0

[release-image]: http://img.shields.io/badge/release-0.1.1-blue.svg?style=flat
[release-image]: http://img.shields.io/badge/release-0.2.0-blue.svg?style=flat
[releases]: https://github.com/snowplow/snowplow-scala-analytics-sdk/releases

[sdk-usage-img]: https://raw.githubusercontent.com/snowplow/snowplow-scala-analytics-sdk/master/sdk-usage.png
[setup-guide]: https://github.com/snowplow/snowplow/wiki/Scala-Analytics-SDK-setup
[user-guide]: https://github.com/snowplow/snowplow/wiki/Scala-Analytics-SDK
[snowplow-wiki]: https://github.com/snowplow/snowplow/wiki

[snowplow]: http://snowplowanalytics.com
[enriched-events]: https://github.com/snowplow/snowplow/wiki/canonical-event-model
@@ -146,7 +52,3 @@ limitations under the License.
[flink]: https://flink.apache.org/
[scalding]: https://github.com/twitter/scalding
[samza]: http://samza.apache.org/
[avro]: https://avro.apache.org/

[vagrant-install]: http://docs.vagrantup.com/v2/installation/index.html
[virtualbox-install]: https://www.virtualbox.org/wiki/Downloads
39 changes: 39 additions & 0 deletions build.sbt
@@ -0,0 +1,39 @@
/*
* Copyright (c) 2016-2017 Snowplow Analytics Ltd. All rights reserved.
*
* This program is licensed to you under the Apache License Version 2.0,
* and you may not use this file except in compliance with the Apache License Version 2.0.
* You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the Apache License Version 2.0 is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
*/

lazy val root = project.in(file("."))
.settings(Seq[Setting[_]](
name := "snowplow-scala-analytics-sdk",
organization := "com.snowplowanalytics",
version := "0.2.0",
description := "Scala analytics SDK for Snowplow",
scalaVersion := "2.10.6",
crossScalaVersions := Seq("2.10.6", "2.11.5")
))
.settings(BuildSettings.buildSettings)
.settings(BuildSettings.publishSettings)
.settings(Seq(
shellPrompt := { _ => name.value + " > " }
))
.settings(
libraryDependencies ++= Seq(
// Scala
Dependencies.json4sJackson,
Dependencies.s3,
Dependencies.dynamodb,
// Scala (test only)
Dependencies.scalaCheck,
Dependencies.specs2,
Dependencies.specs2Scalacheck
)
)
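
For reference, a hypothetical consumer-side build.sbt fragment showing how a project might depend on the artifact this build publishes. It assumes the 0.2.0 release is served from the Snowplow Maven repository referenced in the pre-0.2.0 README; the authoritative instructions now live in the wiki setup guide.

```scala
// Hypothetical consumer build.sbt fragment (not part of this commit).
resolvers += "Snowplow Analytics" at "http://maven.snplow.com/releases/"

// %% selects the artifact matching your Scala binary version (2.10 or 2.11 for this release).
libraryDependencies += "com.snowplowanalytics" %% "snowplow-scala-analytics-sdk" % "0.2.0"
```
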
61 changes: 39 additions & 22 deletions project/BuildSettings.scala
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved.
* Copyright (c) 2016-2017 Snowplow Analytics Ltd. All rights reserved.
*
* This program is licensed to you under the Apache License Version 2.0,
* and you may not use this file except in compliance with the Apache License Version 2.0.
@@ -10,35 +10,52 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
*/

// SBT
import sbt._
import Keys._

// Bintray plugin
import bintray.BintrayPlugin._
import bintray.BintrayKeys._

object BuildSettings {

// Basic settings for our app
lazy val basicSettings = Seq[Setting[_]](
organization := "com.snowplowanalytics",
version := "0.1.1",
description := "Scala analytics SDK for Snowplow",
scalaVersion := "2.10.6",
crossScalaVersions := Seq("2.10.6", "2.11.5"),
scalacOptions := Seq("-deprecation", "-encoding", "utf8"),
resolvers ++= Dependencies.resolutionRepos
lazy val buildSettings = Seq(
scalacOptions := Seq(
"-deprecation",
"-encoding", "UTF-8",
"-feature",
"-unchecked",
"-Ywarn-dead-code",
"-Ywarn-inaccessible",
"-Ywarn-nullary-override",
"-Ywarn-nullary-unit",
"-Ywarn-numeric-widen",
"-Ywarn-value-discard"
)
)

// Publish settings
// TODO: update with ivy credentials etc when we start using Nexus
lazy val publishSettings = Seq[Setting[_]](
// Enables publishing to maven repo
lazy val publishSettings = bintraySettings ++ Seq(
publishMavenStyle := true,

publishTo <<= version { version =>
val basePath = "target/repo/%s".format {
if (version.trim.endsWith("SNAPSHOT")) "snapshots/" else "releases/"
}
Some(Resolver.file("Local Maven repository", file(basePath)) transactional())
}
publishArtifact := true,
publishArtifact in Test := false,
licenses += ("Apache-2.0", url("http://www.apache.org/licenses/LICENSE-2.0.html")),
bintrayOrganization := Some("snowplow"),
bintrayRepository := "snowplow-maven",
pomIncludeRepository := { _ => false },
homepage := Some(url("http://snowplowanalytics.com")),
scmInfo := Some(ScmInfo(url("https://github.com/snowplow/snowplow-scala-analytics-sdk"),
"scm:[email protected]:snowplow/snowplow-scala-analytics-sdk.git")),
pomExtra := (
<developers>
<developer>
<name>Snowplow Analytics Ltd</name>
<email>support@snowplowanalytics.com</email>
<organization>Snowplow Analytics Ltd</organization>
<organizationUrl>http://snowplowanalytics.com</organizationUrl>
</developer>
</developers>)
)

lazy val buildSettings = basicSettings ++ publishSettings
}
30 changes: 13 additions & 17 deletions project/Dependencies.scala
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved.
* Copyright (c) 2016-2017 Snowplow Analytics Ltd. All rights reserved.
*
* This program is licensed to you under the Apache License Version 2.0,
* and you may not use this file except in compliance with the Apache License Version 2.0.
@@ -11,26 +11,22 @@
* See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
*/
import sbt._
import Keys._

object Dependencies {

val resolutionRepos = Seq(
)

object V {
val json4s = "3.2.10" // See https://github.com/json4s/json4s/issues/212
val scalaz7 = "7.0.6"
val aws = "1.11.118"
val json4s = "3.2.10" // See https://github.com/json4s/json4s/issues/212
// Scala (test only)
val specs2 = "2.3.13"
val scalazSpecs2 = "0.2"
val specs2 = "3.8.9"
val scalaCheck = "1.13.4"
}

object Libraries {
val json4sJackson = "org.json4s" %% "json4s-jackson" % V.json4s
val scalaz7 = "org.scalaz" %% "scalaz-core" % V.scalaz7
// Scala (test only)
val specs2 = "org.specs2" %% "specs2" % V.specs2 % "test"
val scalazSpecs2 = "org.typelevel" %% "scalaz-specs2" % V.scalazSpecs2 % "test"
}
}
val json4sJackson = "org.json4s" %% "json4s-jackson" % V.json4s
val s3 = "com.amazonaws" % "aws-java-sdk-s3" % V.aws
val dynamodb = "com.amazonaws" % "aws-java-sdk-dynamodb" % V.aws
// Scala (test only)
val specs2 = "org.specs2" %% "specs2-core" % V.specs2 % "test"
val specs2Scalacheck = "org.specs2" %% "specs2-scalacheck" % V.specs2 % "test"
val scalaCheck = "org.scalacheck" %% "scalacheck" % V.scalaCheck % "test"
}
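
The new `s3` and `dynamodb` dependencies back the DynamoDB-based run manifest added in #31. As a rough, hypothetical sketch (not taken from this commit), the AWS clients such a manifest would build on can be constructed as below; the region value and the SDK's own manifest API are assumptions to verify against the wiki user guide.

```scala
import com.amazonaws.services.dynamodbv2.{AmazonDynamoDB, AmazonDynamoDBClientBuilder}
import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder}

// Sketch only: clients using the default credential provider chain;
// "us-east-1" is purely an illustrative assumption.
val dynamoDbClient: AmazonDynamoDB =
  AmazonDynamoDBClientBuilder.standard().withRegion("us-east-1").build()
val s3Client: AmazonS3 =
  AmazonS3ClientBuilder.standard().withRegion("us-east-1").build()
```
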
