Skip to content

Commit

Permalink
Merge branch 'release/0.2.1'
Browse files Browse the repository at this point in the history
  • Loading branch information
chuwy committed Nov 20, 2017
2 parents e5a12cf + 19ab038 commit 8265e03
Show file tree
Hide file tree
Showing 6 changed files with 319 additions and 3 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
Version 0.2.1 (2017-11-20)
--------------------------
Fix non-merging matching contexts (#44)

Version 0.2.0 (2017-05-24)
--------------------------
Bump SBT to 0.13.15 (#32)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ limitations under the License.
[license-image]: http://img.shields.io/badge/license-Apache--2-blue.svg?style=flat
[license]: http://www.apache.org/licenses/LICENSE-2.0

[release-image]: http://img.shields.io/badge/release-0.2.0-blue.svg?style=flat
[release-image]: http://img.shields.io/badge/release-0.2.1-blue.svg?style=flat
[releases]: https://github.com/snowplow/snowplow-scala-analytics-sdk/releases

[setup-guide]: https://github.com/snowplow/snowplow/wiki/Scala-Analytics-SDK-setup
Expand Down
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ lazy val root = project.in(file("."))
.settings(Seq[Setting[_]](
name := "snowplow-scala-analytics-sdk",
organization := "com.snowplowanalytics",
version := "0.2.0",
version := "0.2.1",
description := "Scala analytics SDK for Snowplow",
scalaVersion := "2.10.6",
crossScalaVersions := Seq("2.10.6", "2.11.5")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -290,8 +290,45 @@ object EventTransformer {
/**
 * Converts the TSV values of an event into a pair of inventory items and event JSON.
 *
 * Every value of `eventTsv` is zipped with the matching entry of `Fields` and run through
 * `converter`. The resulting key-value pairs are folded on top of `initial`, while the
 * inventories produced along the way are unioned. The assembled JSON is then passed
 * through `foldContexts`.
 *
 * @param eventTsv event values, positionally aligned with `Fields`
 * @param initial JSON object the converted key-value pairs are merged onto
 * @return validated pair of collected inventory items and resulting event JSON
 */
private[json] def convertEvent(eventTsv: List[String], initial: JObject): Validated[(Set[InventoryItem], JObject)] = {
val initialPair = (Set.empty[InventoryItem], initial)

// NOTE(review): the next line looks like the pre-change form of the `val result = ...`
// line right below it (residue of the rendered diff) - confirm that only one of them is kept
Fields.zip(eventTsv).map(x => converter(x)).traverseEitherL.map { kvPairsList =>
val result = Fields.zip(eventTsv).map(x => converter(x)).traverseEitherL.map { kvPairsList =>
kvPairsList.fold(initialPair) { case ((accumInventory, accumObject), (inventory, kvPair)) => (accumInventory ++ inventory, kvPair ~ accumObject)}
}

// Post-process the assembled JSON so identical context keys are handled by `foldContexts`
result.map { case (inventory, json) => (inventory, foldContexts(json)) }
}

/**
 * Merge context arrays that share the same JSON key into a single array:
 * `{"contexts_foo_1": [{"value": 1}], "contexts_foo_1": [{"value": 2}]}`
 * becomes
 * `{"contexts_foo_1": [{"value": 1}, {"value": 2}]}`
 *
 * NOTE: this function makes the following assumptions:
 * 1. `JObject` can contain multiple identical keys (valid as per Json4s)
 * 2. All keys starting with `contexts_` hold arrays only
 *
 * If the second assumption is violated, non-array values are kept untouched and
 * arrays are merged only with other arrays, so no field is ever discarded.
 *
 * @param eventObject almost-ready enriched event JSON
 * @return final enriched JSON, context fields first, followed by all other fields
 */
private[json] def foldContexts(eventObject: JObject): JObject = {
  val (contexts, nonContexts) = eventObject.obj.partition { case (key, _) => key.startsWith("contexts_") }

  // Traverse all found contexts and merge-in twin-contexts
  val foldedContexts = contexts.foldLeft(List.empty[(String, JValue)]) {
    case (collapsed, (currentKey, currentContexts: JArray)) =>
      // Only an already collected *array* can absorb the current one. Checking the key
      // alone would silently drop `currentContexts` when a non-array twin was seen before.
      val alreadyCollected = collapsed.exists {
        case (contextKey, _: JArray) => contextKey == currentKey
        case _ => false
      }

      if (alreadyCollected)
        collapsed.map {
          case (contextKey, existing: JArray) if contextKey == currentKey =>
            (contextKey, JArray(existing.arr ++ currentContexts.arr))
          case other => other
        }
      else (currentKey, currentContexts) :: collapsed

    // Non-array value under a `contexts_` key: should never happen, keep it as is
    case (collapsed, nonArrayField) => nonArrayField :: collapsed
  }

  JObject(foldedContexts ++ nonContexts)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,12 @@ object JsonShredder {
* "data": {
* "value": 2
* }
* },
* {
* "schema": "iglu:com.acme/duplicated/jsonschema/1-0-1",
* "data": {
* "value": 3
* }
* }
* ]
* }
Expand All @@ -81,9 +87,12 @@ object JsonShredder {
*
* {
* "iglu:com.acme/duplicated/jsonschema/1-0-0": [{"value": 1}, {"value": 2}],
* "iglu:com.acme/duplicated/jsonschema/1-0-1": [{"value": 3}],
* "iglu:com.acme/unduplicated/jsonschema/1-0-0": [{"unique": true}]
* }
*
* NOTE: it does not merge contexts on a per-model basis (as we can see from the 1st and 2nd items above)
*
* @param contextsType contexts flavor (derived or custom)
* @param contexts Contexts JSON
* @return Contexts JSON in an Elasticsearch-compatible format
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,43 @@ class EventTransformerSpec extends Specification {
]
}"""

// Contexts fixture with three entities: one `org.schema/WebPage` and two
// `org.acme/context_one` entities whose schema versions (1-0-0 and 1-0-1) differ only in the last digit
val contextsWithDuplicate = """{
"schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0",
"data": [
{
"schema": "iglu:org.schema/WebPage/jsonschema/1-0-0",
"data": {
"genre": "blog",
"inLanguage": "en-US",
"datePublished": "2014-11-06T00:00:00Z",
"author": "Fred Blundun",
"breadcrumb": [
"blog",
"releases"
],
"keywords": [
"snowplow",
"javascript",
"tracker",
"event"
]
}
},
{
"schema": "iglu:org.acme/context_one/jsonschema/1-0-0",
"data": {
"item": 1
}
},
{
"schema": "iglu:org.acme/context_one/jsonschema/1-0-1",
"data": {
"item": 2
}
}
]
}"""

val derivedContextsJson = """{
"schema": "iglu:com.snowplowanalytics.snowplow\/contexts\/jsonschema\/1-0-1",
"data": [
Expand Down Expand Up @@ -1091,4 +1128,233 @@ class EventTransformerSpec extends Specification {
}
}

// Spec for merging contexts that end up under the same JSON key
"The 'jsonifyGoodEvent' method" should {
"successfully merge two matching contexts into 2-elements array" in {

// Field name -> value pairs. Only the values are handed to the transformer
// (see `input.map(_._2)` below), so the position of every entry is significant.
val input = List(
"app_id" -> "angry-birds",
"platform" -> "web",
"etl_tstamp" -> "2017-01-26 00:01:25.292",
"collector_tstamp" -> "2013-11-26 00:02:05",
"dvce_created_tstamp" -> "2013-11-26 00:03:57.885",
"event" -> "page_view",
"event_id" -> "c6ef3124-b53a-4b13-a233-0088f79dcbcb",
"txn_id" -> "41828",
"name_tracker" -> "cloudfront-1",
"v_tracker" -> "js-2.1.0",
"v_collector" -> "clj-tomcat-0.1.0",
"v_etl" -> "serde-0.5.2",
"user_id" -> "[email protected]",
"user_ipaddress" -> "92.231.54.234",
"user_fingerprint" -> "2161814971",
"domain_userid" -> "bc2e92ec6c204a14",
"domain_sessionidx" -> "3",
"network_userid" -> "ecdff4d0-9175-40ac-a8bb-325c49733607",
"geo_country" -> "US",
"geo_region" -> "TX",
"geo_city" -> "New York",
"geo_zipcode" -> "94109",
"geo_latitude" -> "37.443604",
"geo_longitude" -> "-122.4124",
"geo_region_name" -> "Florida",
"ip_isp" -> "FDN Communications",
"ip_organization" -> "Bouygues Telecom",
"ip_domain" -> "nuvox.net",
"ip_netspeed" -> "Cable/DSL",
"page_url" -> "http://www.snowplowanalytics.com",
"page_title" -> "On Analytics",
"page_referrer" -> "",
"page_urlscheme" -> "http",
"page_urlhost" -> "www.snowplowanalytics.com",
"page_urlport" -> "80",
"page_urlpath" -> "/product/index.html",
"page_urlquery" -> "id=GTM-DLRG",
"page_urlfragment" -> "4-conclusion",
"refr_urlscheme" -> "",
"refr_urlhost" -> "",
"refr_urlport" -> "",
"refr_urlpath" -> "",
"refr_urlquery" -> "",
"refr_urlfragment" -> "",
"refr_medium" -> "",
"refr_source" -> "",
"refr_term" -> "",
"mkt_medium" -> "",
"mkt_source" -> "",
"mkt_term" -> "",
"mkt_content" -> "",
"mkt_campaign" -> "",
"contexts" -> contextsWithDuplicate,
"se_category" -> "",
"se_action" -> "",
"se_label" -> "",
"se_property" -> "",
"se_value" -> "",
"unstruct_event" -> unstructJson,
"tr_orderid" -> "",
"tr_affiliation" -> "",
"tr_total" -> "",
"tr_tax" -> "",
"tr_shipping" -> "",
"tr_city" -> "",
"tr_state" -> "",
"tr_country" -> "",
"ti_orderid" -> "",
"ti_sku" -> "",
"ti_name" -> "",
"ti_category" -> "",
"ti_price" -> "",
"ti_quantity" -> "",
"pp_xoffset_min" -> "",
"pp_xoffset_max" -> "",
"pp_yoffset_min" -> "",
"pp_yoffset_max" -> "",
"useragent" -> "",
"br_name" -> "",
"br_family" -> "",
"br_version" -> "",
"br_type" -> "",
"br_renderengine" -> "",
"br_lang" -> "",
"br_features_pdf" -> "1",
"br_features_flash" -> "0",
"br_features_java" -> "",
"br_features_director" -> "",
"br_features_quicktime" -> "",
"br_features_realplayer" -> "",
"br_features_windowsmedia" -> "",
"br_features_gears" -> "",
"br_features_silverlight" -> "",
"br_cookies" -> "",
"br_colordepth" -> "",
"br_viewwidth" -> "",
"br_viewheight" -> "",
"os_name" -> "",
"os_family" -> "",
"os_manufacturer" -> "",
"os_timezone" -> "",
"dvce_type" -> "",
"dvce_ismobile" -> "",
"dvce_screenwidth" -> "",
"dvce_screenheight" -> "",
"doc_charset" -> "",
"doc_width" -> "",
"doc_height" -> "",
"tr_currency" -> "",
"tr_total_base" -> "",
"tr_tax_base" -> "",
"tr_shipping_base" -> "",
"ti_currency" -> "",
"ti_price_base" -> "",
"base_currency" -> "",
"geo_timezone" -> "",
"mkt_clickid" -> "",
"mkt_network" -> "",
"etl_tags" -> "",
"dvce_sent_tstamp" -> "",
"refr_domain_userid" -> "",
"refr_device_tstamp" -> "",
"derived_contexts" -> derivedContextsJson,
"domain_sessionid" -> "2b15e5c8-d3b1-11e4-b9d6-1681e6b88ec1",
"derived_tstamp" -> "2013-11-26 00:03:57.886",
"event_vendor" -> "com.snowplowanalytics.snowplow",
"event_name" -> "link_click",
"event_format" -> "jsonschema",
"event_version" -> "1-0-0",
"event_fingerprint" -> "e3dbfa9cca0412c3d4052863cefb547f",
"true_tstamp" -> "2013-11-26 00:03:57.886"
)

// Run the transformation; a rejected event aborts the example with an exception
val (_, json) = EventTransformer
.jsonifyGoodEvent(input.map(_._2).toArray)
.right
.toOption
.getOrElse(throw new RuntimeException("Event failed transformation"))

// Keep every field stored under the key expected for the two `context_one` entities
val contexts = json match {
case obj: JObject => obj.obj.filter { case (key, _) => key == "contexts_org_acme_context_one_1" }
case _ => Nil
}

// List[(Context, ContextCardinality)]
val result = contexts.map { case (k, v) => v match {
case a: JArray => (k, a.arr.length)
case _ => (k, 0)
} }

// Exactly one field with that key must remain, holding both entities
result should beEqualTo(List("contexts_org_acme_context_one_1" -> 2))
}
}

// Examples for `EventTransformer.foldContexts`. The inputs rely on Json4s
// accepting several identical keys within one `JObject`.
"The 'foldContexts' function" should {
  "successfully merge three contexts" in {
    // Three single-element arrays under the same key, interleaved with a plain field
    val input = parse(
      """
        |{
        | "contexts_one_1": [{"value": 1}],
        | "app_id": "foo",
        | "contexts_one_1": [{"value": 2}],
        | "contexts_one_1": [{"value": 3}]
        |}
      """.stripMargin).asInstanceOf[JObject]

    val expected = parse(
      """
        |{
        | "app_id": "foo",
        | "contexts_one_1": [{"value": 1}, {"value": 2}, {"value": 3}]
        |}
      """.stripMargin)

    val result = EventTransformer.foldContexts(input)
    result must beEqualTo(expected)
  }

  "not merge contexts with different models" in {
    // Keys differ in their trailing model number, so both must survive unchanged
    val input = parse(
      """
        |{
        | "app_id": "foo",
        | "contexts_one_1": [{"value": 1}],
        | "contexts_one_2": [{"value": 2}]
        |}
      """.stripMargin).asInstanceOf[JObject]

    val expected = parse(
      """
        |{
        | "app_id": "foo",
        | "contexts_one_1": [{"value": 1}],
        | "contexts_one_2": [{"value": 2}]
        |}
      """.stripMargin)

    val result = EventTransformer.foldContexts(input)
    result must beEqualTo(expected)
  }

  "successfully merge contexts with more than one context" in {
    // Multi-element arrays are concatenated in encounter order, duplicates included
    val input = parse(
      """
        |{
        | "contexts_one_1": [{"value": 1}],
        | "app_id": "foo",
        | "contexts_one_1": [{"value": 2}, {"value": 2}],
        | "contexts_one_1": [{"value": 3}, {"value": 4}]
        |}
      """.stripMargin).asInstanceOf[JObject]

    val expected = parse(
      """
        |{
        | "app_id": "foo",
        | "contexts_one_1": [{"value": 1}, {"value": 2}, {"value": 2}, {"value": 3}, {"value": 4}]
        |}
      """.stripMargin)

    val result = EventTransformer.foldContexts(input)
    result must beEqualTo(expected)
  }
}

}

0 comments on commit 8265e03

Please sign in to comment.