From f85e48af42f9a9c31fc5f5f388f6d0691ca09e22 Mon Sep 17 00:00:00 2001 From: Ian Streeter Date: Wed, 9 Nov 2022 08:33:30 +0000 Subject: [PATCH] common: remove duplicates from Sendgrid adapter payload (close #725) --- .../adapters/registry/SendgridAdapter.scala | 49 ++++++++++-------- .../registry/SendgridAdapterSpec.scala | 50 +++++++++++++++++++ 2 files changed, 79 insertions(+), 20 deletions(-) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/SendgridAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/SendgridAdapter.scala index 931b1797e..3ebad6aeb 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/SendgridAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/SendgridAdapter.scala @@ -111,26 +111,35 @@ object SendgridAdapter extends Adapter { case Right(json) => json.asArray match { case Some(array) => - array.toList.zipWithIndex.map { - case (item, index) => - val eventType = item.hcursor.downField("event").as[String].toOption - val queryString = toMap(payload.querystring) - lookupSchema(eventType, index, EventSchemaMap).map { schema => - RawEvent( - api = payload.api, - parameters = toUnstructEventParams( - TrackerVersion, - queryString, - schema, - cleanupJsonEventValues(item, eventType.map(("event", _)), List("timestamp")), - "srv" - ), - contentType = payload.contentType, - source = payload.source, - context = payload.context - ) - }.toValidatedNel - } + val queryString = toMap(payload.querystring) + array.zipWithIndex + .map { + case (item, index) => + val sgEventId: Option[String] = item.hcursor.downField("sg_event_id").as[String].toOption + (sgEventId, (item, index)) + } + .toMap // removes duplicate keys based of sg_event_id + .values + .toList + .map { + case (item, index) => + val eventType = item.hcursor.downField("event").as[String].toOption + lookupSchema(eventType, index, EventSchemaMap).map { schema => + RawEvent( + api = payload.api, + parameters = toUnstructEventParams( + TrackerVersion, + queryString, + schema, + cleanupJsonEventValues(item, eventType.map(("event", _)), List("timestamp")), + "srv" + ), + contentType = payload.contentType, + source = payload.source, + context = payload.context + ) + }.toValidatedNel + } case None => List( FailureDetails.AdapterFailure diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/SendgridAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/SendgridAdapterSpec.scala index 7958fc86d..b5f62bd71 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/SendgridAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/SendgridAdapterSpec.scala @@ -469,5 +469,55 @@ class SendgridAdapterSpec extends Specification with ValidatedMatchers { ) ) } + + "filter events if they are exact duplicates" in { + val inputJson = + """ + [ + { + "email":"example@test.com", + "timestamp":1446549615, + "smtp-id":"\u003c14c5d75ce93.dfd.64b469@ismtpd-555\u003e", + "event":"processed", + "category":"cat facts", + "sg_event_id":"sZROwMGMagFgnOEmSdvhig==", + "sg_message_id":"14c5d75ce93.dfd.64b469.filter0001.16648.5515E0B88.0", + "marketing_campaign_id":12345, + "marketing_campaign_name":"campaign name", + "marketing_campaign_version":"B", + "marketing_campaign_split_id":13471 + }, + { + "email":"example@test.com", + "timestamp":1446549615, + "smtp-id":"\u003c14c5d75ce93.dfd.64b469@ismtpd-555\u003e", + "event":"processed", + "category":"cat facts", + "sg_event_id":"sZROwMGMagFgnOEmSdvhig==", + "sg_message_id":"14c5d75ce93.dfd.64b469.filter0001.16648.5515E0B88.0", + "marketing_campaign_id":12345, + "marketing_campaign_name":"campaign name", + "marketing_campaign_version":"B", + "marketing_campaign_split_id":13471 + } + ] + """ + + val payload = + CollectorPayload( + Shared.api, + Nil, + ContentType.some, + inputJson.some, + Shared.cljSource, + Shared.context + ) + + val res = SendgridAdapter.toRawEvents(payload, SpecHelpers.client) + res must beValid.like { + case nel: NonEmptyList[RawEvent] => + nel.toList must have size 1 + } + } } }