Skip to content

Commit

Permalink
API Enrichment: add ability to mark fields as missing
Browse files Browse the repository at this point in the history
Currently we are very strict about building inputs. When a single value is
missing, it is no longer possible to use any of the existing inputs. This
backwards-compatible feature adds the ability to mark fields as allowed to be
missing.
  • Loading branch information
peel committed Jan 8, 2025
1 parent 1e8b01b commit 5f48351
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,11 @@ object Input {
* Describes how to take key from POJO source
* @param field `EnrichedEvent` object field
*/
final case class Pojo(key: String, field: String) extends Input
final case class Pojo(
// Key under which the pulled value appears in the resulting template context
key: String,
// Name of the `EnrichedEvent` object field to read
field: String,
// When true, a missing value is skipped instead of turning the whole template context into None;
// defaults to false so existing configurations keep treating the input as required
allowMissing: Boolean = false
) extends Input

/**
* @param field where to get this JSON, one of unstruct_event, contexts or derived_contexts
Expand All @@ -104,7 +108,8 @@ object Input {
key: String,
field: String,
criterion: SchemaCriterion,
jsonPath: String
jsonPath: String,
allowMissing: Boolean = false
) extends Input

implicit val inputApiCirceDecoder: Decoder[Input] =
Expand All @@ -116,17 +121,18 @@ object Input {
.toRight(DecodingFailure("Key is missing", cur.history))
keyString <- key.as[String]
pojo = obj.get("pojo").map { pojoJson =>
pojoJson.hcursor
.downField("field")
.as[String]
.map(field => Pojo(keyString, field))
for {
field <- pojoJson.hcursor.downField("field").as[String]
allowMissing <- pojoJson.hcursor.downField("allowMissing").as[Boolean].handleError(_ => false)
} yield Pojo(keyString, field, allowMissing)
}
json = obj.get("json").map { jsonJson =>
for {
field <- jsonJson.hcursor.downField("field").as[String]
criterion <- jsonJson.hcursor.downField("schemaCriterion").as[SchemaCriterion]
jsonPath <- jsonJson.hcursor.downField("jsonPath").as[String]
} yield Json(keyString, field, criterion, jsonPath)
allowMissing <- jsonJson.hcursor.downField("allowMissing").as[Boolean].handleError(_ => false)
} yield Json(keyString, field, criterion, jsonPath, allowMissing)
}
_ <- if (json.isDefined && pojo.isDefined)
DecodingFailure("Either json or pojo input must be specified, both provided", cur.history).asLeft
Expand Down Expand Up @@ -157,7 +163,14 @@ object Input {

/**
* Get template context out of input configurations
* If any of inputs missing it will return None
* If any required input is missing it will return None.
* If an optional input is missing it will not affect the result of the fold.
* Example 1:
* Input1 is required and present in the JSON, yielding Some(x); optional Input2 is missing, so it is skipped and the result is Some(Map(x')), built from Input1 alone
* Example 2:
* Input1 is required and missing from the JSON, yielding None; Input2 is optional and available, but the result is still None
* Example 3:
* Input1 is optional and missing, so the result is Some(Map.empty)
* @param inputs input configurations with keys and instructions on how to get values
* @param event current enriching event
* @param derivedContexts list of contexts derived on enrichment process
Expand All @@ -171,15 +184,22 @@ object Input {
derivedContexts: List[SelfDescribingData[JSON]],
customContexts: List[SelfDescribingData[JSON]],
unstructEvent: Option[SelfDescribingData[JSON]]
): TemplateContext =
): TemplateContext = {
def pull(input: Input) = input.pull(event, derivedContexts, customContexts, unstructEvent)

inputs
.traverse(_.pull(event, derivedContexts, customContexts, unstructEvent))
.traverse {
case json @ Input.Json(_, _, _, _, true) => pull(json).map(_.orElse(Some(Map.empty)))
case pojo @ Input.Pojo(_, _, true) => pull(pojo).map(_.orElse(Some(Map.empty)))
case input => pull(input)
}
.map { filledInputs =>
filledInputs.sequence // Swap List[Option[Map[K, V]]] with Option[List[Map[K, V]]]
.map(_.foldLeft(List.empty[(String, String)]) { (acc, e) =>
acc |+| e.toList
acc |+| e.filterNot(_._2.isEmpty).toList
}.toMap)
}
}

/**
* Get data out of all JSON contexts matching `schemaCriterion`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class InputSpec extends Specification with ValidatedMatchers with CatsEffect {
POJO invalid key return failure $e6
skip lookup on missing (in event) key $e7
match modelless criterion (*-*-*) $e8
skip inputs that are allowed to be missing $e9
"""

object ContextCase {
Expand Down Expand Up @@ -289,4 +290,34 @@ class InputSpec extends Specification with ValidatedMatchers with CatsEffect {
ko("Context is missing")
}
}

def e9 = {
  // Required POJO input: its value is set on the event below.
  val requiredLatitude =
    Input.Pojo(key = "latitude", field = "geo_latitude", allowMissing = false)

  // Optional JSON input pointing at `$.missing`; the expected result below
  // contains no "longitude" entry, i.e. this input is expected to be skipped.
  val geolocationCriterion = SchemaCriterion(
    "com.snowplowanalytics.snowplow",
    "geolocation_context",
    "jsonschema",
    1,
    1
  )
  val optionalLongitude = Input.Json(
    key = "longitude",
    field = "contexts",
    criterion = geolocationCriterion,
    jsonPath = "$.missing",
    allowMissing = true
  )

  val enriched = new EnrichedEvent
  enriched.setGeo_latitude(42.0f)

  val result = Input.buildTemplateContext(
    List(requiredLatitude, optionalLongitude),
    enriched,
    derivedContexts = Nil,
    customContexts = List(ContextCase.overriderContext),
    unstructEvent = None
  )

  // Only the required input contributes to the template context.
  result must beValid(Some(Map("latitude" -> "42.0")))
}

}

0 comments on commit 5f48351

Please sign in to comment.