diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/jsonschema/SelfSyntaxChecker.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/jsonschema/SelfSyntaxChecker.scala
index 685c034e..8aad22c0 100644
--- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/jsonschema/SelfSyntaxChecker.scala
+++ b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/jsonschema/SelfSyntaxChecker.scala
@@ -16,11 +16,12 @@ import scala.jdk.CollectionConverters._
 
 import cats.data.{Validated, ValidatedNel, NonEmptyList}
 import cats.syntax.validated._
+import cats.syntax.either._
 
 import com.fasterxml.jackson.databind.ObjectMapper
 import com.networknt.schema.{SpecVersion, JsonSchema, JsonSchemaFactory, SchemaValidatorsConfig}
 
-import io.circe.jackson.circeToJackson
+import io.circe.jackson.schemaddl.{circeToJackson, CirceToJsonError}
 
 import com.snowplowanalytics.iglu.core.SelfDescribingSchema.SelfDescribingUri
 import com.snowplowanalytics.iglu.core.circe.MetaSchemas
@@ -187,42 +188,70 @@ object SelfSyntaxChecker {
       .build()
       .getSchema(new ObjectMapper().readTree(SelfSchemaText))
 
-  def validateSchema(schema: Json): ValidatedNel[Message, Unit] = {
-    val jacksonJson = circeToJackson(schema)
-    val laxValidation = V4SchemaIgluCore
-      .validate(jacksonJson)
-      .asScala
-      .map(_ -> Linter.Level.Error) // It is an error to fail validation against v4 spec
-      .toMap
-    val selfValidation = V4SchemaSelfSyntax
-      .validate(jacksonJson)
-      .asScala
-      .map(_ -> Linter.Level.Error) // It is an error to fail validation of Iglu's `$schema` and `self` properties
-      .toMap
-    val strictValidation = V4SchemaStrict
-      .validate(jacksonJson)
-      .asScala
-      .map(_ -> Linter.Level.Warning) // It is a warning to fail the strict validation
-      .toMap
-
-    (strictValidation ++ laxValidation ++ selfValidation) // Order is important: Errors override Warnings for identical messages
-      .toList
-      .map { case (message, level) =>
-        val pointer = JsonPath.parse(message.getPath).map(JsonPath.toPointer) match {
-          case Right(Right(value)) => value
-          case Right(Left(inComplete)) => inComplete
-          case Left(_) => Pointer.Root
-        }
-        Message(pointer, message.getMessage, level)
-      }.valid.swap match {
-        case Validated.Invalid(Nil) =>
-          ().validNel
-        case Validated.Invalid(h :: t) =>
-          NonEmptyList(h, t).invalid
-        case Validated.Valid(_) =>
-          ().validNel
-      }
-  }
+  /**
+   * Validates a schema without limiting how deeply it may be nested
+   *
+   * Delegates to `validateSchema(schema, maxJsonDepth)` with `Int.MaxValue` as the limit
+   *
+   * @param schema JSON Schema to validate
+   * @return unit when no check reports anything, otherwise all reported messages
+   */
+  @deprecated("Use `validateSchema(schema, maxJsonDepth)`", "0.24.0")
+  def validateSchema(schema: Json): ValidatedNel[Message, Unit] =
+    validateSchema(schema, Int.MaxValue)
+
+  /**
+   * Validates a schema against the lax, self-describing and strict v4 checks,
+   * after converting it to Jackson with a limit on its nesting depth
+   *
+   * @param schema JSON Schema to validate
+   * @param maxJsonDepth maximum nesting depth accepted by the conversion to Jackson;
+   *                     when it is exceeded a single error at the root pointer is
+   *                     returned and none of the checks is run
+   * @return unit when no check reports anything, otherwise all reported messages:
+   *         errors from the lax and self-describing checks, warnings from the strict one
+   */
+  def validateSchema(schema: Json, maxJsonDepth: Int): ValidatedNel[Message, Unit] =
+    circeToJackson(schema, maxJsonDepth).toValidated
+      .leftMap {
+        case CirceToJsonError.MaxDepthExceeded =>
+          NonEmptyList.one(
+            Message(Pointer.Root, CirceToJsonError.MaxDepthExceeded.message, Linter.Level.Error)
+          )
+      }
+      .andThen { jacksonJson =>
+        val laxValidation = V4SchemaIgluCore
+          .validate(jacksonJson)
+          .asScala
+          .map(_ -> Linter.Level.Error) // It is an error to fail validation against v4 spec
+          .toMap
+        val selfValidation = V4SchemaSelfSyntax
+          .validate(jacksonJson)
+          .asScala
+          .map(_ -> Linter.Level.Error) // It is an error to fail validation of Iglu's `$schema` and `self` properties
+          .toMap
+        val strictValidation = V4SchemaStrict
+          .validate(jacksonJson)
+          .asScala
+          .map(_ -> Linter.Level.Warning) // It is a warning to fail the strict validation
+          .toMap
+
+        val messages = (strictValidation ++ laxValidation ++ selfValidation) // Order is important: Errors override Warnings for identical messages
+          .toList
+          .map { case (message, level) =>
+            val pointer = JsonPath.parse(message.getPath).map(JsonPath.toPointer) match {
+              case Right(Right(value)) => value
+              case Right(Left(inComplete)) => inComplete
+              case Left(_) => Pointer.Root
+            }
+            Message(pointer, message.getMessage, level)
+          }
+
+        // An empty list means that none of the checks reported anything
+        NonEmptyList
+          .fromList(messages)
+          .fold(().validNel[Message])(reported => Validated.invalid(reported))
+      }
 
   /**
    * Validates that a self-describing JSON contains the correct schema keyword.
diff --git a/modules/core/src/main/scala/io/circe/jackson/schemaddl/CirceToJsonError.scala b/modules/core/src/main/scala/io/circe/jackson/schemaddl/CirceToJsonError.scala
new file mode 100644
index 00000000..c53785f6
--- /dev/null
+++ b/modules/core/src/main/scala/io/circe/jackson/schemaddl/CirceToJsonError.scala
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2014-2024 Snowplow Analytics Ltd. All rights reserved.
+ *
+ * This program is licensed to you under the Apache License Version 2.0,
+ * and you may not use this file except in compliance with the Apache License Version 2.0.
+ * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the Apache License Version 2.0 is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
+ */
+
+package io.circe.jackson.schemaddl
+
+/** Reasons why a circe `Json` could not be converted to a Jackson `JsonNode` */
+sealed trait CirceToJsonError extends Product with Serializable {
+  /** Human-readable description of the failure */
+  def message: String
+}
+
+object CirceToJsonError {
+  /** The JSON is nested deeper than the limit passed to `circeToJackson` */
+  case object MaxDepthExceeded extends CirceToJsonError {
+    override def message: String = "Maximum allowed JSON depth exceeded"
+  }
+}
diff --git a/modules/core/src/main/scala/io/circe/jackson/schemaddl/package.scala b/modules/core/src/main/scala/io/circe/jackson/schemaddl/package.scala
new file mode 100644
index 00000000..7894f40a
--- /dev/null
+++ b/modules/core/src/main/scala/io/circe/jackson/schemaddl/package.scala
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2014-2024 Snowplow Analytics Ltd. All rights reserved.
+ *
+ * This program is licensed to you under the Apache License Version 2.0,
+ * and you may not use this file except in compliance with the Apache License Version 2.0.
+ * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the Apache License Version 2.0 is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
+ */
+
+package io.circe
+package jackson
+
+import cats.syntax.either._
+import cats.syntax.traverse._
+
+import scala.jdk.CollectionConverters._
+
+import java.math.{BigDecimal => JBigDecimal}
+
+import com.fasterxml.jackson.databind.JsonNode
+import com.fasterxml.jackson.databind.node._
+
+/** A hack to add max json depth check to circeToJackson */
+package object schemaddl {
+  private val negativeZeroJson: Json = Json.fromDoubleOrNull(-0.0)
+
+  /**
+   * Converts given circe's Json instance to Jackson's JsonNode
+   * Numbers with exponents exceeding Integer.MAX_VALUE are converted to strings
+   * Object fields are passed to Jackson in the order given by `JsonObject.toList`
+   * @param json instance of circe's Json
+   * @param maxJsonDepth maximum allowed nesting depth, where a scalar or an empty
+   *                     array/object counts as depth 1 and each enclosing array/object
+   *                     adds 1; no value can be converted when it is zero or negative
+   * @return converted JsonNode, or `CirceToJsonError.MaxDepthExceeded` when `json`
+   *         is nested deeper than `maxJsonDepth`
+   */
+  def circeToJackson(json: Json, maxJsonDepth: Int): Either[CirceToJsonError, JsonNode] =
+    if (maxJsonDepth <= 0) CirceToJsonError.MaxDepthExceeded.asLeft
+    else
+      json.fold(
+        NullNode.instance.asRight,
+        BooleanNode.valueOf(_).asRight,
+        number => {
+          if (json == negativeZeroJson)
+            DoubleNode.valueOf(number.toDouble)
+          else
+            number match {
+              case _: JsonBiggerDecimal | _: JsonBigDecimal =>
+                number.toBigDecimal
+                  .map(bigDecimal => DecimalNode.valueOf(bigDecimal.underlying))
+                  .getOrElse(TextNode.valueOf(number.toString))
+              case JsonLong(x) => LongNode.valueOf(x)
+              case JsonDouble(x) => DoubleNode.valueOf(x)
+              case JsonFloat(x) => FloatNode.valueOf(x)
+              case JsonDecimal(x) =>
+                try {
+                  DecimalNode.valueOf(new JBigDecimal(x))
+                } catch {
+                  case _: NumberFormatException => TextNode.valueOf(x)
+                }
+            }
+        }.asRight,
+        TextNode.valueOf(_).asRight,
+        array => array.traverse(circeToJackson(_, maxJsonDepth - 1))
+          .map { l => JsonNodeFactory.instance.arrayNode.addAll(l.asJava) },
+        obj => obj.toList.traverse {
+          case (k, v) => circeToJackson(v, maxJsonDepth - 1).map((k, _))
+        }.map { fields =>
+          // `toMap` does not guarantee iteration order, so the fields are collected
+          // into a LinkedHashMap, which iterates in insertion order
+          val ordered = new java.util.LinkedHashMap[String, JsonNode]()
+          fields.foreach { case (k, v) => ordered.put(k, v) }
+          objectNodeSetAll(JsonNodeFactory.instance.objectNode, ordered)
+        }
+      )
+
+  /** Sets every entry of `fields` on `node` through `ObjectNode.setAll` */
+  def objectNodeSetAll(node: ObjectNode, fields: java.util.Map[String, JsonNode]): JsonNode =
+    node.setAll[JsonNode](fields)
+}
diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/jsonschema/SelfSyntaxCheckerSpec.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/jsonschema/SelfSyntaxCheckerSpec.scala
index 10c5f5a6..082b762c 100644
--- a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/jsonschema/SelfSyntaxCheckerSpec.scala
+++ b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/jsonschema/SelfSyntaxCheckerSpec.scala
@@ -23,6 +23,8 @@ import com.snowplowanalytics.iglu.schemaddl.jsonschema.Linter.Level.{Error, Warn
 import com.snowplowanalytics.iglu.schemaddl.jsonschema.Linter.Message
 
 class SelfSyntaxCheckerSpec extends Specification {
+  val DefaultMaxJsonDepth = 10
+
   "validateSchema" should {
     "recognize invalid schema property" in {
       val jsonSchema =
@@ -72,7 +74,7 @@ class SelfSyntaxCheckerSpec extends Specification {
           ]
         }"""
 
-      SelfSyntaxChecker.validateSchema(jsonSchema).toEither must beLeft.like {
+      SelfSyntaxChecker.validateSchema(jsonSchema, DefaultMaxJsonDepth).toEither must beLeft.like {
         case NonEmptyList
           (Message(
             pointer,
@@ -103,7 +105,7 @@ class SelfSyntaxCheckerSpec extends Specification {
           }
         }"""
 
-      SelfSyntaxChecker.validateSchema(jsonSchema).toEither must beLeft.like {
+      SelfSyntaxChecker.validateSchema(jsonSchema, DefaultMaxJsonDepth).toEither must beLeft.like {
         case NonEmptyList(
           Message(
             pointer,
@@ -129,7 +131,7 @@ class SelfSyntaxCheckerSpec extends Specification {
           "properties": { }
         }"""
 
-      SelfSyntaxChecker.validateSchema(jsonSchema).toEither must beLeft.like {
+      SelfSyntaxChecker.validateSchema(jsonSchema, DefaultMaxJsonDepth).toEither must beLeft.like {
         case NonEmptyList
           (Message(
             pointer,
@@ -171,7 +173,7 @@ class SelfSyntaxCheckerSpec extends Specification {
           "properties": { }
         }"""
 
-      SelfSyntaxChecker.validateSchema(jsonSchema).toEither must beLeft.like {
+      SelfSyntaxChecker.validateSchema(jsonSchema, DefaultMaxJsonDepth).toEither must beLeft.like {
         case NonEmptyList(Message(_, msg, Error), Nil) =>
           msg must contain("does not match the regex pattern")
       }
@@ -205,7 +207,7 @@ class SelfSyntaxCheckerSpec extends Specification {
           "properties": { }
         }"""
 
-      SelfSyntaxChecker.validateSchema(jsonSchema).toEither must beLeft.like {
+      SelfSyntaxChecker.validateSchema(jsonSchema, DefaultMaxJsonDepth).toEither must beLeft.like {
         case NonEmptyList(Message(_, msg, Error), Nil) =>
           msg must contain("does not match the regex pattern")
       }
@@ -229,7 +231,7 @@ class SelfSyntaxCheckerSpec extends Specification {
           "properties": { }
         }"""
 
-      SelfSyntaxChecker.validateSchema(jsonSchema).toEither must beLeft.like {
+      SelfSyntaxChecker.validateSchema(jsonSchema, DefaultMaxJsonDepth).toEither must beLeft.like {
         case NonEmptyList(Message(_, msg, Error), Nil) =>
           msg must contain("does not match the regex pattern")
       }
@@ -251,7 +253,7 @@ class SelfSyntaxCheckerSpec extends Specification {
           "properties": { }
         }"""
 
-      SelfSyntaxChecker.validateSchema(jsonSchema).toEither must beLeft.like {
+      SelfSyntaxChecker.validateSchema(jsonSchema, DefaultMaxJsonDepth).toEither must beLeft.like {
         case NonEmptyList(Message(_, msg, Error), Nil) =>
           msg must contain("does not match the regex pattern")
       }
@@ -274,7 +276,46 @@ class SelfSyntaxCheckerSpec extends Specification {
           "properties": { }
         }"""
 
-      SelfSyntaxChecker.validateSchema(jsonSchema).toEither must beRight
+      SelfSyntaxChecker.validateSchema(jsonSchema, DefaultMaxJsonDepth).toEither must beRight
+    }
+
+    "disallow schema that exceeds maximum allowed JSON depth" in {
+      val jsonSchema =
+        json"""{
+          "$$schema" : "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#",
+          "description": "Schema for an example event",
+          "self": {
+            "vendor": "com.snowplowanalytics",
+            "name": "example_event",
+            "format": "jsonschema",
+            "version": "1-0-0"
+          },
+          "type": "object",
+          "properties": {
+            "example_field": {
+              "type": "array",
+              "description": "the example_field is a collection of user names",
+              "users": {
+                "type": "object",
+                "properties": {
+                  "name": {
+                    "type": "string",
+                    "maxLength": 128
+                  }
+                },
+                "required": [
+                  "id"
+                ],
+                "additionalProperties": false
+              }
+            }
+          }
+        }"""
+
+      SelfSyntaxChecker.validateSchema(jsonSchema, 5).toEither must beLeft.like {
+        case NonEmptyList(Message(_, msg, Error), Nil) =>
+          msg must contain("Maximum allowed JSON depth exceeded")
+      }
     }
   }