From 3c6e672413bba185ba30e1ec14c1a603d8ec0f9e Mon Sep 17 00:00:00 2001 From: libretto Date: Wed, 13 Nov 2024 18:58:38 +0200 Subject: [PATCH] Support of JSON References --- src/karapace/schema_models.py | 48 +- src/karapace/schema_reader.py | 14 +- src/karapace/schema_registry_apis.py | 2 +- .../test_jsonschema_references.py | 1078 +++++++++++++++++ tests/integration/test_json_references.py | 229 ++++ tests/unit/test_json_resolver.py | 43 + 6 files changed, 1408 insertions(+), 6 deletions(-) create mode 100644 tests/integration/schema_registry/test_jsonschema_references.py create mode 100644 tests/integration/test_json_references.py create mode 100644 tests/unit/test_json_resolver.py diff --git a/src/karapace/schema_models.py b/src/karapace/schema_models.py index 10ffa1d82..4a2ae9c55 100644 --- a/src/karapace/schema_models.py +++ b/src/karapace/schema_models.py @@ -8,7 +8,7 @@ from avro.schema import parse as avro_parse, Schema as AvroSchema from collections.abc import Collection, Mapping, Sequence from dataclasses import dataclass -from jsonschema import Draft7Validator +from jsonschema import Draft7Validator, RefResolver from jsonschema.exceptions import SchemaError from karapace.dependency import Dependency from karapace.errors import InvalidSchema, InvalidVersion, VersionNotFoundException @@ -47,8 +47,12 @@ def parse_avro_schema_definition(s: str, validate_enum_symbols: bool = True, val return avro_parse(json_encode(json_data), validate_enum_symbols=validate_enum_symbols, validate_names=validate_names) -def parse_jsonschema_definition(schema_definition: str) -> Draft7Validator: - """Parses and validates `schema_definition`. +class InvalidValidatorRegistry(Exception): + pass + + +def parse_jsonschema_definition(schema_definition: str, resolver: RefResolver | None = None) -> Draft7Validator: + """Parses and validates `schema_definition` with its `dependencies`. Raises: SchemaError: If `schema_definition` is not a valid Draft7 schema. 
def json_resolver(schema_str: str, dependencies: Mapping[str, Dependency] | None = None) -> RefResolver | None:
    """Build a ``RefResolver`` able to resolve ``$ref`` to the referenced schemas.

    Every schema reachable through ``dependencies`` (transitively) and the main
    schema itself are decoded and stored under their own ``$id``; the returned
    resolver is rooted at the main schema and backed by that store.

    Args:
        schema_str: The main JSON schema, JSON-encoded.
        dependencies: Resolved references of the main schema, or ``None`` when
            the schema has no references.

    Returns:
        ``None`` when ``dependencies`` is ``None``; otherwise a resolver whose
        store maps each ``$id`` to its decoded schema.

    Raises:
        InvalidSchema: If the main schema or any dependency is not a JSON object
            or does not carry a string ``$id``.
    """
    # RefResolver is deprecated but it is still used in karapace code,
    # see normalize_schema_rec() in src/karapace/compatibility/jsonschema/utils.py.
    # When karapace JSON support is updated this code must be rewritten to use
    # referencing.Registry instead of RefResolver.
    if dependencies is None:
        # Nothing to resolve; bail out before doing any decoding work.
        return None

    schema_store: dict = {}

    def register(raw_schema: str) -> dict:
        # Decode one schema and index it by `$id`. With references only schemas
        # with a canonical structure are supported: a JSON object that carries
        # an `$id`. Simple schemas (bool, int, str, ...) are rejected.
        schema_json = json_decode(raw_schema)
        if not isinstance(schema_json, dict):
            raise InvalidSchema
        schema_id = schema_json.get("$id")
        if not isinstance(schema_id, str):
            # A missing `$id` used to surface as a bare KeyError that callers
            # do not translate; report it as an invalid schema instead.
            raise InvalidSchema
        schema_store[schema_id] = schema_json
        return schema_json

    # Iterative depth-first walk: a schema that has dependencies of its own is
    # pushed back (marked with None) below them, so it is registered only after
    # everything it references.
    stack: list[tuple[str, Mapping[str, Dependency] | None]] = [
        (dependency.schema.schema_str, dependency.schema.dependencies) for dependency in dependencies.values()
    ]
    while stack:
        current_schema_str, current_dependencies = stack.pop()
        if current_dependencies:
            stack.append((current_schema_str, None))
            stack.extend(
                (dependency.schema.schema_str, dependency.schema.dependencies)
                for dependency in current_dependencies.values()
            )
        else:
            register(current_schema_str)

    # The main schema goes in last, so it wins if an `$id` is duplicated.
    main_schema_json = register(schema_str)
    return RefResolver.from_schema(main_schema_json, store=schema_store)
b/src/karapace/schema_registry_apis.py @@ -1016,7 +1016,7 @@ def _validate_references( content_type=content_type, status=HTTPStatus.BAD_REQUEST, ) - if references and schema_type != SchemaType.PROTOBUF: + if references and schema_type != SchemaType.PROTOBUF and schema_type != SchemaType.JSONSCHEMA: self.r( body={ "error_code": SchemaErrorCodes.REFERENCES_SUPPORT_NOT_IMPLEMENTED.value, diff --git a/tests/integration/schema_registry/test_jsonschema_references.py b/tests/integration/schema_registry/test_jsonschema_references.py new file mode 100644 index 000000000..9b1616046 --- /dev/null +++ b/tests/integration/schema_registry/test_jsonschema_references.py @@ -0,0 +1,1078 @@ +"""Copyright (c) 2023 Aiven Ltd See LICENSE for details This version of the tests applies the same schemas as in +test_jsonschema.py, but these schemas are embedded within an additional helper schema, which is then referenced by +another schema. This setup allows us to test the behavior of JSON schema references""" +from jsonschema import Draft7Validator +from karapace.client import Client +from karapace.compatibility import CompatibilityModes +from karapace.schema_reader import SchemaType +from tests.schemas.json_schemas import ( + A_DINT_B_DINT_OBJECT_SCHEMA, + A_DINT_B_INT_OBJECT_SCHEMA, + A_DINT_B_NUM_C_DINT_OBJECT_SCHEMA, + A_DINT_B_NUM_OBJECT_SCHEMA, + A_DINT_OBJECT_SCHEMA, + A_INT_B_DINT_OBJECT_SCHEMA, + A_INT_B_DINT_REQUIRED_OBJECT_SCHEMA, + A_INT_B_INT_OBJECT_SCHEMA, + A_INT_B_INT_REQUIRED_OBJECT_SCHEMA, + A_INT_OBJECT_SCHEMA, + A_INT_OPEN_OBJECT_SCHEMA, + A_OBJECT_SCHEMA, + ARRAY_OF_INT_SCHEMA, + ARRAY_OF_NUMBER_SCHEMA, + ARRAY_OF_POSITIVE_INTEGER, + ARRAY_OF_POSITIVE_INTEGER_THROUGH_REF, + ARRAY_OF_STRING_SCHEMA, + ARRAY_SCHEMA, + B_DINT_OPEN_OBJECT_SCHEMA, + B_INT_OBJECT_SCHEMA, + B_INT_OPEN_OBJECT_SCHEMA, + B_NUM_C_DINT_OPEN_OBJECT_SCHEMA, + B_NUM_C_INT_OBJECT_SCHEMA, + B_NUM_C_INT_OPEN_OBJECT_SCHEMA, + BOOLEAN_SCHEMA, + EMPTY_OBJECT_SCHEMA, + EMPTY_SCHEMA, + ENUM_AB_SCHEMA, + 
ENUM_ABC_SCHEMA, + ENUM_BC_SCHEMA, + EXCLUSIVE_MAXIMUM_DECREASED_INTEGER_SCHEMA, + EXCLUSIVE_MAXIMUM_DECREASED_NUMBER_SCHEMA, + EXCLUSIVE_MAXIMUM_INTEGER_SCHEMA, + EXCLUSIVE_MAXIMUM_NUMBER_SCHEMA, + EXCLUSIVE_MINIMUM_INCREASED_INTEGER_SCHEMA, + EXCLUSIVE_MINIMUM_INCREASED_NUMBER_SCHEMA, + EXCLUSIVE_MINIMUM_INTEGER_SCHEMA, + EXCLUSIVE_MINIMUM_NUMBER_SCHEMA, + INT_SCHEMA, + MAX_ITEMS_DECREASED_SCHEMA, + MAX_ITEMS_SCHEMA, + MAX_LENGTH_DECREASED_SCHEMA, + MAX_LENGTH_SCHEMA, + MAX_PROPERTIES_DECREASED_SCHEMA, + MAX_PROPERTIES_SCHEMA, + MAXIMUM_DECREASED_INTEGER_SCHEMA, + MAXIMUM_DECREASED_NUMBER_SCHEMA, + MAXIMUM_INTEGER_SCHEMA, + MAXIMUM_NUMBER_SCHEMA, + MIN_ITEMS_INCREASED_SCHEMA, + MIN_ITEMS_SCHEMA, + MIN_LENGTH_INCREASED_SCHEMA, + MIN_LENGTH_SCHEMA, + MIN_PATTERN_SCHEMA, + MIN_PATTERN_STRICT_SCHEMA, + MIN_PROPERTIES_INCREASED_SCHEMA, + MIN_PROPERTIES_SCHEMA, + MINIMUM_INCREASED_INTEGER_SCHEMA, + MINIMUM_INCREASED_NUMBER_SCHEMA, + MINIMUM_INTEGER_SCHEMA, + MINIMUM_NUMBER_SCHEMA, + NOT_OF_EMPTY_SCHEMA, + NOT_OF_TRUE_SCHEMA, + NUMBER_SCHEMA, + OBJECT_SCHEMA, + ONEOF_ARRAY_A_DINT_B_NUM_SCHEMA, + ONEOF_ARRAY_B_NUM_C_DINT_OPEN_SCHEMA, + ONEOF_ARRAY_B_NUM_C_INT_SCHEMA, + ONEOF_INT_SCHEMA, + ONEOF_NUMBER_SCHEMA, + ONEOF_STRING_INT_SCHEMA, + ONEOF_STRING_SCHEMA, + PATTERN_PROPERTY_ASTAR_OBJECT_SCHEMA, + PROPERTY_NAMES_ASTAR_OBJECT_SCHEMA, + STRING_SCHEMA, + TUPLE_OF_INT_INT_OPEN_SCHEMA, + TUPLE_OF_INT_INT_SCHEMA, + TUPLE_OF_INT_OPEN_SCHEMA, + TUPLE_OF_INT_SCHEMA, + TUPLE_OF_INT_WITH_ADDITIONAL_INT_SCHEMA, + TYPES_STRING_INT_SCHEMA, + TYPES_STRING_SCHEMA, +) +from tests.utils import new_random_name + +import json + + +async def debugging_details( + newer: Draft7Validator, + older: Draft7Validator, + client: Client, + subject: str, +) -> str: + newer_schema = json.dumps(newer.schema) + older_schema = json.dumps(older.schema) + config_res = await client.get(f"config/{subject}?defaultToGlobal=true") + config = config_res.json() + return f"subject={subject} newer={newer_schema} 
async def not_schemas_are_compatible(
    newer: Draft7Validator,
    older: Draft7Validator,
    client: Client,
    compatibility_mode: CompatibilityModes,
) -> None:
    """Assert that ``newer`` is rejected as the successor of ``older`` when used as a reference.

    ``older`` is registered as the first version of a fresh ``<subject>_dependency``
    subject, and a main schema pointing at it through ``$ref`` is registered under
    ``<subject>``. ``compatibility_mode`` is then set on both subjects and ``newer``
    is posted as the next version of the dependency; that request must not
    answer with status 200.
    """
    subject = new_random_name("subject")
    dependency_subject = f"{subject}_dependency"

    # sanity check
    subject_res = await client.get(f"subjects/{subject}/versions")
    assert subject_res.status_code == 404, f"random subject should not exist {subject}"
    subject_res = await client.get(f"subjects/{dependency_subject}/versions")
    assert subject_res.status_code == 404, f"random subject should not exist {dependency_subject}"

    # Both versions of the dependency share the same `$id`, so the main schema's
    # `$ref` keeps pointing at whichever version is current.
    template_schema = {
        "$id": "https://example.com/dependency.schema.json",
        "title": "Dependency",
    }
    older_schema = dict(template_schema)
    older_schema.update(older.schema)
    older_dependency_res = await client.post(
        f"subjects/{dependency_subject}/versions",
        json={
            "schema": json.dumps(older_schema),
            "schemaType": SchemaType.JSONSCHEMA.value,
        },
    )
    assert older_dependency_res.status_code == 200, await debugging_details(newer, older, client, dependency_subject)
    assert "id" in older_dependency_res.json(), await debugging_details(newer, older, client, dependency_subject)

    main_schema = {
        "$id": "https://example.com/main.schema.json",
        "title": "Main",
        "type": "object",
        "properties": {
            "x1": {"type": "string"},
            "x2": {"type": "string"},
            "x3": {"$ref": "https://example.com/dependency.schema.json"},
        },
        "required": ["x1", "x2", "x3"],
    }

    main_res = await client.post(
        f"subjects/{subject}/versions",
        json={
            "schema": json.dumps(main_schema),
            "schemaType": SchemaType.JSONSCHEMA.value,
            "references": [{"name": "dependency.schema.json", "subject": dependency_subject, "version": 1}],
        },
    )

    # Report on the subject that was actually posted to.
    assert main_res.status_code == 200, await debugging_details(newer, older, client, subject)
    assert "id" in main_res.json(), await debugging_details(newer, older, client, subject)

    # enforce the target compatibility mode. not using the global setting
    # because that interferes with parallel runs.
    subject_config_res = await client.put(f"config/{subject}", json={"compatibility": compatibility_mode.value})
    assert subject_config_res.status_code == 200
    subject_config_res = await client.put(
        f"config/{dependency_subject}", json={"compatibility": compatibility_mode.value}
    )
    assert subject_config_res.status_code == 200

    newer_schema = dict(template_schema)
    newer_schema.update(newer.schema)
    newer_res = await client.post(
        f"subjects/{dependency_subject}/versions",
        json={
            "schema": json.dumps(newer_schema),
            "schemaType": SchemaType.JSONSCHEMA.value,
        },
    )
    # The incompatible version must be refused by the registry.
    assert newer_res.status_code != 200, await debugging_details(newer, older, client, dependency_subject)

    await sainty_check(client, subject, compatibility_mode)
{subject}" + + template_schema = { + "$id": "https://example.com/dependency.schema.json", + "title": "Dependency", + } + older_schema = dict(template_schema) + older_schema.update(older.schema) + older_dependency_res = await client.post( + f"subjects/{subject}_dependency/versions", + json={ + "schema": json.dumps(older_schema), + "schemaType": SchemaType.JSONSCHEMA.value, + }, + ) + assert older_dependency_res.status_code == 200, await debugging_details(newer, older, client, f"{subject}_dependency") + assert "id" in older_dependency_res.json(), await debugging_details(newer, older, client, f"{subject}_dependency") + + main_schema = { + "$id": "https://example.com/main.schema.json", + "title": "Main", + "type": "object", + "properties": { + "x1": {"type": "string"}, + "x2": {"type": "string"}, + "x3": {"$ref": "https://example.com/dependency.schema.json"}, + }, + "required": ["x1", "x2", "x3"], + } + + main_res = await client.post( + f"subjects/{subject}/versions", + json={ + "schema": json.dumps(main_schema), + "schemaType": SchemaType.JSONSCHEMA.value, + "references": [{"name": "dependency.schema.json", "subject": f"{subject}_dependency", "version": 1}], + }, + ) + + assert main_res.status_code == 200, await debugging_details(newer, older, client, f"{subject}_dependency") + assert "id" in main_res.json(), await debugging_details(newer, older, client, f"{subject}_dependency") + + # enforce the target compatibility mode. not using the global setting + # because that interfere with parallel runs. 
+ subject_config_res = await client.put(f"config/{subject}", json={"compatibility": compatibility_mode.value}) + assert subject_config_res.status_code == 200 + subject_config_res = await client.put(f"config/{subject}_dependency", json={"compatibility": compatibility_mode.value}) + assert subject_config_res.status_code == 200 + + newer_schema = dict(template_schema) + newer_schema.update(newer.schema) + newer_res = await client.post( + f"subjects/{subject}_dependency/versions", + json={ + "schema": json.dumps(newer_schema), + "schemaType": SchemaType.JSONSCHEMA.value, + }, + ) + assert newer_res.status_code == 200, await debugging_details(newer, older, client, f"{subject}_dependency") + # Because the IDs are global, and the same schema is used in multiple + # tests, their order is unknown. + assert older_dependency_res.json()["id"] != newer_res.json()["id"], await debugging_details( + newer, older, client, f"{subject}_dependency" + ) + + await sainty_check(client, subject, compatibility_mode) + + +async def schemas_are_backward_compatible( + reader: Draft7Validator, + writer: Draft7Validator, + client: Client, +) -> None: + await schemas_are_compatible( + # For backwards compatibility the newer schema is the reader + newer=reader, + older=writer, + client=client, + compatibility_mode=CompatibilityModes.BACKWARD, + ) + + +async def not_schemas_are_backward_compatible( + reader: Draft7Validator, + writer: Draft7Validator, + client: Client, +) -> None: + await not_schemas_are_compatible( + # For backwards compatibility the newer schema is the reader + newer=reader, + older=writer, + client=client, + compatibility_mode=CompatibilityModes.BACKWARD, + ) + + +async def test_schemaregistry_schemaregistry_extra_optional_field_with_open_model_is_compatible( + registry_async_client: Client, +) -> None: + # - the newer is an open model, the extra field produced by the older is + # automatically accepted + await schemas_are_backward_compatible( + reader=OBJECT_SCHEMA, + 
writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=EMPTY_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + + # - the older is a closed model, so the field `b` was never produced, which + # means that the older never produced an invalid value. + # - the newer's `b` field is optional, so the absenced of the field is not + # a problem, and `a` is ignored because of the open model + await schemas_are_backward_compatible( + reader=B_INT_OPEN_OBJECT_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + + # - if the model is closed, then `a` must also be accepted + await schemas_are_backward_compatible( + reader=A_INT_B_INT_OBJECT_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + + # Examples a bit more complex + await schemas_are_backward_compatible( + reader=A_DINT_B_NUM_C_DINT_OBJECT_SCHEMA, + writer=A_DINT_B_NUM_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=B_NUM_C_DINT_OPEN_OBJECT_SCHEMA, + writer=A_DINT_B_NUM_C_DINT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=B_NUM_C_INT_OPEN_OBJECT_SCHEMA, + writer=A_DINT_B_NUM_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=B_NUM_C_DINT_OPEN_OBJECT_SCHEMA, + writer=A_DINT_B_NUM_OBJECT_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_schemaregistry_extra_field_with_closed_model_is_incompatible( + registry_async_client: Client, +) -> None: + await not_schemas_are_backward_compatible( + reader=NOT_OF_TRUE_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=NOT_OF_EMPTY_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=B_INT_OBJECT_SCHEMA, + 
writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=B_NUM_C_INT_OBJECT_SCHEMA, + writer=A_DINT_B_NUM_OBJECT_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=B_NUM_C_INT_OBJECT_SCHEMA, + writer=A_DINT_B_NUM_C_DINT_OBJECT_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_schemaregistry_missing_required_field_is_incompatible(registry_async_client: Client) -> None: + await not_schemas_are_backward_compatible( + reader=A_INT_B_INT_REQUIRED_OBJECT_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=A_INT_OBJECT_SCHEMA, + writer=A_INT_B_DINT_REQUIRED_OBJECT_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_giving_a_default_value_for_a_non_required_field_is_compatible( + registry_async_client: Client, +) -> None: + await schemas_are_backward_compatible( + reader=OBJECT_SCHEMA, + writer=A_DINT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=EMPTY_SCHEMA, + writer=A_DINT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=B_DINT_OPEN_OBJECT_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=A_INT_B_DINT_OBJECT_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=A_DINT_B_INT_OBJECT_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=B_NUM_C_DINT_OPEN_OBJECT_SCHEMA, + writer=A_DINT_B_NUM_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=A_DINT_B_DINT_OBJECT_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( 
+ reader=A_DINT_B_DINT_OBJECT_SCHEMA, + writer=EMPTY_OBJECT_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_from_closed_to_open_is_incompatible(registry_async_client: Client) -> None: + await not_schemas_are_backward_compatible( + reader=B_NUM_C_INT_OBJECT_SCHEMA, + writer=B_NUM_C_DINT_OPEN_OBJECT_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_union_with_incompatible_elements(registry_async_client: Client) -> None: + await not_schemas_are_backward_compatible( + reader=ONEOF_ARRAY_B_NUM_C_INT_SCHEMA, + writer=ONEOF_ARRAY_A_DINT_B_NUM_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_union_with_compatible_elements(registry_async_client: Client) -> None: + await schemas_are_backward_compatible( + reader=ONEOF_ARRAY_B_NUM_C_DINT_OPEN_SCHEMA, + writer=ONEOF_ARRAY_A_DINT_B_NUM_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_array_and_tuples_are_incompatible(registry_async_client: Client) -> None: + await not_schemas_are_backward_compatible( + reader=TUPLE_OF_INT_OPEN_SCHEMA, + writer=ARRAY_OF_INT_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=ARRAY_OF_INT_SCHEMA, + writer=TUPLE_OF_INT_OPEN_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_schema_compatibility_successes(registry_async_client: Client) -> None: + # allowing a broader set of values is compatible + await schemas_are_backward_compatible( + reader=NUMBER_SCHEMA, + writer=INT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=ARRAY_OF_NUMBER_SCHEMA, + writer=ARRAY_OF_INT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=TUPLE_OF_INT_OPEN_SCHEMA, + writer=TUPLE_OF_INT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=TUPLE_OF_INT_WITH_ADDITIONAL_INT_SCHEMA, + 
writer=TUPLE_OF_INT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=ENUM_ABC_SCHEMA, + writer=ENUM_AB_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=ONEOF_STRING_INT_SCHEMA, + writer=ONEOF_STRING_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=ONEOF_STRING_INT_SCHEMA, + writer=STRING_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=A_INT_OPEN_OBJECT_SCHEMA, + writer=A_INT_B_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + + # requiring less values is compatible + await schemas_are_backward_compatible( + reader=TUPLE_OF_INT_OPEN_SCHEMA, + writer=TUPLE_OF_INT_INT_OPEN_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=TUPLE_OF_INT_OPEN_SCHEMA, + writer=TUPLE_OF_INT_INT_SCHEMA, + client=registry_async_client, + ) + + # equivalences + await schemas_are_backward_compatible( + reader=ONEOF_STRING_SCHEMA, + writer=STRING_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=STRING_SCHEMA, + writer=ONEOF_STRING_SCHEMA, + client=registry_async_client, + ) + + # new non-required fields is compatible + await schemas_are_backward_compatible( + reader=A_INT_OBJECT_SCHEMA, + writer=EMPTY_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=A_INT_B_INT_OBJECT_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_type_narrowing_incompabilities(registry_async_client: Client) -> None: + await not_schemas_are_backward_compatible( + reader=INT_SCHEMA, + writer=NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=ARRAY_OF_INT_SCHEMA, + writer=ARRAY_OF_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + 
reader=ENUM_AB_SCHEMA, + writer=ENUM_ABC_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=ENUM_BC_SCHEMA, + writer=ENUM_ABC_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=ONEOF_INT_SCHEMA, + writer=ONEOF_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=ONEOF_STRING_SCHEMA, + writer=ONEOF_STRING_INT_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=INT_SCHEMA, + writer=ONEOF_STRING_INT_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_type_mismatch_incompabilities(registry_async_client: Client) -> None: + await not_schemas_are_backward_compatible( + reader=BOOLEAN_SCHEMA, + writer=INT_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=INT_SCHEMA, + writer=BOOLEAN_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=STRING_SCHEMA, + writer=BOOLEAN_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=STRING_SCHEMA, + writer=INT_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=ARRAY_OF_INT_SCHEMA, + writer=ARRAY_OF_STRING_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=TUPLE_OF_INT_INT_SCHEMA, + writer=TUPLE_OF_INT_OPEN_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=TUPLE_OF_INT_INT_OPEN_SCHEMA, + writer=TUPLE_OF_INT_OPEN_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=INT_SCHEMA, + writer=ENUM_AB_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=ENUM_AB_SCHEMA, + writer=INT_SCHEMA, + client=registry_async_client, + ) + + +async def 
test_schemaregistry_schema_restrict_attributes_is_incompatible(registry_async_client: Client) -> None: + await not_schemas_are_backward_compatible( + writer=STRING_SCHEMA, + reader=MAX_LENGTH_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=MAX_LENGTH_SCHEMA, + reader=MAX_LENGTH_DECREASED_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + writer=STRING_SCHEMA, + reader=MIN_LENGTH_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=MIN_LENGTH_SCHEMA, + reader=MIN_LENGTH_INCREASED_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + writer=STRING_SCHEMA, + reader=MIN_PATTERN_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=MIN_PATTERN_SCHEMA, + reader=MIN_PATTERN_STRICT_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + writer=INT_SCHEMA, + reader=MAXIMUM_INTEGER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=INT_SCHEMA, + reader=MAXIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=NUMBER_SCHEMA, + reader=MAXIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=MAXIMUM_NUMBER_SCHEMA, + reader=MAXIMUM_DECREASED_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=MAXIMUM_INTEGER_SCHEMA, + reader=MAXIMUM_DECREASED_INTEGER_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + writer=INT_SCHEMA, + reader=MINIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=NUMBER_SCHEMA, + reader=MINIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + 
writer=MINIMUM_NUMBER_SCHEMA, + reader=MINIMUM_INCREASED_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=MINIMUM_INTEGER_SCHEMA, + reader=MINIMUM_INCREASED_INTEGER_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + writer=INT_SCHEMA, + reader=EXCLUSIVE_MAXIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=NUMBER_SCHEMA, + reader=EXCLUSIVE_MAXIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=EXCLUSIVE_MAXIMUM_NUMBER_SCHEMA, + reader=EXCLUSIVE_MAXIMUM_DECREASED_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=EXCLUSIVE_MAXIMUM_NUMBER_SCHEMA, + reader=EXCLUSIVE_MAXIMUM_DECREASED_INTEGER_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + writer=NUMBER_SCHEMA, + reader=EXCLUSIVE_MINIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=INT_SCHEMA, + reader=EXCLUSIVE_MINIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=EXCLUSIVE_MINIMUM_NUMBER_SCHEMA, + reader=EXCLUSIVE_MINIMUM_INCREASED_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=EXCLUSIVE_MINIMUM_INTEGER_SCHEMA, + reader=EXCLUSIVE_MINIMUM_INCREASED_INTEGER_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + writer=OBJECT_SCHEMA, + reader=MAX_PROPERTIES_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=MAX_PROPERTIES_SCHEMA, + reader=MAX_PROPERTIES_DECREASED_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + writer=OBJECT_SCHEMA, + reader=MIN_PROPERTIES_SCHEMA, + client=registry_async_client, + ) + await 
not_schemas_are_backward_compatible( + writer=MIN_PROPERTIES_SCHEMA, + reader=MIN_PROPERTIES_INCREASED_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + writer=ARRAY_SCHEMA, + reader=MAX_ITEMS_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=MAX_ITEMS_SCHEMA, + reader=MAX_ITEMS_DECREASED_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + writer=ARRAY_SCHEMA, + reader=MIN_ITEMS_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=MIN_ITEMS_SCHEMA, + reader=MIN_ITEMS_INCREASED_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_schema_broadenning_attributes_is_compatible(registry_async_client: Client) -> None: + await schemas_are_backward_compatible( + writer=MAX_LENGTH_SCHEMA, + reader=STRING_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=MAX_LENGTH_DECREASED_SCHEMA, + reader=MAX_LENGTH_SCHEMA, + client=registry_async_client, + ) + + await schemas_are_backward_compatible( + writer=MIN_LENGTH_SCHEMA, + reader=STRING_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=MIN_LENGTH_INCREASED_SCHEMA, + reader=MIN_LENGTH_SCHEMA, + client=registry_async_client, + ) + + await schemas_are_backward_compatible( + writer=MIN_PATTERN_SCHEMA, + reader=STRING_SCHEMA, + client=registry_async_client, + ) + + await schemas_are_backward_compatible( + writer=MAXIMUM_INTEGER_SCHEMA, + reader=INT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=MAXIMUM_NUMBER_SCHEMA, + reader=NUMBER_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=MAXIMUM_DECREASED_NUMBER_SCHEMA, + reader=MAXIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + + await schemas_are_backward_compatible( + writer=MINIMUM_INTEGER_SCHEMA, + 
reader=INT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=MINIMUM_NUMBER_SCHEMA, + reader=NUMBER_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=MINIMUM_INCREASED_NUMBER_SCHEMA, + reader=MINIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + + await schemas_are_backward_compatible( + writer=EXCLUSIVE_MAXIMUM_INTEGER_SCHEMA, + reader=INT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=EXCLUSIVE_MAXIMUM_NUMBER_SCHEMA, + reader=NUMBER_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=EXCLUSIVE_MAXIMUM_DECREASED_NUMBER_SCHEMA, + reader=EXCLUSIVE_MAXIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + + await schemas_are_backward_compatible( + writer=EXCLUSIVE_MINIMUM_NUMBER_SCHEMA, + reader=NUMBER_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=EXCLUSIVE_MINIMUM_INTEGER_SCHEMA, + reader=INT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=EXCLUSIVE_MINIMUM_INCREASED_NUMBER_SCHEMA, + reader=EXCLUSIVE_MINIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + + await schemas_are_backward_compatible( + writer=MAX_PROPERTIES_SCHEMA, + reader=OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=MAX_PROPERTIES_DECREASED_SCHEMA, + reader=MAX_PROPERTIES_SCHEMA, + client=registry_async_client, + ) + + await schemas_are_backward_compatible( + writer=MIN_PROPERTIES_SCHEMA, + reader=OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=MIN_PROPERTIES_INCREASED_SCHEMA, + reader=MIN_PROPERTIES_SCHEMA, + client=registry_async_client, + ) + + await schemas_are_backward_compatible( + writer=MAX_ITEMS_SCHEMA, + reader=ARRAY_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + 
writer=MAX_ITEMS_DECREASED_SCHEMA, + reader=MAX_ITEMS_SCHEMA, + client=registry_async_client, + ) + + await schemas_are_backward_compatible( + writer=MIN_ITEMS_SCHEMA, + reader=ARRAY_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=MIN_ITEMS_INCREASED_SCHEMA, + reader=MIN_ITEMS_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_pattern_properties(registry_async_client: Client): + await schemas_are_backward_compatible( + reader=OBJECT_SCHEMA, + writer=PATTERN_PROPERTY_ASTAR_OBJECT_SCHEMA, + client=registry_async_client, + ) + # In backward compatibility mode it is allowed to delete fields + await schemas_are_backward_compatible( + reader=A_OBJECT_SCHEMA, + writer=PATTERN_PROPERTY_ASTAR_OBJECT_SCHEMA, + client=registry_async_client, + ) + # In backward compatibility mode it is allowed to add optional fields + await schemas_are_backward_compatible( + reader=PATTERN_PROPERTY_ASTAR_OBJECT_SCHEMA, + writer=A_OBJECT_SCHEMA, + client=registry_async_client, + ) + + # - older accept any value for `a` + # - newer requires it to be an `int`, therefore the other values became + # invalid + await not_schemas_are_backward_compatible( + reader=A_INT_OBJECT_SCHEMA, + writer=PATTERN_PROPERTY_ASTAR_OBJECT_SCHEMA, + client=registry_async_client, + ) + + # - older has property `b` + # - newer only accepts properties with match regex `a*` + await not_schemas_are_backward_compatible( + reader=B_INT_OBJECT_SCHEMA, + writer=PATTERN_PROPERTY_ASTAR_OBJECT_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_object_properties(registry_async_client: Client): + await not_schemas_are_backward_compatible( + reader=A_OBJECT_SCHEMA, + writer=OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=OBJECT_SCHEMA, + writer=A_OBJECT_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + reader=A_INT_OBJECT_SCHEMA, + 
writer=OBJECT_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + reader=B_INT_OBJECT_SCHEMA, + writer=OBJECT_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_property_names(registry_async_client: Client): + await schemas_are_backward_compatible( + reader=OBJECT_SCHEMA, + writer=PROPERTY_NAMES_ASTAR_OBJECT_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=A_OBJECT_SCHEMA, + writer=PROPERTY_NAMES_ASTAR_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=PROPERTY_NAMES_ASTAR_OBJECT_SCHEMA, + writer=A_OBJECT_SCHEMA, + client=registry_async_client, + ) + + # - older accept any value for `a` + # - newer requires it to be an `int`, therefore the other values became + # invalid + await not_schemas_are_backward_compatible( + reader=A_INT_OBJECT_SCHEMA, + writer=PROPERTY_NAMES_ASTAR_OBJECT_SCHEMA, + client=registry_async_client, + ) + + # - older has property `b` + # - newer only accepts properties with match regex `a*` + await schemas_are_backward_compatible( + reader=PROPERTY_NAMES_ASTAR_OBJECT_SCHEMA, + writer=B_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_type_with_list(registry_async_client: Client): + # "type": [] is treated as a shortcut for anyOf + await schemas_are_backward_compatible( + reader=STRING_SCHEMA, + writer=TYPES_STRING_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=TYPES_STRING_INT_SCHEMA, + writer=TYPES_STRING_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_ref(registry_async_client: Client): + await schemas_are_backward_compatible( + reader=ARRAY_OF_POSITIVE_INTEGER, + writer=ARRAY_OF_POSITIVE_INTEGER_THROUGH_REF, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=ARRAY_OF_POSITIVE_INTEGER_THROUGH_REF, + 
writer=ARRAY_OF_POSITIVE_INTEGER, + client=registry_async_client, + ) diff --git a/tests/integration/test_json_references.py b/tests/integration/test_json_references.py new file mode 100644 index 000000000..69084729c --- /dev/null +++ b/tests/integration/test_json_references.py @@ -0,0 +1,229 @@ +""" +karapace - json schema (with references) tests + +Copyright (c) 2023 Aiven Ltd +See LICENSE for details +""" + +from karapace.client import Client, Result +from tests.utils import create_subject_name_factory + +import json + +baseurl = "http://localhost:8081" + +# country.schema.json +SCHEMA_COUNTRY = { + "$id": "https://example.com/country.schema.json", + "title": "Country", + "type": "object", + "description": "A country of registration", + "properties": {"name": {"type": "string"}, "code": {"type": "string"}}, + "required": ["name", "code"], +} + +# address.schema.json +SCHEMA_ADDRESS = { + "$id": "https://example.com/address.schema.json", + "title": "Address", + "type": "object", + "properties": { + "street": {"type": "string"}, + "city": {"type": "string"}, + "postalCode": {"type": "string"}, + "country": {"$ref": "https://example.com/country.schema.json"}, + }, + "required": ["street", "city", "postalCode", "country"], +} + +# job.schema.json +SCHEMA_JOB = { + "$id": "https://example.com/job.schema.json", + "title": "Job", + "type": "object", + "properties": {"title": {"type": "string"}, "salary": {"type": "number"}}, + "required": ["title", "salary"], +} + +# person.schema.json +SCHEMA_PERSON = { + "$id": "https://example.com/person.schema.json", + "title": "Person", + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "address": {"$ref": "https://example.com/address.schema.json"}, + "job": {"$ref": "https://example.com/job.schema.json"}, + }, + "required": ["name", "age", "address", "job"], +} + +SCHEMA_PERSON_AGE_INT_LONG = { + "$id": "https://example.com/person.schema.json", + "title": "Person", + "type": 
"object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "address": {"$ref": "https://example.com/address.schema.json"}, + "job": {"$ref": "https://example.com/job.schema.json"}, + }, + "required": ["name", "age", "address", "job"], +} + +SCHEMA_PERSON_AGE_LONG_STRING = { + "$id": "https://example.com/person.schema.json", + "title": "Person", + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "string"}, + "address": {"$ref": "https://example.com/address.schema.json"}, + "job": {"$ref": "https://example.com/job.schema.json"}, + }, + "required": ["name", "age", "address", "job"], +} + +SCHEMA_ADDRESS_INCOMPATIBLE = { + "$id": "https://example.com/address2.schema.json", + "title": "ChangedAddress", + "type": "object", + "properties": { + "street2": {"type": "string"}, + "city": {"type": "string"}, + "postalCode": {"type": "string"}, + "country": {"$ref": "https://example.com/country.schema.json"}, + }, + "required": ["street", "city", "postalCode", "country"], +} + + +def address_references(subject_prefix: str) -> list: + return [{"name": "country.schema.json", "subject": f"{subject_prefix}country", "version": 1}] + + +def person_references(subject_prefix: str) -> list: + return [ + {"name": "address.schema.json", "subject": f"{subject_prefix}address", "version": 1}, + {"name": "job.schema.json", "subject": f"{subject_prefix}job", "version": 1}, + ] + + +def stored_person_subject(subject_prefix: str, subject_id: int) -> dict: + return { + "id": subject_id, + "references": [ + {"name": "address.schema.json", "subject": f"{subject_prefix}address", "version": 1}, + {"name": "job.schema.json", "subject": f"{subject_prefix}job", "version": 1}, + ], + "schema": SCHEMA_PERSON, + "schemaType": "JSON", + "subject": f"{subject_prefix}person", + "version": 1, + } + + +async def basic_json_references_fill_test(registry_async_client: Client, subject_prefix: str) -> Result: + res = await registry_async_client.post( 
+ f"subjects/{subject_prefix}country/versions", json={"schemaType": "JSON", "schema": json.dumps(SCHEMA_COUNTRY)} + ) + assert res.status_code == 200 + assert "id" in res.json() + + res = await registry_async_client.post( + f"subjects/{subject_prefix}address/versions", + json={"schemaType": "JSON", "schema": json.dumps(SCHEMA_ADDRESS), "references": address_references(subject_prefix)}, + ) + assert res.status_code == 200 + assert "id" in res.json() + address_id = res.json()["id"] + + # Check if the schema has now been registered under the subject + + res = await registry_async_client.post( + f"subjects/{subject_prefix}address", + json={"schemaType": "JSON", "schema": json.dumps(SCHEMA_ADDRESS), "references": address_references(subject_prefix)}, + ) + assert res.status_code == 200 + assert "subject" in res.json() + assert "id" in res.json() + assert address_id == res.json()["id"] + assert "version" in res.json() + assert "schema" in res.json() + + res = await registry_async_client.post( + f"subjects/{subject_prefix}job/versions", json={"schemaType": "JSON", "schema": json.dumps(SCHEMA_JOB)} + ) + assert res.status_code == 200 + assert "id" in res.json() + res = await registry_async_client.post( + f"subjects/{subject_prefix}person/versions", + json={"schemaType": "JSON", "schema": json.dumps(SCHEMA_PERSON), "references": person_references(subject_prefix)}, + ) + assert res.status_code == 200 + assert "id" in res.json() + return res + + +async def test_basic_json_references(registry_async_client: Client) -> None: + subject_prefix = create_subject_name_factory("basic-json-references-")() + res = await basic_json_references_fill_test(registry_async_client, subject_prefix) + person_id = res.json()["id"] + res = await registry_async_client.get(f"subjects/{subject_prefix}country/versions/latest") + assert res.status_code == 200 + res = await registry_async_client.get(f"subjects/{subject_prefix}person/versions/latest") + assert res.status_code == 200 + r = res.json() + 
r["schema"] = json.loads(r["schema"]) + assert r == stored_person_subject(subject_prefix, person_id) + + +async def test_json_references_compatibility(registry_async_client: Client) -> None: + subject_prefix = create_subject_name_factory("json-references-compatibility-")() + await basic_json_references_fill_test(registry_async_client, subject_prefix) + + res = await registry_async_client.post( + f"compatibility/subjects/{subject_prefix}person/versions/latest", + json={ + "schemaType": "JSON", + "schema": json.dumps(SCHEMA_PERSON_AGE_INT_LONG), + "references": person_references(subject_prefix), + }, + ) + assert res.status_code == 200 + assert res.json() == {"is_compatible": True} + res = await registry_async_client.post( + f"compatibility/subjects/{subject_prefix}person/versions/latest", + json={ + "schemaType": "JSON", + "schema": json.dumps(SCHEMA_PERSON_AGE_LONG_STRING), + "references": person_references(subject_prefix), + }, + ) + assert res.status_code == 200 + assert res.json() == { + "is_compatible": False, + "messages": ["type Instance.STRING is not compatible with type Instance.INTEGER"], + } + + +async def test_json_incompatible_name_references(registry_async_client: Client) -> None: + subject_prefix = create_subject_name_factory("json-references-incompatible-name-")() + await basic_json_references_fill_test(registry_async_client, subject_prefix) + res = await registry_async_client.post( + f"subjects/{subject_prefix}address/versions", + json={ + "schemaType": "JSON", + "schema": json.dumps(SCHEMA_ADDRESS_INCOMPATIBLE), + "references": address_references(subject_prefix), + }, + ) + assert res.status_code == 409 + msg = ( + "Incompatible schema, compatibility_mode=BACKWARD. Incompatibilities: Restricting acceptable values of " + "properties is an incompatible change. 
The following properties street2 accepted any value because of the " + "lack of validation (the object schema had neither patternProperties nor additionalProperties), " + "now these values are restricted." + ) + assert res.json()["message"] == msg diff --git a/tests/unit/test_json_resolver.py b/tests/unit/test_json_resolver.py new file mode 100644 index 000000000..86048dc35 --- /dev/null +++ b/tests/unit/test_json_resolver.py @@ -0,0 +1,43 @@ +""" +Copyright (c) 2023 Aiven Ltd +See LICENSE for details +""" + +from jsonschema import RefResolver +from karapace.dependency import Dependency +from karapace.schema_models import json_resolver, parse_jsonschema_definition, ValidatedTypedSchema +from karapace.schema_type import SchemaType +from karapace.typing import Subject, Version + + +def test_json_registry_no_dependencies(): + """Test case when there are no dependencies.""" + schema_str = '{"$id": "http://example.com/schema.json"}' + result = json_resolver(schema_str) + assert result is None + + +def test_json_registry_with_single_dependency(): + """Test json_registry with a single dependency.""" + schema_str = '{"$id": "http://example.com/schema.json"}' + dependency_schema = '{"$id": "http://example.com/dependency-schema.json"}' + + # Using the Dependency class from Karapace + dependencies = { + "dep1": Dependency( + name="dep1", + subject=Subject("subj"), + version=Version(1), + target_schema=ValidatedTypedSchema( + schema_type=SchemaType.JSONSCHEMA, + schema_str=dependency_schema, + schema=parse_jsonschema_definition(dependency_schema), + ), + ) + } + + result = json_resolver(schema_str, dependencies) + assert isinstance(result, RefResolver) + assert result.store + assert result.store.get("http://example.com/schema.json") + assert result.store.get("http://example.com/dependency-schema.json")