diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d3ae1c40f..8b151f124 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -20,7 +20,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12' ] + python-version: [ '3.9', '3.10', '3.11', '3.12' ] env: PYTEST_ADDOPTS: >- --log-dir=/tmp/ci-logs diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a975b52ec..1161ba0b7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,7 +34,7 @@ repos: rev: v3.4.0 hooks: - id: pyupgrade - args: [ --py38-plus ] + args: [ --py39-plus ] - repo: https://github.com/pycqa/autoflake rev: v2.1.1 diff --git a/GNUmakefile b/GNUmakefile index 7f9c90191..0749b7613 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -3,7 +3,7 @@ SHELL := /usr/bin/env bash VENV_DIR ?= $(CURDIR)/venv PIP ?= pip3 --disable-pip-version-check --no-input --require-virtualenv PYTHON ?= python3 -PYTHON_VERSION ?= 3.8 +PYTHON_VERSION ?= 3.9 define PIN_VERSIONS_COMMAND pip install pip-tools && \ diff --git a/README.rst b/README.rst index d1bcbd28f..0cc7489e4 100644 --- a/README.rst +++ b/README.rst @@ -149,6 +149,10 @@ Test the compatibility of a schema with the latest schema under subject "test-ke http://localhost:8081/compatibility/subjects/test-key/versions/latest {"is_compatible":true} +NOTE: if the subject's compatibility mode is transitive (BACKWARD_TRANSITIVE, FORWARD_TRANSITIVE or FULL_TRANSITIVE) then the +compatibility is checked not only against the latest schema, but also against all previous schemas, as it would be done +when trying to register the new schema through the `subjects/<subject>/versions` endpoint. 
+ Get current global backwards compatibility setting value:: $ curl -X GET http://localhost:8081/config @@ -478,6 +482,15 @@ Keys to take special care are the ones needed to configure Kafka and advertised_ * - ``use_protobuf_formatter`` - ``false`` - If protobuf formatter should be used on protobuf schemas in order to normalize schemas. The formatter is used on top and independent of regular normalization and schemas will be persisted in a formatted state. + * - ``log_handler`` + - ``stdout`` + - Select the log handler. Default is standard output. Alternative log handler is ``systemd``. + * - ``log_level`` + - ``DEBUG`` + - Logging level. Default level is debug. + * - ``log_format`` + - ``%(name)-20s\t%(threadName)s\t%(levelname)-8s\t%(message)s`` + - Log format Authentication and authorization of Karapace Schema Registry REST API diff --git a/karapace.config.json b/karapace.config.json index 55303ff4d..52a75bef9 100644 --- a/karapace.config.json +++ b/karapace.config.json @@ -9,6 +9,7 @@ "group_id": "schema-registry", "host": "127.0.0.1", "log_level": "DEBUG", + "log_handler": "stdout", "port": 8081, "server_tls_certfile": null, "server_tls_keyfile": null, diff --git a/mypy.ini b/mypy.ini index 15ab9042f..c4ef8efd1 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,6 +1,6 @@ [mypy] mypy_path = $MYPY_CONFIG_FILE_DIR/stubs -python_version = 3.8 +python_version = 3.9 packages = karapace show_error_codes = True pretty = True @@ -85,3 +85,6 @@ ignore_missing_imports = True [mypy-networkx.*] ignore_missing_imports = True + +[mypy-systemd.*] +ignore_missing_imports = True diff --git a/performance-test/schema-registry-schema-post.py b/performance-test/schema-registry-schema-post.py index 491bcbcc7..b3fd795ff 100644 --- a/performance-test/schema-registry-schema-post.py +++ b/performance-test/schema-registry-schema-post.py @@ -5,7 +5,6 @@ from dataclasses import dataclass, field from locust import FastHttpUser, task from locust.contrib.fasthttp import ResponseContextManager -from 
typing import Dict import json import random @@ -17,7 +16,7 @@ @dataclass class TestData: count: int = 0 - schemas: Dict[uuid.UUID, SchemaId] = field(default_factory=dict) + schemas: dict[uuid.UUID, SchemaId] = field(default_factory=dict) SUBJECTS = ["test-subject-1", "test-subject-2"] diff --git a/pyproject.toml b/pyproject.toml index 089668037..9c505b176 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "karapace" -requires-python = ">= 3.8" +requires-python = ">= 3.9" dynamic = ["version"] readme = "README.rst" license = {file = "LICENSE"} @@ -49,7 +49,6 @@ classifiers=[ "Intended Audience :: Information Technology", "Intended Audience :: System Administrators", "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", @@ -70,6 +69,7 @@ Issues = "https://github.com/Aiven-Open/karapace/issues" [project.optional-dependencies] sentry-sdk = ["sentry-sdk>=1.6.0"] +systemd-logging = ["systemd-python==235"] ujson = ["ujson"] dev = [ # Developer QoL @@ -107,5 +107,5 @@ include-package-data = true version_file = "src/karapace/version.py" [tool.black] -target-version = ["py38"] +target-version = ["py39"] line-length = 125 diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt index 849f7c38f..510bfeef1 100644 --- a/requirements/requirements-dev.txt +++ b/requirements/requirements-dev.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # make pin-requirements @@ -93,10 +93,6 @@ idna==3.8 # yarl importlib-metadata==8.4.0 # via flask -importlib-resources==6.4.4 - # via - # jsonschema - # jsonschema-specifications iniconfig==2.0.0 # via pytest isodate==0.6.1 @@ -135,8 +131,6 @@ packaging==24.1 # 
pytest pdbpp==0.10.3 # via karapace (/karapace/pyproject.toml) -pkgutil-resolve-name==1.3.10 - # via jsonschema pluggy==1.5.0 # via pytest prometheus-client==0.20.0 @@ -215,7 +209,6 @@ typing-extensions==4.12.2 # via # anyio # karapace (/karapace/pyproject.toml) - # rich ujson==5.10.0 # via karapace (/karapace/pyproject.toml) urllib3==2.2.2 @@ -238,9 +231,7 @@ yarl==1.12.1 # aiohttp # karapace (/karapace/pyproject.toml) zipp==3.20.1 - # via - # importlib-metadata - # importlib-resources + # via importlib-metadata zope-event==5.0 # via gevent zope-interface==7.0.2 diff --git a/requirements/requirements-typing.txt b/requirements/requirements-typing.txt index 503061bea..2667aea8d 100644 --- a/requirements/requirements-typing.txt +++ b/requirements/requirements-typing.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # make pin-requirements @@ -45,10 +45,6 @@ idna==3.10 # via # anyio # yarl -importlib-resources==6.4.5 - # via - # jsonschema - # jsonschema-specifications isodate==0.6.1 # via karapace (/karapace/pyproject.toml) jsonschema==4.23.0 @@ -73,8 +69,6 @@ networkx==3.1 # via karapace (/karapace/pyproject.toml) packaging==24.1 # via aiokafka -pkgutil-resolve-name==1.3.10 - # via jsonschema prometheus-client==0.20.0 # via karapace (/karapace/pyproject.toml) protobuf==3.20.3 @@ -122,7 +116,6 @@ typing-extensions==4.12.2 # karapace (/karapace/pyproject.toml) # multidict # mypy - # rich ujson==5.10.0 # via karapace (/karapace/pyproject.toml) urllib3==2.2.2 @@ -135,7 +128,5 @@ yarl==1.12.1 # via # aiohttp # karapace (/karapace/pyproject.toml) -zipp==3.20.2 - # via importlib-resources zstandard==0.23.0 # via karapace (/karapace/pyproject.toml) diff --git a/requirements/requirements.txt b/requirements/requirements.txt index d7803e46e..15b787dcf 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,5 +1,5 @@ # -# This 
file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # make pin-requirements @@ -43,10 +43,6 @@ idna==3.8 # via # anyio # yarl -importlib-resources==6.4.4 - # via - # jsonschema - # jsonschema-specifications isodate==0.6.1 # via karapace (/karapace/pyproject.toml) jsonschema==4.23.0 @@ -67,8 +63,6 @@ networkx==3.1 # via karapace (/karapace/pyproject.toml) packaging==24.1 # via aiokafka -pkgutil-resolve-name==1.3.10 - # via jsonschema prometheus-client==0.20.0 # via karapace (/karapace/pyproject.toml) protobuf==3.20.3 @@ -103,7 +97,6 @@ typing-extensions==4.12.2 # via # anyio # karapace (/karapace/pyproject.toml) - # rich ujson==5.10.0 # via karapace (/karapace/pyproject.toml) watchfiles==0.23.0 @@ -114,7 +107,5 @@ yarl==1.12.1 # via # aiohttp # karapace (/karapace/pyproject.toml) -zipp==3.20.1 - # via importlib-resources zstandard==0.23.0 # via karapace (/karapace/pyproject.toml) diff --git a/runtime.txt b/runtime.txt index 9e9414fda..57f558859 100644 --- a/runtime.txt +++ b/runtime.txt @@ -1 +1 @@ -python-3.8.16 +python-3.9.20 diff --git a/src/karapace/anonymize_schemas/anonymize_avro.py b/src/karapace/anonymize_schemas/anonymize_avro.py index 56c0b8302..69906c94d 100644 --- a/src/karapace/anonymize_schemas/anonymize_avro.py +++ b/src/karapace/anonymize_schemas/anonymize_avro.py @@ -4,7 +4,7 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ -from typing import Any, Dict, List, Union +from typing import Any, Union from typing_extensions import TypeAlias import hashlib @@ -95,7 +95,7 @@ def anonymize_element(m: re.Match) -> str: return NAME_ANONYMIZABLE_PATTERN.sub(anonymize_element, name) -Schema: TypeAlias = Union[str, Dict[str, Any], List[Any]] +Schema: TypeAlias = Union[str, dict[str, Any], list[Any]] def anonymize(input_schema: Schema) -> Schema: @@ -105,10 +105,10 @@ def anonymize(input_schema: Schema) -> Schema: if input_schema in ALL_TYPES: return 
input_schema return anonymize_name(input_schema) - elif isinstance(input_schema, List): + elif isinstance(input_schema, list): return [anonymize(value) for value in input_schema] - elif isinstance(input_schema, Dict): - output_schema: Dict[str, Any] = {} + elif isinstance(input_schema, dict): + output_schema: dict[str, Any] = {} for key, value in input_schema.items(): if key in KEYWORDS: output_schema[key] = anonymize(value) diff --git a/src/karapace/avro_dataclasses/introspect.py b/src/karapace/avro_dataclasses/introspect.py index 64b2e5856..7ba38ab00 100644 --- a/src/karapace/avro_dataclasses/introspect.py +++ b/src/karapace/avro_dataclasses/introspect.py @@ -6,15 +6,11 @@ from __future__ import annotations from .schema import AvroType, EnumType, FieldSchema, MapType, RecordSchema -from collections.abc import Mapping +from collections.abc import Mapping, Sequence from dataclasses import Field, fields, is_dataclass, MISSING from enum import Enum from functools import lru_cache -from typing import Final, Sequence, TYPE_CHECKING, TypeVar, Union - -# Note: It's important get_args and get_origin are imported from typing_extensions -# until support for Python 3.8 is dropped. 
-from typing_extensions import get_args, get_origin +from typing import Final, get_args, get_origin, TYPE_CHECKING, TypeVar, Union import datetime import uuid @@ -46,10 +42,17 @@ def _field_type_array(field: Field, origin: type, type_: object) -> AvroType: else: (inner_type,) = get_args(type_) + items: AvroType + if is_dataclass(inner_type): + assert isinstance(inner_type, type) + items = record_schema(inner_type) + else: + items = _field_type(field, inner_type) + return { "name": f"one_of_{field.name}", "type": "array", - "items": (record_schema(inner_type) if is_dataclass(inner_type) else _field_type(field, inner_type)), + "items": items, } @@ -132,7 +135,7 @@ def _field_type(field: Field, type_: object) -> AvroType: # pylint: disable=too T = TypeVar("T", str, int, bool, Enum, None) -def transform_default(type_: type[T], default: T) -> str | int | bool | None: +def transform_default(type_: type[T] | str, default: T) -> str | int | bool | None: if isinstance(default, Enum): assert isinstance(type_, type) assert issubclass(type_, Enum) diff --git a/src/karapace/avro_dataclasses/models.py b/src/karapace/avro_dataclasses/models.py index 78a64c8f4..9bcd630cf 100644 --- a/src/karapace/avro_dataclasses/models.py +++ b/src/karapace/avro_dataclasses/models.py @@ -5,10 +5,11 @@ from __future__ import annotations from .introspect import record_schema +from collections.abc import Iterable, Mapping from dataclasses import asdict, fields, is_dataclass from enum import Enum from functools import lru_cache, partial -from typing import Callable, cast, IO, Iterable, Mapping, TYPE_CHECKING, TypeVar, Union +from typing import Callable, cast, IO, TYPE_CHECKING, TypeVar, Union from typing_extensions import get_args, get_origin, Self import avro diff --git a/src/karapace/backup/api.py b/src/karapace/backup/api.py index 7b243586b..3da9a2304 100644 --- a/src/karapace/backup/api.py +++ b/src/karapace/backup/api.py @@ -22,7 +22,7 @@ from .poll_timeout import PollTimeout from 
.topic_configurations import ConfigSource, get_topic_configurations from aiokafka.errors import KafkaError, TopicAlreadyExistsError -from collections.abc import Sized +from collections.abc import Iterator, Mapping, Sized from concurrent.futures import Future from confluent_kafka import Message, TopicPartition from enum import Enum @@ -42,7 +42,7 @@ from pathlib import Path from rich.console import Console from tenacity import retry, retry_if_exception_type, RetryCallState, stop_after_delay, wait_fixed -from typing import Callable, Iterator, Literal, Mapping, NewType, TypeVar +from typing import Callable, Literal, NewType, TypeVar import contextlib import datetime @@ -373,13 +373,20 @@ def _handle_restore_topic( instruction: RestoreTopic, config: Config, skip_topic_creation: bool = False, + override_replication_factor: int | None = None, ) -> None: if skip_topic_creation: return + repl_factor = instruction.replication_factor + if override_replication_factor is not None: + LOG.info( + "Overriding replication factor with: %d (was: %d)", override_replication_factor, instruction.replication_factor + ) + repl_factor = override_replication_factor if not _maybe_create_topic( config=config, name=instruction.topic_name, - replication_factor=instruction.replication_factor, + replication_factor=repl_factor, topic_configs=instruction.topic_configs, ): raise BackupTopicAlreadyExists(f"Topic to restore '{instruction.topic_name}' already exists") @@ -426,6 +433,7 @@ def restore_backup( backup_location: ExistingFile, topic_name: TopicName, skip_topic_creation: bool = False, + override_replication_factor: int | None = None, ) -> None: """Restores a backup from the specified location into the configured topic. 
@@ -475,7 +483,7 @@ def _check_producer_exception() -> None: _handle_restore_topic_legacy(instruction, config, skip_topic_creation) producer = stack.enter_context(_producer(config, instruction.topic_name)) elif isinstance(instruction, RestoreTopic): - _handle_restore_topic(instruction, config, skip_topic_creation) + _handle_restore_topic(instruction, config, skip_topic_creation, override_replication_factor) producer = stack.enter_context(_producer(config, instruction.topic_name)) elif isinstance(instruction, ProducerSend): if producer is None: diff --git a/src/karapace/backup/backends/reader.py b/src/karapace/backup/backends/reader.py index d4caadda2..d1d32bfe8 100644 --- a/src/karapace/backup/backends/reader.py +++ b/src/karapace/backup/backends/reader.py @@ -4,10 +4,11 @@ """ from __future__ import annotations +from collections.abc import Generator, Iterator, Mapping, Sequence from karapace.dataclasses import default_dataclass from karapace.typing import JsonData, JsonObject from pathlib import Path -from typing import Callable, ClassVar, Final, Generator, IO, Iterator, Mapping, Optional, Sequence, TypeVar, Union +from typing import Callable, ClassVar, Final, IO, Optional, TypeVar, Union from typing_extensions import TypeAlias import abc diff --git a/src/karapace/backup/backends/v1.py b/src/karapace/backup/backends/v1.py index 1b9400a98..186af0c0b 100644 --- a/src/karapace/backup/backends/v1.py +++ b/src/karapace/backup/backends/v1.py @@ -4,16 +4,17 @@ """ from __future__ import annotations +from collections.abc import Generator from karapace.backup.backends.reader import BaseItemsBackupReader from karapace.utils import json_decode -from typing import Generator, IO, List +from typing import IO class SchemaBackupV1Reader(BaseItemsBackupReader): @staticmethod def items_from_file(fp: IO[str]) -> Generator[list[str], None, None]: raw_msg = fp.read() - values = json_decode(raw_msg, List[List[str]]) + values = json_decode(raw_msg, list[list[str]]) if not values: return 
yield from values diff --git a/src/karapace/backup/backends/v2.py b/src/karapace/backup/backends/v2.py index 7472e9b2f..a456d6e39 100644 --- a/src/karapace/backup/backends/v2.py +++ b/src/karapace/backup/backends/v2.py @@ -4,12 +4,13 @@ """ from __future__ import annotations +from collections.abc import Generator, Sequence from karapace.anonymize_schemas import anonymize_avro from karapace.backup.backends.reader import BaseItemsBackupReader from karapace.backup.backends.writer import BaseKVBackupWriter, StdOut from karapace.utils import json_decode, json_encode from pathlib import Path -from typing import Any, ClassVar, Dict, Final, Generator, IO, Sequence +from typing import Any, ClassVar, Final, IO import base64 import contextlib @@ -61,8 +62,8 @@ def serialize_record( # Check that the message has key `schema` and type is Avro schema. # The Avro schemas may have `schemaType` key, if not present the schema is Avro. - key = json_decode(key_bytes, Dict[str, str]) - value = json_decode(value_bytes, Dict[str, str]) + key = json_decode(key_bytes, dict[str, str]) + value = json_decode(value_bytes, dict[str, str]) if value and "schema" in value and value.get("schemaType", "AVRO") == "AVRO": original_schema: Any = json_decode(value["schema"]) diff --git a/src/karapace/backup/backends/v3/backend.py b/src/karapace/backup/backends/v3/backend.py index 25e08cf42..c2aca1f25 100644 --- a/src/karapace/backup/backends/v3/backend.py +++ b/src/karapace/backup/backends/v3/backend.py @@ -9,6 +9,7 @@ from .readers import read_metadata, read_records from .schema import ChecksumAlgorithm, DataFile, Header, Metadata, Record from .writers import write_metadata, write_record +from collections.abc import Generator, Iterator, Mapping, Sequence from confluent_kafka import Message from dataclasses import dataclass from karapace.backup.backends.reader import BaseBackupReader, Instruction, ProducerSend, RestoreTopic @@ -18,7 +19,7 @@ from karapace.utils import assert_never from karapace.version 
import __version__ from pathlib import Path -from typing import Callable, ContextManager, Final, Generator, IO, Iterator, Mapping, Sequence, TypeVar +from typing import Callable, ContextManager, Final, IO, TypeVar from typing_extensions import TypeAlias import datetime diff --git a/src/karapace/backup/backends/v3/readers.py b/src/karapace/backup/backends/v3/readers.py index afa4aba4a..74b981143 100644 --- a/src/karapace/backup/backends/v3/readers.py +++ b/src/karapace/backup/backends/v3/readers.py @@ -8,8 +8,9 @@ from .constants import V3_MARKER from .errors import InvalidChecksum, InvalidHeader, TooFewRecords, TooManyRecords, UnexpectedEndOfData from .schema import Metadata, Record +from collections.abc import Generator from karapace.avro_dataclasses.models import AvroModel -from typing import Generator, IO, TypeVar +from typing import IO, TypeVar import io import struct diff --git a/src/karapace/backup/backends/v3/schema.py b/src/karapace/backup/backends/v3/schema.py index 9105d1c97..db4cc7862 100644 --- a/src/karapace/backup/backends/v3/schema.py +++ b/src/karapace/backup/backends/v3/schema.py @@ -4,10 +4,11 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ +from collections.abc import Mapping from dataclasses import field from karapace.avro_dataclasses.models import AvroModel from karapace.dataclasses import default_dataclass -from typing import Mapping, Optional, Tuple +from typing import Optional import datetime import enum @@ -56,7 +57,7 @@ class Metadata(AvroModel): partition_count: int = field(metadata={"type": "int"}) replication_factor: int = field(metadata={"type": "int"}) topic_configurations: Mapping[str, str] - data_files: Tuple[DataFile, ...] + data_files: tuple[DataFile, ...] checksum_algorithm: ChecksumAlgorithm = ChecksumAlgorithm.unknown def __post_init__(self) -> None: @@ -78,7 +79,7 @@ class Header(AvroModel): class Record(AvroModel): key: Optional[bytes] value: Optional[bytes] - headers: Tuple[Header, ...] 
+ headers: tuple[Header, ...] offset: int = field(metadata={"type": "long"}) timestamp: int = field(metadata={"type": "long"}) # In order to reduce the impact of checksums on total file sizes, especially diff --git a/src/karapace/backup/backends/v3/schema_tool.py b/src/karapace/backup/backends/v3/schema_tool.py index 340be2477..f5843ceb2 100644 --- a/src/karapace/backup/backends/v3/schema_tool.py +++ b/src/karapace/backup/backends/v3/schema_tool.py @@ -6,10 +6,11 @@ """ from . import schema from avro.compatibility import ReaderWriterCompatibilityChecker, SchemaCompatibilityType +from collections.abc import Generator from karapace.avro_dataclasses.introspect import record_schema from karapace.avro_dataclasses.models import AvroModel from karapace.schema_models import parse_avro_schema_definition -from typing import Final, Generator, Tuple, Type +from typing import Final import argparse import json @@ -19,7 +20,7 @@ import sys -def types() -> Generator[Tuple[str, Type[AvroModel]], None, None]: +def types() -> Generator[tuple[str, type[AvroModel]], None, None]: for name, value in schema.__dict__.items(): try: if issubclass(value, AvroModel) and value != AvroModel: @@ -58,17 +59,6 @@ def relative_path(path: pathlib.Path) -> pathlib.Path: return pathlib.Path(str_path[len(cwd) + 1 :]) if str_path.startswith(cwd) else path -def target_has_source_layout(git_target: str) -> bool: - with subprocess.Popen( - ["git", "show", f"{git_target}:src"], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) as cp: - if cp.returncode == 128: - return False - return True - - def check_compatibility(git_target: str) -> None: errored = False found_any = False @@ -81,13 +71,8 @@ def check_compatibility(git_target: str) -> None: subprocess.run(["git", "fetch", remote, branch], check=True, capture_output=True) - # Does the target version have source layout - source_layout = target_has_source_layout(git_target) - for file in schema_directory.glob(f"*{extension}"): relative = 
relative_path(file) - if not source_layout: - relative = pathlib.Path(*relative.parts[1:]) with subprocess.Popen( ["git", "show", f"{git_target}:{relative}"], stdout=subprocess.PIPE, diff --git a/src/karapace/backup/backends/writer.py b/src/karapace/backup/backends/writer.py index 7d5ddc287..927077e2b 100644 --- a/src/karapace/backup/backends/writer.py +++ b/src/karapace/backup/backends/writer.py @@ -4,10 +4,11 @@ """ from __future__ import annotations +from collections.abc import Iterator, Mapping, Sequence from confluent_kafka import Message from karapace.backup.safe_writer import bytes_writer, str_writer from pathlib import Path -from typing import ContextManager, Generic, IO, Iterator, Literal, Mapping, Sequence, TypeVar +from typing import ContextManager, Generic, IO, Literal, TypeVar from typing_extensions import TypeAlias import abc diff --git a/src/karapace/backup/cli.py b/src/karapace/backup/cli.py index 8e4b108be..7125b1e04 100644 --- a/src/karapace/backup/cli.py +++ b/src/karapace/backup/cli.py @@ -10,9 +10,9 @@ from .errors import BackupDataRestorationError, StaleConsumerError from .poll_timeout import PollTimeout from aiokafka.errors import BrokerResponseError +from collections.abc import Iterator from karapace.backup.api import VerifyLevel from karapace.config import Config, read_config -from typing import Iterator import argparse import contextlib @@ -76,6 +76,15 @@ def parse_args() -> argparse.Namespace: ), ) + parser_restore.add_argument( + "--override-replication-factor", + help=( + "Override the replication factor that is saved in the backup. This is needed when restoring a backup from a " + "downsized cluster (like scaling down from 6 to 3 nodes). This has effect only for V3 backups." 
+ ), + type=int, + ) + return parser.parse_args() @@ -115,6 +124,7 @@ def dispatch(args: argparse.Namespace) -> None: backup_location=api.locate_backup_file(location), topic_name=api.normalize_topic_name(args.topic, config), skip_topic_creation=args.skip_topic_creation, + override_replication_factor=args.override_replication_factor, ) except BackupDataRestorationError: traceback.print_exc() diff --git a/src/karapace/backup/safe_writer.py b/src/karapace/backup/safe_writer.py index 57970b950..d8338f5ae 100644 --- a/src/karapace/backup/safe_writer.py +++ b/src/karapace/backup/safe_writer.py @@ -4,9 +4,10 @@ """ from __future__ import annotations +from collections.abc import Generator from pathlib import Path from tempfile import mkstemp, TemporaryDirectory -from typing import Final, Generator, IO, Literal +from typing import Final, IO, Literal from typing_extensions import TypeAlias import contextlib diff --git a/src/karapace/backup/topic_configurations.py b/src/karapace/backup/topic_configurations.py index 320e2e6ee..5aaf13a52 100644 --- a/src/karapace/backup/topic_configurations.py +++ b/src/karapace/backup/topic_configurations.py @@ -4,9 +4,10 @@ """ from __future__ import annotations +from collections.abc import Container from confluent_kafka.admin import ConfigSource from karapace.kafka.admin import KafkaAdminClient -from typing import Container, Final +from typing import Final ALL_CONFIG_SOURCES: Final = ConfigSource diff --git a/src/karapace/client.py b/src/karapace/client.py index dae79b244..23a9e157a 100644 --- a/src/karapace/client.py +++ b/src/karapace/client.py @@ -5,8 +5,9 @@ See LICENSE for details """ from aiohttp import BasicAuth, ClientSession +from collections.abc import Awaitable, Mapping from karapace.typing import JsonData -from typing import Awaitable, Callable, Mapping, Optional, Union +from typing import Callable, Optional, Union from urllib.parse import urljoin import logging diff --git a/src/karapace/compatibility/__init__.py 
b/src/karapace/compatibility/__init__.py index e5f61e710..3984ed9f5 100644 --- a/src/karapace/compatibility/__init__.py +++ b/src/karapace/compatibility/__init__.py @@ -4,22 +4,7 @@ Copyright (c) 2019 Aiven Ltd See LICENSE for details """ -from avro.compatibility import ( - merge, - ReaderWriterCompatibilityChecker as AvroChecker, - SchemaCompatibilityResult, - SchemaCompatibilityType, - SchemaIncompatibilityType, -) -from avro.schema import Schema as AvroSchema from enum import Enum, unique -from jsonschema import Draft7Validator -from karapace.compatibility.jsonschema.checks import compatibility as jsonschema_compatibility, incompatible_schema -from karapace.compatibility.protobuf.checks import check_protobuf_schema_compatibility -from karapace.protobuf.schema import ProtobufSchema -from karapace.schema_models import ParsedTypedSchema, ValidatedTypedSchema -from karapace.schema_reader import SchemaType -from karapace.utils import assert_never import logging @@ -54,121 +39,3 @@ def is_transitive(self) -> bool: "FULL_TRANSITIVE", } return self.value in TRANSITIVE_MODES - - -def check_avro_compatibility(reader_schema: AvroSchema, writer_schema: AvroSchema) -> SchemaCompatibilityResult: - return AvroChecker().get_compatibility(reader=reader_schema, writer=writer_schema) - - -def check_jsonschema_compatibility(reader: Draft7Validator, writer: Draft7Validator) -> SchemaCompatibilityResult: - return jsonschema_compatibility(reader, writer) - - -def check_protobuf_compatibility(reader: ProtobufSchema, writer: ProtobufSchema) -> SchemaCompatibilityResult: - return check_protobuf_schema_compatibility(reader, writer) - - -def check_compatibility( - old_schema: ParsedTypedSchema, - new_schema: ValidatedTypedSchema, - compatibility_mode: CompatibilityModes, -) -> SchemaCompatibilityResult: - """Check that `old_schema` and `new_schema` are compatible under `compatibility_mode`.""" - if compatibility_mode is CompatibilityModes.NONE: - LOG.info("Compatibility level set to NONE, 
no schema compatibility checks performed") - return SchemaCompatibilityResult(SchemaCompatibilityType.compatible) - - if old_schema.schema_type is not new_schema.schema_type: - return incompatible_schema( - incompat_type=SchemaIncompatibilityType.type_mismatch, - message=f"Comparing different schema types: {old_schema.schema_type} with {new_schema.schema_type}", - location=[], - ) - - if old_schema.schema_type is SchemaType.AVRO: - assert isinstance(old_schema.schema, AvroSchema) - assert isinstance(new_schema.schema, AvroSchema) - if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}: - result = check_avro_compatibility( - reader_schema=new_schema.schema, - writer_schema=old_schema.schema, - ) - - elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}: - result = check_avro_compatibility( - reader_schema=old_schema.schema, - writer_schema=new_schema.schema, - ) - - elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}: - result = check_avro_compatibility( - reader_schema=new_schema.schema, - writer_schema=old_schema.schema, - ) - result = merge( - result, - check_avro_compatibility( - reader_schema=old_schema.schema, - writer_schema=new_schema.schema, - ), - ) - - elif old_schema.schema_type is SchemaType.JSONSCHEMA: - assert isinstance(old_schema.schema, Draft7Validator) - assert isinstance(new_schema.schema, Draft7Validator) - if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}: - result = check_jsonschema_compatibility( - reader=new_schema.schema, - writer=old_schema.schema, - ) - - elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}: - result = check_jsonschema_compatibility( - reader=old_schema.schema, - writer=new_schema.schema, - ) - - elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}: - result = 
check_jsonschema_compatibility( - reader=new_schema.schema, - writer=old_schema.schema, - ) - result = merge( - result, - check_jsonschema_compatibility( - reader=old_schema.schema, - writer=new_schema.schema, - ), - ) - - elif old_schema.schema_type is SchemaType.PROTOBUF: - assert isinstance(old_schema.schema, ProtobufSchema) - assert isinstance(new_schema.schema, ProtobufSchema) - if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}: - result = check_protobuf_compatibility( - reader=new_schema.schema, - writer=old_schema.schema, - ) - elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}: - result = check_protobuf_compatibility( - reader=old_schema.schema, - writer=new_schema.schema, - ) - - elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}: - result = check_protobuf_compatibility( - reader=new_schema.schema, - writer=old_schema.schema, - ) - result = merge( - result, - check_protobuf_compatibility( - reader=old_schema.schema, - writer=new_schema.schema, - ), - ) - - else: - assert_never(f"Unknown schema_type {old_schema.schema_type}") - - return result diff --git a/src/karapace/compatibility/jsonschema/utils.py b/src/karapace/compatibility/jsonschema/utils.py index 011b7aa74..486af0719 100644 --- a/src/karapace/compatibility/jsonschema/utils.py +++ b/src/karapace/compatibility/jsonschema/utils.py @@ -5,12 +5,12 @@ from copy import copy from jsonschema import Draft7Validator from karapace.compatibility.jsonschema.types import BooleanSchema, Instance, Keyword, Subschema -from typing import Any, List, Optional, Tuple, Type, TypeVar, Union +from typing import Any, Optional, TypeVar, Union import re T = TypeVar("T") -JSONSCHEMA_TYPES = Union[Instance, Subschema, Keyword, Type[BooleanSchema]] +JSONSCHEMA_TYPES = Union[Instance, Subschema, Keyword, type[BooleanSchema]] def normalize_schema(validator: Draft7Validator) -> Any: @@ -53,7 +53,7 @@ def 
normalize_schema_rec(validator: Draft7Validator, original_schema: Any) -> An return normalized -def maybe_get_subschemas_and_type(schema: Any) -> Optional[Tuple[List[Any], Subschema]]: +def maybe_get_subschemas_and_type(schema: Any) -> Optional[tuple[list[Any], Subschema]]: """If schema contains `anyOf`, `allOf`, or `oneOf`, return it. This will also normalized schemas with a list of types to a `anyOf`, e..g: diff --git a/src/karapace/compatibility/schema_compatibility.py b/src/karapace/compatibility/schema_compatibility.py new file mode 100644 index 000000000..07e059d50 --- /dev/null +++ b/src/karapace/compatibility/schema_compatibility.py @@ -0,0 +1,138 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" +from avro.compatibility import ( + merge, + ReaderWriterCompatibilityChecker as AvroChecker, + SchemaCompatibilityResult, + SchemaCompatibilityType, + SchemaIncompatibilityType, +) +from avro.schema import Schema as AvroSchema +from jsonschema import Draft7Validator +from karapace.compatibility import CompatibilityModes +from karapace.compatibility.jsonschema.checks import compatibility as jsonschema_compatibility, incompatible_schema +from karapace.compatibility.protobuf.checks import check_protobuf_schema_compatibility +from karapace.protobuf.schema import ProtobufSchema +from karapace.schema_models import ParsedTypedSchema, ValidatedTypedSchema +from karapace.schema_type import SchemaType +from karapace.utils import assert_never + +import logging + +LOG = logging.getLogger(__name__) + + +class SchemaCompatibility: + @staticmethod + def check_compatibility( + old_schema: ParsedTypedSchema, + new_schema: ValidatedTypedSchema, + compatibility_mode: CompatibilityModes, + ) -> SchemaCompatibilityResult: + """Check that `old_schema` and `new_schema` are compatible under `compatibility_mode`.""" + + if compatibility_mode is CompatibilityModes.NONE: + LOG.info("Compatibility level set to NONE, no schema compatibility checks performed") + return 
SchemaCompatibilityResult(SchemaCompatibilityType.compatible) + + if old_schema.schema_type is not new_schema.schema_type: + return incompatible_schema( + incompat_type=SchemaIncompatibilityType.type_mismatch, + message=f"Comparing different schema types: {old_schema.schema_type} with {new_schema.schema_type}", + location=[], + ) + + if old_schema.schema_type is SchemaType.AVRO: + assert isinstance(old_schema.schema, AvroSchema) + assert isinstance(new_schema.schema, AvroSchema) + if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}: + result = SchemaCompatibility.check_avro_compatibility( + reader_schema=new_schema.schema, + writer_schema=old_schema.schema, + ) + elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}: + result = SchemaCompatibility.check_avro_compatibility( + reader_schema=old_schema.schema, + writer_schema=new_schema.schema, + ) + elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}: + result = SchemaCompatibility.check_avro_compatibility( + reader_schema=new_schema.schema, + writer_schema=old_schema.schema, + ) + result = merge( + result, + SchemaCompatibility.check_avro_compatibility( + reader_schema=old_schema.schema, + writer_schema=new_schema.schema, + ), + ) + elif old_schema.schema_type is SchemaType.JSONSCHEMA: + assert isinstance(old_schema.schema, Draft7Validator) + assert isinstance(new_schema.schema, Draft7Validator) + if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}: + result = SchemaCompatibility.check_jsonschema_compatibility( + reader=new_schema.schema, + writer=old_schema.schema, + ) + elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}: + result = SchemaCompatibility.check_jsonschema_compatibility( + reader=old_schema.schema, + writer=new_schema.schema, + ) + elif compatibility_mode in {CompatibilityModes.FULL, 
CompatibilityModes.FULL_TRANSITIVE}: + result = SchemaCompatibility.check_jsonschema_compatibility( + reader=new_schema.schema, + writer=old_schema.schema, + ) + result = merge( + result, + SchemaCompatibility.check_jsonschema_compatibility( + reader=old_schema.schema, + writer=new_schema.schema, + ), + ) + elif old_schema.schema_type is SchemaType.PROTOBUF: + assert isinstance(old_schema.schema, ProtobufSchema) + assert isinstance(new_schema.schema, ProtobufSchema) + if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}: + result = SchemaCompatibility.check_protobuf_compatibility( + reader=new_schema.schema, + writer=old_schema.schema, + ) + elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}: + result = SchemaCompatibility.check_protobuf_compatibility( + reader=old_schema.schema, + writer=new_schema.schema, + ) + + elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}: + result = SchemaCompatibility.check_protobuf_compatibility( + reader=new_schema.schema, + writer=old_schema.schema, + ) + result = merge( + result, + SchemaCompatibility.check_protobuf_compatibility( + reader=old_schema.schema, + writer=new_schema.schema, + ), + ) + else: + assert_never(f"Unknown schema_type {old_schema.schema_type}") + + return result + + @staticmethod + def check_avro_compatibility(reader_schema: AvroSchema, writer_schema: AvroSchema) -> SchemaCompatibilityResult: + return AvroChecker().get_compatibility(reader=reader_schema, writer=writer_schema) + + @staticmethod + def check_jsonschema_compatibility(reader: Draft7Validator, writer: Draft7Validator) -> SchemaCompatibilityResult: + return jsonschema_compatibility(reader, writer) + + @staticmethod + def check_protobuf_compatibility(reader: ProtobufSchema, writer: ProtobufSchema) -> SchemaCompatibilityResult: + return check_protobuf_schema_compatibility(reader, writer) diff --git a/src/karapace/config.py 
b/src/karapace/config.py index 2618158a2..7f02b7712 100644 --- a/src/karapace/config.py +++ b/src/karapace/config.py @@ -6,11 +6,12 @@ """ from __future__ import annotations +from collections.abc import Mapping from karapace.constants import DEFAULT_AIOHTTP_CLIENT_MAX_SIZE, DEFAULT_PRODUCER_MAX_REQUEST, DEFAULT_SCHEMA_TOPIC from karapace.typing import ElectionStrategy, NameStrategy from karapace.utils import json_decode, json_encode, JSONDecodeError from pathlib import Path -from typing import IO, Mapping +from typing import IO from typing_extensions import NotRequired, TypedDict import logging @@ -48,6 +49,7 @@ class Config(TypedDict): registry_authfile: str | None rest_authorization: bool rest_base_uri: str | None + log_handler: str | None log_level: str log_format: str master_eligibility: bool @@ -125,6 +127,7 @@ class ConfigDefaults(Config, total=False): "registry_authfile": None, "rest_authorization": False, "rest_base_uri": None, + "log_handler": "stdout", "log_level": "DEBUG", "log_format": "%(name)-20s\t%(threadName)s\t%(levelname)-8s\t%(message)s", "master_eligibility": True, diff --git a/src/karapace/coordinator/schema_coordinator.py b/src/karapace/coordinator/schema_coordinator.py index ade69be91..151a1db26 100644 --- a/src/karapace/coordinator/schema_coordinator.py +++ b/src/karapace/coordinator/schema_coordinator.py @@ -25,11 +25,12 @@ SyncGroupRequest_v3, ) from aiokafka.util import create_future, create_task +from collections.abc import Coroutine, Sequence from karapace.dataclasses import default_dataclass from karapace.typing import JsonData from karapace.utils import json_decode, json_encode from karapace.version import __version__ -from typing import Any, Coroutine, Final, Sequence +from typing import Any, Final from typing_extensions import TypedDict import aiokafka.errors as Errors diff --git a/src/karapace/in_memory_database.py b/src/karapace/in_memory_database.py index 1192260ba..6692cae33 100644 --- a/src/karapace/in_memory_database.py +++ 
b/src/karapace/in_memory_database.py @@ -7,12 +7,12 @@ from __future__ import annotations from abc import ABC, abstractmethod +from collections.abc import Iterable, Sequence from dataclasses import dataclass, field from karapace.schema_models import SchemaVersion, TypedSchema, Versioner from karapace.schema_references import Reference, Referents from karapace.typing import SchemaId, Subject, Version from threading import Lock, RLock -from typing import Iterable, Sequence import logging diff --git a/src/karapace/instrumentation/prometheus.py b/src/karapace/instrumentation/prometheus.py index 4e478fdc7..1336b4ab0 100644 --- a/src/karapace/instrumentation/prometheus.py +++ b/src/karapace/instrumentation/prometheus.py @@ -9,9 +9,10 @@ from __future__ import annotations from aiohttp.web import middleware, Request, Response +from collections.abc import Awaitable from karapace.rapu import RestApp from prometheus_client import CollectorRegistry, Counter, Gauge, generate_latest, Histogram -from typing import Awaitable, Callable, Final +from typing import Callable, Final import logging import time diff --git a/src/karapace/kafka/admin.py b/src/karapace/kafka/admin.py index 5b9d9e5ad..fef52ebf5 100644 --- a/src/karapace/kafka/admin.py +++ b/src/karapace/kafka/admin.py @@ -5,7 +5,7 @@ from __future__ import annotations -from collections.abc import Iterable +from collections.abc import Container, Iterable from concurrent.futures import Future from confluent_kafka import TopicPartition from confluent_kafka.admin import ( @@ -27,7 +27,6 @@ single_futmap_result, UnknownTopicOrPartitionError, ) -from typing import Container class KafkaAdminClient(_KafkaConfigMixin, AdminClient): diff --git a/src/karapace/kafka/consumer.py b/src/karapace/kafka/consumer.py index 98e92c5f7..4bf4cde54 100644 --- a/src/karapace/kafka/consumer.py +++ b/src/karapace/kafka/consumer.py @@ -6,11 +6,12 @@ from __future__ import annotations from aiokafka.errors import IllegalStateError, KafkaTimeoutError +from 
collections.abc import Iterable from confluent_kafka import Consumer, Message, TopicPartition from confluent_kafka.admin import PartitionMetadata from confluent_kafka.error import KafkaException from karapace.kafka.common import _KafkaConfigMixin, KafkaClientParams, raise_from_kafkaexception -from typing import Any, Callable, Iterable, TypeVar +from typing import Any, Callable, TypeVar from typing_extensions import Unpack import asyncio diff --git a/src/karapace/kafka_rest_apis/consumer_manager.py b/src/karapace/kafka_rest_apis/consumer_manager.py index a2792303b..809478f4c 100644 --- a/src/karapace/kafka_rest_apis/consumer_manager.py +++ b/src/karapace/kafka_rest_apis/consumer_manager.py @@ -25,7 +25,6 @@ from karapace.serialization import DeserializationError, InvalidMessageHeader, InvalidPayload, SchemaRegistrySerializer from karapace.utils import convert_to_int, json_decode, JSONDecodeError from struct import error as UnpackError -from typing import Tuple from urllib.parse import urljoin import asyncio @@ -58,7 +57,7 @@ def _assert(cond: bool, code: HTTPStatus, sub_code: int, message: str, content_t if not cond: KarapaceBase.r(content_type=content_type, status=code, body={"message": message, "error_code": sub_code}) - def _assert_consumer_exists(self, internal_name: Tuple[str, str], content_type: str) -> None: + def _assert_consumer_exists(self, internal_name: tuple[str, str], content_type: str) -> None: if internal_name not in self.consumers: KarapaceBase.not_found( message=f"Consumer for {internal_name} not found among {list(self.consumers.keys())}", @@ -116,7 +115,7 @@ def _topic_and_partition_valid(cluster_metadata: dict, topic_data: dict, content ) @staticmethod - def create_internal_name(group_name: str, consumer_name: str) -> Tuple[str, str]: + def create_internal_name(group_name: str, consumer_name: str) -> tuple[str, str]: return group_name, consumer_name @staticmethod @@ -151,12 +150,22 @@ def _illegal_state_fail(message: str, content_type: str) -> 
None: message=message, ) + @staticmethod + def _unprocessable_entity(*, message: str, content_type: str) -> None: + ConsumerManager._assert( + cond=False, + code=HTTPStatus.UNPROCESSABLE_ENTITY, + sub_code=RESTErrorCodes.HTTP_UNPROCESSABLE_ENTITY.value, + content_type=content_type, + message=message, + ) + # external api below # CONSUMER async def create_consumer(self, group_name: str, request_data: dict, content_type: str): group_name = group_name.strip("/") consumer_name: str = request_data.get("name") or new_name() - internal_name: Tuple[str, str] = self.create_internal_name(group_name, consumer_name) + internal_name: tuple[str, str] = self.create_internal_name(group_name, consumer_name) async with self.consumer_locks[internal_name]: if internal_name in self.consumers: LOG.warning( @@ -238,7 +247,7 @@ async def create_kafka_consumer(self, fetch_min_bytes, group_name, client_id: st raise await asyncio.sleep(1) - async def delete_consumer(self, internal_name: Tuple[str, str], content_type: str): + async def delete_consumer(self, internal_name: tuple[str, str], content_type: str): LOG.info("Deleting consumer for %s", internal_name) self._assert_consumer_exists(internal_name, content_type) async with self.consumer_locks[internal_name]: @@ -253,7 +262,7 @@ async def delete_consumer(self, internal_name: Tuple[str, str], content_type: st # OFFSETS async def commit_offsets( - self, internal_name: Tuple[str, str], content_type: str, request_data: dict, cluster_metadata: dict + self, internal_name: tuple[str, str], content_type: str, request_data: dict, cluster_metadata: dict ): LOG.info("Committing offsets for %s", internal_name) self._assert_consumer_exists(internal_name, content_type) @@ -283,7 +292,7 @@ async def commit_offsets( KarapaceBase.internal_error(message=f"error sending commit request: {e}", content_type=content_type) empty_response() - async def get_offsets(self, internal_name: Tuple[str, str], content_type: str, request_data: dict): + async def 
get_offsets(self, internal_name: tuple[str, str], content_type: str, request_data: dict): LOG.info("Retrieving offsets for %s", internal_name) self._assert_consumer_exists(internal_name, content_type) self._assert_has_key(request_data, "partitions", content_type) @@ -315,11 +324,15 @@ async def get_offsets(self, internal_name: Tuple[str, str], content_type: str, r KarapaceBase.r(body=response, content_type=content_type) # SUBSCRIPTION - async def set_subscription(self, internal_name: Tuple[str, str], content_type: str, request_data: dict): + async def set_subscription(self, internal_name: tuple[str, str], content_type: str, request_data: dict): LOG.info("Updating subscription for %s", internal_name) self._assert_consumer_exists(internal_name, content_type) topics = request_data.get("topics", []) + if topics and not isinstance(topics, list): + self._unprocessable_entity(message="Topics is expected to be list of strings", content_type=content_type) topics_pattern = request_data.get("topic_pattern") + if topics_pattern and not isinstance(topics_pattern, str): + self._unprocessable_entity(message="Topic patterns is expected to be a string", content_type=content_type) if not (topics or topics_pattern): self._illegal_state_fail( message="Neither topic_pattern nor topics are present in request", content_type=content_type @@ -343,14 +356,14 @@ async def set_subscription(self, internal_name: Tuple[str, str], content_type: s finally: LOG.info("Done updating subscription") - async def get_subscription(self, internal_name: Tuple[str, str], content_type: str): + async def get_subscription(self, internal_name: tuple[str, str], content_type: str): LOG.info("Retrieving subscription for %s", internal_name) self._assert_consumer_exists(internal_name, content_type) async with self.consumer_locks[internal_name]: consumer = self.consumers[internal_name].consumer KarapaceBase.r(content_type=content_type, body={"topics": list(consumer.subscription())}) - async def 
delete_subscription(self, internal_name: Tuple[str, str], content_type: str): + async def delete_subscription(self, internal_name: tuple[str, str], content_type: str): LOG.info("Deleting subscription for %s", internal_name) self._assert_consumer_exists(internal_name, content_type) async with self.consumer_locks[internal_name]: @@ -358,7 +371,7 @@ async def delete_subscription(self, internal_name: Tuple[str, str], content_type empty_response() # ASSIGNMENTS - async def set_assignments(self, internal_name: Tuple[str, str], content_type: str, request_data: dict): + async def set_assignments(self, internal_name: tuple[str, str], content_type: str, request_data: dict): LOG.info("Updating assignments for %s to %r", internal_name, request_data) self._assert_consumer_exists(internal_name, content_type) self._assert_has_key(request_data, "partitions", content_type) @@ -377,7 +390,7 @@ async def set_assignments(self, internal_name: Tuple[str, str], content_type: st finally: LOG.info("Done updating assignment") - async def get_assignments(self, internal_name: Tuple[str, str], content_type: str): + async def get_assignments(self, internal_name: tuple[str, str], content_type: str): LOG.info("Retrieving assignment for %s", internal_name) self._assert_consumer_exists(internal_name, content_type) async with self.consumer_locks[internal_name]: @@ -388,7 +401,7 @@ async def get_assignments(self, internal_name: Tuple[str, str], content_type: st ) # POSITIONS - async def seek_to(self, internal_name: Tuple[str, str], content_type: str, request_data: dict): + async def seek_to(self, internal_name: tuple[str, str], content_type: str, request_data: dict): LOG.info("Resetting offsets for %s to %r", internal_name, request_data) self._assert_consumer_exists(internal_name, content_type) self._assert_has_key(request_data, "offsets", content_type) @@ -410,7 +423,7 @@ async def seek_to(self, internal_name: Tuple[str, str], content_type: str, reque empty_response() async def seek_limit( - self, 
internal_name: Tuple[str, str], content_type: str, request_data: dict, beginning: bool = True + self, internal_name: tuple[str, str], content_type: str, request_data: dict, beginning: bool = True ): direction = "beginning" if beginning else "end" LOG.info("Seeking %s offsets", direction) @@ -443,7 +456,7 @@ async def seek_limit( sub_code=RESTErrorCodes.UNKNOWN_TOPIC_OR_PARTITION.value, ) - async def fetch(self, internal_name: Tuple[str, str], content_type: str, formats: dict, query_params: dict): + async def fetch(self, internal_name: tuple[str, str], content_type: str, formats: dict, query_params: dict): LOG.info("Running fetch for name %s with parameters %r and formats %r", internal_name, query_params, formats) self._assert_consumer_exists(internal_name, content_type) async with self.consumer_locks[internal_name]: diff --git a/src/karapace/kafka_rest_apis/schema_cache.py b/src/karapace/kafka_rest_apis/schema_cache.py index bde742e37..6f4d8b45a 100644 --- a/src/karapace/kafka_rest_apis/schema_cache.py +++ b/src/karapace/kafka_rest_apis/schema_cache.py @@ -5,9 +5,10 @@ from abc import ABC, abstractmethod from cachetools import TTLCache +from collections.abc import MutableMapping from karapace.schema_models import TypedSchema from karapace.typing import SchemaId, Subject -from typing import Dict, Final, MutableMapping, Optional +from typing import Final, Optional import hashlib @@ -36,7 +37,7 @@ def get_schema_str(self, schema_id: SchemaId) -> Optional[str]: class TopicSchemaCache: def __init__(self) -> None: - self._topic_cache: Dict[Subject, SchemaCache] = {} + self._topic_cache: dict[Subject, SchemaCache] = {} self._empty_schema_cache: Final = EmptySchemaCache() def get_schema_id(self, topic: Subject, schema: TypedSchema) -> Optional[SchemaId]: @@ -60,7 +61,7 @@ def get_schema_str(self, topic: Subject, schema_id: SchemaId) -> Optional[str]: class SchemaCache(SchemaCacheProtocol): def __init__(self) -> None: - self._schema_hash_str_to_id: Dict[str, SchemaId] = {} 
+ self._schema_hash_str_to_id: dict[str, SchemaId] = {} self._id_to_schema_str: MutableMapping[SchemaId, TypedSchema] = TTLCache(maxsize=100, ttl=600) def get_schema_id(self, schema: TypedSchema) -> Optional[SchemaId]: diff --git a/src/karapace/kafka_utils.py b/src/karapace/kafka_utils.py index 129ad96d4..ede5e7023 100644 --- a/src/karapace/kafka_utils.py +++ b/src/karapace/kafka_utils.py @@ -3,10 +3,10 @@ See LICENSE for details """ from .config import Config +from collections.abc import Iterator from karapace.kafka.admin import KafkaAdminClient from karapace.kafka.consumer import KafkaConsumer from karapace.kafka.producer import KafkaProducer -from typing import Iterator import contextlib diff --git a/src/karapace/karapace.py b/src/karapace/karapace.py index 28e26cf91..f486b1903 100644 --- a/src/karapace/karapace.py +++ b/src/karapace/karapace.py @@ -8,20 +8,29 @@ from __future__ import annotations from aiohttp.web_request import Request +from collections.abc import Awaitable from functools import partial from http import HTTPStatus from karapace.config import Config +from karapace.dataclasses import default_dataclass from karapace.rapu import HTTPRequest, HTTPResponse, RestApp from karapace.typing import JsonObject from karapace.utils import json_encode from karapace.version import __version__ -from typing import Awaitable, Callable, NoReturn +from typing import Callable, NoReturn from typing_extensions import TypeAlias import aiohttp.web import time -HealthHook: TypeAlias = Callable[[], Awaitable[JsonObject]] + +@default_dataclass +class HealthCheck: + status: JsonObject + healthy: bool + + +HealthHook: TypeAlias = Callable[[], Awaitable[HealthCheck]] class KarapaceBase(RestApp): @@ -95,11 +104,15 @@ async def health(self, _request: Request) -> aiohttp.web.Response: "process_uptime_sec": int(time.monotonic() - self._process_start_time), "karapace_version": __version__, } + status_code = HTTPStatus.OK for hook in self.health_hooks: - resp.update(await hook()) + 
check = await hook() + resp.update(check.status) + if not check.healthy: + status_code = HTTPStatus.SERVICE_UNAVAILABLE return aiohttp.web.Response( body=json_encode(resp, binary=True, compact=True), - status=HTTPStatus.OK.value, + status=status_code.value, headers={"Content-Type": "application/json"}, ) diff --git a/src/karapace/karapace_all.py b/src/karapace/karapace_all.py index 240da1008..ccdb96915 100644 --- a/src/karapace/karapace_all.py +++ b/src/karapace/karapace_all.py @@ -2,10 +2,12 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ +from __future__ import annotations + from aiohttp.web_log import AccessLogger from contextlib import closing from karapace import version as karapace_version -from karapace.config import read_config +from karapace.config import Config, read_config from karapace.instrumentation.prometheus import PrometheusInstrumentation from karapace.kafka_rest_apis import KafkaRest from karapace.rapu import RestApp @@ -21,6 +23,38 @@ class KarapaceAll(KafkaRest, KarapaceSchemaRegistryController): pass +def _configure_logging(*, config: Config) -> None: + log_level = config.get("log_level", "DEBUG") + log_format = config.get("log_format", "%(name)-20s\t%(threadName)s\t%(levelname)-8s\t%(message)s") + + root_handler: logging.Handler | None = None + log_handler = config.get("log_handler", None) + if "systemd" == log_handler: + from systemd import journal + + root_handler = journal.JournalHandler(SYSLOG_IDENTIFIER="karapace") + elif "stdout" == log_handler or log_handler is None: + root_handler = logging.StreamHandler(stream=sys.stdout) + else: + logging.basicConfig(level=logging.INFO, format=log_format) + logging.getLogger().setLevel(log_level) + logging.warning("Log handler %s not recognized, root handler not set.", log_handler) + + if root_handler is not None: + root_handler.setFormatter(logging.Formatter(log_format)) + root_handler.setLevel(log_level) + root_handler.set_name(name="karapace") + logging.root.addHandler(root_handler) + 
+ logging.root.setLevel(log_level) + + if config.get("access_logs_debug") is True: + config["access_log_class"] = DebugAccessLogger + logging.getLogger("aiohttp.access").setLevel(logging.DEBUG) + else: + config["access_log_class"] = AccessLogger + + def main() -> int: parser = argparse.ArgumentParser(prog="karapace", description="Karapace: Your Kafka essentials in one tool") parser.add_argument("--version", action="version", help="show program version", version=karapace_version.__version__) @@ -30,13 +64,7 @@ def main() -> int: with closing(arg.config_file): config = read_config(arg.config_file) - logging.basicConfig(level=logging.INFO, format=config["log_format"]) - logging.getLogger().setLevel(config["log_level"]) - if config.get("access_logs_debug") is True: - config["access_log_class"] = DebugAccessLogger - logging.getLogger("aiohttp.access").setLevel(logging.DEBUG) - else: - config["access_log_class"] = AccessLogger + _configure_logging(config=config) app: RestApp if config["karapace_rest"] and config["karapace_registry"]: diff --git a/src/karapace/messaging.py b/src/karapace/messaging.py index bfdd33665..501047769 100644 --- a/src/karapace/messaging.py +++ b/src/karapace/messaging.py @@ -12,7 +12,7 @@ from karapace.offset_watcher import OffsetWatcher from karapace.utils import json_encode from karapace.version import __version__ -from typing import Any, Dict, Final, Optional, Union +from typing import Any, Final, Optional, Union import logging import time @@ -103,7 +103,7 @@ def _send_kafka_message(self, key: Union[bytes, str], value: Union[bytes, str]) ) ) - def send_message(self, *, key: Dict[str, Any], value: Optional[Dict[str, Any]]) -> None: + def send_message(self, *, key: dict[str, Any], value: Optional[dict[str, Any]]) -> None: key_bytes = self._key_formatter.format_key(key) value_bytes: Union[bytes, str] = b"" if value is not None: diff --git a/src/karapace/protobuf/compare_result.py b/src/karapace/protobuf/compare_result.py index 
fdbdb6bf5..1caffdc24 100644 --- a/src/karapace/protobuf/compare_result.py +++ b/src/karapace/protobuf/compare_result.py @@ -4,7 +4,6 @@ """ from dataclasses import dataclass, field from enum import auto, Enum -from typing import List class Modification(Enum): @@ -68,9 +67,9 @@ def to_str(self) -> str: class CompareResult: def __init__(self) -> None: - self.result: List[ModificationRecord] = [] - self.path: List[str] = [] - self.canonical_name: List[str] = [] + self.result: list[ModificationRecord] = [] + self.path: list[str] = [] + self.canonical_name: list[str] = [] def push_path(self, name_element: str, canonical: bool = False) -> None: if canonical: diff --git a/src/karapace/protobuf/compare_type_lists.py b/src/karapace/protobuf/compare_type_lists.py index d4d181a95..788d272ad 100644 --- a/src/karapace/protobuf/compare_type_lists.py +++ b/src/karapace/protobuf/compare_type_lists.py @@ -4,6 +4,7 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ +from collections.abc import Sequence from itertools import chain from karapace.protobuf.compare_result import CompareResult, Modification from karapace.protobuf.compare_type_storage import CompareTypes @@ -11,7 +12,6 @@ from karapace.protobuf.exception import IllegalStateException from karapace.protobuf.message_element import MessageElement from karapace.protobuf.type_element import TypeElement -from typing import Sequence def compare_type_lists( diff --git a/src/karapace/protobuf/compare_type_storage.py b/src/karapace/protobuf/compare_type_storage.py index cc1378d02..4ab651f9a 100644 --- a/src/karapace/protobuf/compare_type_storage.py +++ b/src/karapace/protobuf/compare_type_storage.py @@ -7,14 +7,14 @@ from karapace.protobuf.exception import IllegalArgumentException from karapace.protobuf.proto_type import ProtoType from karapace.protobuf.type_element import TypeElement -from typing import Dict, List, Optional, TYPE_CHECKING, Union +from typing import Optional, TYPE_CHECKING, Union if TYPE_CHECKING: from 
karapace.protobuf.field_element import FieldElement from karapace.protobuf.message_element import MessageElement -def compute_name(t: ProtoType, result_path: List[str], package_name: str, types: dict) -> Optional[str]: +def compute_name(t: ProtoType, result_path: list[str], package_name: str, types: dict) -> Optional[str]: string = t.string if string.startswith("."): @@ -41,10 +41,10 @@ def __init__(self, self_package_name: str, other_package_name: str, result: Comp self.self_package_name = self_package_name or "" self.other_package_name = other_package_name or "" - self.self_types: Dict[str, Union[TypeRecord, TypeRecordMap]] = {} - self.other_types: Dict[str, Union[TypeRecord, TypeRecordMap]] = {} - self.locked_messages: List["MessageElement"] = [] - self.environment: List["MessageElement"] = [] + self.self_types: dict[str, Union[TypeRecord, TypeRecordMap]] = {} + self.other_types: dict[str, Union[TypeRecord, TypeRecordMap]] = {} + self.locked_messages: list["MessageElement"] = [] + self.environment: list["MessageElement"] = [] self.result = result def add_a_type(self, prefix: str, package_name: str, type_element: TypeElement, types: dict) -> None: diff --git a/src/karapace/protobuf/encoding_variants.py b/src/karapace/protobuf/encoding_variants.py index 37e1d3cb9..ba1e24232 100644 --- a/src/karapace/protobuf/encoding_variants.py +++ b/src/karapace/protobuf/encoding_variants.py @@ -7,7 +7,6 @@ from io import BytesIO from karapace.protobuf.exception import IllegalArgumentException -from typing import List ZERO_BYTE = b"\x00" @@ -33,7 +32,7 @@ def read_varint(bio: BytesIO) -> int: return varint -def read_indexes(bio: BytesIO) -> List[int]: +def read_indexes(bio: BytesIO) -> list[int]: try: size: int = read_varint(bio) except EOFError: @@ -67,6 +66,6 @@ def write_varint(bio: BytesIO, value: int) -> int: return written_bytes -def write_indexes(bio: BytesIO, indexes: List[int]) -> None: +def write_indexes(bio: BytesIO, indexes: list[int]) -> None: for i in indexes: 
write_varint(bio, i) diff --git a/src/karapace/protobuf/enum_element.py b/src/karapace/protobuf/enum_element.py index dcee9522c..38c8a87c4 100644 --- a/src/karapace/protobuf/enum_element.py +++ b/src/karapace/protobuf/enum_element.py @@ -6,6 +6,7 @@ # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/EnumElement.kt from __future__ import annotations +from collections.abc import Sequence from itertools import chain from karapace.protobuf.compare_result import CompareResult, Modification from karapace.protobuf.compare_type_storage import CompareTypes @@ -14,7 +15,6 @@ from karapace.protobuf.option_element import OptionElement from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import append_documentation, append_indented -from typing import Sequence class EnumElement(TypeElement): diff --git a/src/karapace/protobuf/extend_element.py b/src/karapace/protobuf/extend_element.py index da8229650..748d85d7e 100644 --- a/src/karapace/protobuf/extend_element.py +++ b/src/karapace/protobuf/extend_element.py @@ -6,11 +6,11 @@ # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ExtendElement.kt from __future__ import annotations +from collections.abc import Sequence from dataclasses import dataclass from karapace.protobuf.field_element import FieldElement from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation, append_indented -from typing import Sequence @dataclass diff --git a/src/karapace/protobuf/group_element.py b/src/karapace/protobuf/group_element.py index 1eeecf31c..0db09e41f 100644 --- a/src/karapace/protobuf/group_element.py +++ b/src/karapace/protobuf/group_element.py @@ -6,12 +6,12 @@ # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/GroupElement.kt from __future__ import annotations +from collections.abc import Sequence from dataclasses import dataclass from 
karapace.protobuf.field import Field from karapace.protobuf.field_element import FieldElement from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation, append_indented -from typing import Sequence @dataclass diff --git a/src/karapace/protobuf/io.py b/src/karapace/protobuf/io.py index 2c87073d3..36c76e491 100644 --- a/src/karapace/protobuf/io.py +++ b/src/karapace/protobuf/io.py @@ -4,6 +4,7 @@ """ from __future__ import annotations +from collections.abc import Generator, Iterable from io import BytesIO from karapace.config import Config from karapace.protobuf.encoding_variants import read_indexes, write_indexes @@ -14,7 +15,7 @@ from karapace.protobuf.type_element import TypeElement from multiprocessing import Process, Queue from pathlib import Path -from typing import Dict, Final, Generator, Iterable, Protocol +from typing import Final, Protocol from typing_extensions import Self, TypeAlias import hashlib @@ -209,7 +210,7 @@ def read_in_forked_multiprocess_process( finally: p.join() reader_queue.close() - if isinstance(result, Dict): + if isinstance(result, dict): return result if isinstance(result, BaseException): raise result diff --git a/src/karapace/protobuf/known_dependency.py b/src/karapace/protobuf/known_dependency.py index 5b322929b..bb250707b 100644 --- a/src/karapace/protobuf/known_dependency.py +++ b/src/karapace/protobuf/known_dependency.py @@ -7,7 +7,7 @@ # Support of known dependencies -from typing import Any, Dict, Set +from typing import Any def static_init(cls: Any) -> object: @@ -18,9 +18,9 @@ def static_init(cls: Any) -> object: @static_init # pylint: disable=used-before-assignment class KnownDependency: - index: Dict = dict() - index_simple: Dict = dict() - map: Dict = { + index: dict = dict() + index_simple: dict = dict() + map: dict = { "google/protobuf/any.proto": ["google.protobuf.Any"], "google/protobuf/api.proto": ["google.protobuf.Api", "google.protobuf.Method", "google.protobuf.Mixin"], 
"google/protobuf/descriptor.proto": [ @@ -108,7 +108,7 @@ def static_init(cls) -> None: class DependenciesHardcoded: - index: Set[str] = { + index: set[str] = { "bool", "bytes", "double", diff --git a/src/karapace/protobuf/message_element.py b/src/karapace/protobuf/message_element.py index c7f2ddb10..d3333b47b 100644 --- a/src/karapace/protobuf/message_element.py +++ b/src/karapace/protobuf/message_element.py @@ -7,6 +7,7 @@ # compatibility routine added from __future__ import annotations +from collections.abc import Sequence from itertools import chain from karapace.protobuf.compare_result import CompareResult, Modification from karapace.protobuf.compare_type_storage import CompareTypes @@ -19,7 +20,6 @@ from karapace.protobuf.reserved_element import ReservedElement from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import append_documentation, append_indented -from typing import Sequence class MessageElement(TypeElement): diff --git a/src/karapace/protobuf/one_of_element.py b/src/karapace/protobuf/one_of_element.py index 278886e23..8889d0cc7 100644 --- a/src/karapace/protobuf/one_of_element.py +++ b/src/karapace/protobuf/one_of_element.py @@ -7,6 +7,7 @@ from __future__ import annotations +from collections.abc import Sequence from itertools import chain from karapace.protobuf.compare_result import CompareResult, Modification from karapace.protobuf.compare_type_storage import CompareTypes @@ -14,7 +15,6 @@ from karapace.protobuf.group_element import GroupElement from karapace.protobuf.option_element import OptionElement from karapace.protobuf.utils import append_documentation, append_indented -from typing import Sequence class OneOfElement: diff --git a/src/karapace/protobuf/proto_file_element.py b/src/karapace/protobuf/proto_file_element.py index c9f4be031..ed9f638cd 100644 --- a/src/karapace/protobuf/proto_file_element.py +++ b/src/karapace/protobuf/proto_file_element.py @@ -2,6 +2,8 @@ Copyright (c) 2023 Aiven Ltd See LICENSE 
for details """ +from collections.abc import Sequence + # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ProtoFileElement.kt from karapace.dependency import Dependency @@ -13,12 +15,12 @@ from karapace.protobuf.service_element import ServiceElement from karapace.protobuf.syntax import Syntax from karapace.protobuf.type_element import TypeElement -from typing import Dict, List, NewType, Optional, Sequence +from typing import NewType, Optional -def _collect_dependencies_types(compare_types: CompareTypes, dependencies: Optional[Dict[str, Dependency]], is_self: bool): +def _collect_dependencies_types(compare_types: CompareTypes, dependencies: Optional[dict[str, Dependency]], is_self: bool): for dep in dependencies.values(): - types: List[TypeElement] = dep.schema.schema.proto_file_element.types + types: list[TypeElement] = dep.schema.schema.proto_file_element.types sub_deps = dep.schema.schema.dependencies package_name = dep.schema.schema.proto_file_element.package_name type_: TypeElement @@ -131,8 +133,8 @@ def compare( self, other: "ProtoFileElement", result: CompareResult, - self_dependencies: Optional[Dict[str, Dependency]] = None, - other_dependencies: Optional[Dict[str, Dependency]] = None, + self_dependencies: Optional[dict[str, Dependency]] = None, + other_dependencies: Optional[dict[str, Dependency]] = None, ) -> CompareResult: from karapace.protobuf.compare_type_lists import compare_type_lists diff --git a/src/karapace/protobuf/proto_normalizations.py b/src/karapace/protobuf/proto_normalizations.py index 6a5356103..9b82cb3f6 100644 --- a/src/karapace/protobuf/proto_normalizations.py +++ b/src/karapace/protobuf/proto_normalizations.py @@ -5,6 +5,7 @@ from __future__ import annotations +from collections.abc import Sequence from karapace.protobuf.enum_constant_element import EnumConstantElement from karapace.protobuf.enum_element import EnumElement from karapace.protobuf.extend_element import 
ExtendElement @@ -20,8 +21,6 @@ from karapace.protobuf.service_element import ServiceElement from karapace.protobuf.type_element import TypeElement from karapace.protobuf.type_tree import TypeTree -from karapace.utils import remove_prefix -from typing import Sequence import abc @@ -90,7 +89,7 @@ class NormalizedOneOfElement(OneOfElement): def normalize_type_field_element(type_field: FieldElement, package: str, type_tree: TypeTree) -> NormalizedFieldElement: sorted_options = None if type_field.options is None else list(sorted(type_field.options, key=sort_by_name)) - field_type_normalized = remove_prefix(remove_prefix(type_field.element_type, "."), f"{package}.") + field_type_normalized = type_field.element_type.removeprefix(".").removeprefix(f"{package}.") reference_in_type_tree = type_tree.type_in_tree(field_type_normalized) google_included_type = ( field_type_normalized in KnownDependency.index_simple or field_type_normalized in KnownDependency.index diff --git a/src/karapace/protobuf/proto_parser.py b/src/karapace/protobuf/proto_parser.py index f00602ac7..f5a002aa5 100644 --- a/src/karapace/protobuf/proto_parser.py +++ b/src/karapace/protobuf/proto_parser.py @@ -28,7 +28,7 @@ from karapace.protobuf.syntax_reader import SyntaxReader from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import MAX_TAG_VALUE -from typing import List, Optional, Union +from typing import Optional, Union class Context(Enum): @@ -73,17 +73,17 @@ def permits_extend(self) -> bool: class ProtoParser: def __init__(self, location: Location, data: str) -> None: self.location = location - self.imports: List[str] = [] - self.nested_types: List[TypeElement] = [] - self.services: List[str] = [] - self.extends_list: List[str] = [] - self.options: List[str] = [] + self.imports: list[str] = [] + self.nested_types: list[TypeElement] = [] + self.services: list[str] = [] + self.extends_list: list[str] = [] + self.options: list[str] = [] self.declaration_count = 0 
self.syntax: Optional[Syntax] = None self.package_name: Optional[str] = None self.prefix = "" self.data = data - self.public_imports: List[str] = [] + self.public_imports: list[str] = [] self.reader = SyntaxReader(data, location) def read_proto_file(self) -> ProtoFileElement: @@ -226,13 +226,13 @@ def read_declaration( def read_message(self, location: Location, documentation: str) -> MessageElement: """Reads a message declaration.""" name: str = self.reader.read_name() - fields: List[FieldElement] = [] - one_ofs: List[OneOfElement] = [] - nested_types: List[TypeElement] = [] - extensions: List[ExtensionsElement] = [] - options: List[OptionElement] = [] - reserveds: List[ReservedElement] = [] - groups: List[GroupElement] = [] + fields: list[FieldElement] = [] + one_ofs: list[OneOfElement] = [] + nested_types: list[TypeElement] = [] + extensions: list[ExtensionsElement] = [] + options: list[OptionElement] = [] + reserveds: list[ReservedElement] = [] + groups: list[GroupElement] = [] previous_prefix = self.prefix self.prefix = f"{self.prefix}{name}." 
diff --git a/src/karapace/protobuf/protobuf_to_dict.py b/src/karapace/protobuf/protobuf_to_dict.py index a9713e523..22fc300d6 100644 --- a/src/karapace/protobuf/protobuf_to_dict.py +++ b/src/karapace/protobuf/protobuf_to_dict.py @@ -12,7 +12,6 @@ from google.protobuf.message import Message from google.protobuf.timestamp_pb2 import Timestamp from types import MappingProxyType -from typing import Dict import datetime @@ -82,7 +81,7 @@ def protobuf_to_dict( use_enum_labels=True, including_default_value_fields=True, lowercase_enum_lables=False, -) -> Dict[object, object]: +) -> dict[object, object]: type_callable_map = TYPE_CALLABLE_MAP result_dict = {} extensions = {} diff --git a/src/karapace/protobuf/protopace/protopace.py b/src/karapace/protobuf/protopace/protopace.py index a65f90582..0f928016e 100644 --- a/src/karapace/protobuf/protopace/protopace.py +++ b/src/karapace/protobuf/protopace/protopace.py @@ -6,7 +6,6 @@ from dataclasses import dataclass, field from functools import cached_property from karapace.errors import InvalidSchema -from typing import Dict, List import ctypes import importlib.util @@ -41,11 +40,11 @@ class FormatResult(ctypes.Structure): class Proto: name: str schema: str - dependencies: List["Proto"] = field(default_factory=list) + dependencies: list["Proto"] = field(default_factory=list) @cached_property - def all_dependencies(self) -> List["Proto"]: - dependencies: Dict[str, "Proto"] = {} + def all_dependencies(self) -> list["Proto"]: + dependencies: dict[str, "Proto"] = {} for dep in self.dependencies: if dep.dependencies: dependencies.update([(d.name, d) for d in dep.all_dependencies]) diff --git a/src/karapace/protobuf/schema.py b/src/karapace/protobuf/schema.py index fdd72a891..1d059447a 100644 --- a/src/karapace/protobuf/schema.py +++ b/src/karapace/protobuf/schema.py @@ -5,6 +5,7 @@ from __future__ import annotations +from collections.abc import Mapping, Sequence from karapace.dataclasses import default_dataclass # Ported from 
square/wire: @@ -26,7 +27,6 @@ from karapace.protobuf.type_tree import SourceFileReference, TypeTree from karapace.protobuf.utils import append_documentation, append_indented from karapace.schema_references import Reference -from typing import Mapping, Sequence import binascii diff --git a/src/karapace/protobuf/serialization.py b/src/karapace/protobuf/serialization.py index abc01247d..123e80c8f 100644 --- a/src/karapace/protobuf/serialization.py +++ b/src/karapace/protobuf/serialization.py @@ -4,6 +4,7 @@ """ from __future__ import annotations +from collections.abc import Sequence from karapace.errors import InvalidSchema from karapace.protobuf.enum_constant_element import EnumConstantElement from karapace.protobuf.enum_element import EnumElement @@ -19,7 +20,7 @@ from karapace.protobuf.syntax import Syntax from karapace.protobuf.type_element import TypeElement from types import MappingProxyType -from typing import Any, Sequence +from typing import Any import base64 import google.protobuf.descriptor @@ -92,17 +93,31 @@ def _deserialize_msg(msgtype: Any) -> MessageElement: for nested_enum in msgtype.enum_type: nested_types.append(_deserialize_enum(nested_enum)) - one_ofs: list[OneOfElement] = [OneOfElement(oneof.name) for oneof in msgtype.oneof_decl] + one_ofs: list[OneOfElement | None] = [OneOfElement(oneof.name) for oneof in msgtype.oneof_decl] for f in msgtype.field: sf = _deserialize_field(f) - if f.HasField("oneof_index"): + is_oneof = f.HasField("oneof_index") + is_proto3_optional = f.HasField("oneof_index") and f.HasField("proto3_optional") and f.proto3_optional + if is_proto3_optional: + # Every proto3 optional field is placed into a one-field oneof, called a "synthetic" oneof, + # as it was not present in the source .proto file. + # This will make sure that we don't interpret those optionals as oneof. 
+ one_ofs[f.oneof_index] = None + fields.append(sf) + elif is_oneof: one_ofs[f.oneof_index].fields.append(sf) else: fields.append(sf) + one_ofs_filtered: list[OneOfElement] = [oneof for oneof in one_ofs if oneof is not None] return MessageElement( - DEFAULT_LOCATION, msgtype.name, nested_types=nested_types, reserveds=reserveds, fields=fields, one_ofs=one_ofs + DEFAULT_LOCATION, + msgtype.name, + nested_types=nested_types, + reserveds=reserveds, + fields=fields, + one_ofs=one_ofs_filtered, ) diff --git a/src/karapace/protobuf/service_element.py b/src/karapace/protobuf/service_element.py index ed714c58c..08f365b8b 100644 --- a/src/karapace/protobuf/service_element.py +++ b/src/karapace/protobuf/service_element.py @@ -6,12 +6,12 @@ # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ServiceElement.kt from __future__ import annotations +from collections.abc import Sequence from dataclasses import dataclass from karapace.protobuf.location import Location from karapace.protobuf.option_element import OptionElement from karapace.protobuf.rpc_element import RpcElement from karapace.protobuf.utils import append_documentation, append_indented -from typing import Sequence @dataclass diff --git a/src/karapace/protobuf/type_element.py b/src/karapace/protobuf/type_element.py index ec840a801..89e999034 100644 --- a/src/karapace/protobuf/type_element.py +++ b/src/karapace/protobuf/type_element.py @@ -6,9 +6,10 @@ # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/TypeElement.kt from __future__ import annotations +from collections.abc import Sequence from dataclasses import dataclass from karapace.protobuf.location import Location -from typing import Sequence, TYPE_CHECKING +from typing import TYPE_CHECKING if TYPE_CHECKING: from karapace.protobuf.compare_result import CompareResult diff --git a/src/karapace/protobuf/type_tree.py b/src/karapace/protobuf/type_tree.py index f9279e864..71fe83b3d 100644 --- 
a/src/karapace/protobuf/type_tree.py +++ b/src/karapace/protobuf/type_tree.py @@ -6,7 +6,6 @@ from collections.abc import Iterable, Sequence from karapace.dataclasses import default_dataclass -from karapace.utils import remove_prefix import itertools @@ -84,7 +83,7 @@ def _type_in_tree(tree: TypeTree, remaining_tokens: list[str]) -> TypeTree | Non return tree def type_in_tree(self, queried_type: str) -> TypeTree | None: - return TypeTree._type_in_tree(self, remove_prefix(queried_type, ".").split(".")) + return TypeTree._type_in_tree(self, queried_type.removeprefix(".").split(".")) def expand_missing_absolute_path(self) -> Sequence[str]: oldest_import = self.oldest_matching_import() diff --git a/src/karapace/protobuf/utils.py b/src/karapace/protobuf/utils.py index b01e428fe..081a23c87 100644 --- a/src/karapace/protobuf/utils.py +++ b/src/karapace/protobuf/utils.py @@ -4,13 +4,13 @@ """ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/Util.kt -from typing import List, TYPE_CHECKING +from typing import TYPE_CHECKING if TYPE_CHECKING: from karapace.protobuf.option_element import OptionElement -def append_documentation(data: List[str], documentation: str) -> None: +def append_documentation(data: list[str], documentation: str) -> None: if not documentation: return @@ -25,7 +25,7 @@ def append_documentation(data: List[str], documentation: str) -> None: data.append("\n") -def append_options(data: List[str], options: List["OptionElement"]) -> None: +def append_options(data: list[str], options: list["OptionElement"]) -> None: count = len(options) if count == 1: data.append("[") @@ -52,7 +52,7 @@ def try_to_schema(obj: "OptionElement") -> str: raise -def append_indented(data: List[str], value: str) -> None: +def append_indented(data: list[str], value: str) -> None: lines = value.split("\n") if len(lines) > 1 and not lines[-1]: del lines[-1] diff --git a/src/karapace/rapu.py b/src/karapace/rapu.py index 
c4e4c5ee3..3731abc8a 100644 --- a/src/karapace/rapu.py +++ b/src/karapace/rapu.py @@ -12,7 +12,7 @@ from karapace.statsd import StatsClient from karapace.utils import json_decode, json_encode from karapace.version import __version__ -from typing import Callable, Dict, NoReturn, Optional, overload, Union +from typing import Callable, NoReturn, Optional, overload, Union import aiohttp import aiohttp.web @@ -63,7 +63,7 @@ def __init__( *, url: str, query, - headers: Dict[str, str], + headers: dict[str, str], path_for_stats: str, method: str, content_type: Optional[str] = None, @@ -71,7 +71,7 @@ def __init__( ): self.url = url self.headers = headers - self._header_cache: Dict[str, Optional[str]] = {} + self._header_cache: dict[str, Optional[str]] = {} self.query = query self.content_type = content_type self.accepts = accepts @@ -120,7 +120,7 @@ def __init__( *, status: HTTPStatus = HTTPStatus.OK, content_type: Optional[str] = None, - headers: Optional[Dict[str, str]] = None, + headers: Optional[dict[str, str]] = None, ) -> None: self.body = body self.status = status diff --git a/src/karapace/schema_models.py b/src/karapace/schema_models.py index eab1a5c9f..4a2ae9c55 100644 --- a/src/karapace/schema_models.py +++ b/src/karapace/schema_models.py @@ -6,8 +6,9 @@ from avro.errors import SchemaParseException from avro.schema import parse as avro_parse, Schema as AvroSchema +from collections.abc import Collection, Mapping, Sequence from dataclasses import dataclass -from jsonschema import Draft7Validator +from jsonschema import Draft7Validator, RefResolver from jsonschema.exceptions import SchemaError from karapace.dependency import Dependency from karapace.errors import InvalidSchema, InvalidVersion, VersionNotFoundException @@ -26,7 +27,7 @@ from karapace.schema_type import SchemaType from karapace.typing import JsonObject, SchemaId, Subject, Version, VersionTag from karapace.utils import assert_never, json_decode, json_encode, JSONDecodeError -from typing import Any, 
cast, Collection, Dict, Final, final, Mapping, Sequence +from typing import Any, cast, Final, final import hashlib import logging @@ -46,8 +47,12 @@ def parse_avro_schema_definition(s: str, validate_enum_symbols: bool = True, val return avro_parse(json_encode(json_data), validate_enum_symbols=validate_enum_symbols, validate_names=validate_names) -def parse_jsonschema_definition(schema_definition: str) -> Draft7Validator: - """Parses and validates `schema_definition`. +class InvalidValidatorRegistry(Exception): + pass + + +def parse_jsonschema_definition(schema_definition: str, resolver: RefResolver | None = None) -> Draft7Validator: + """Parses and validates `schema_definition`, passing `resolver` to the validator when one is given. Raises: SchemaError: If `schema_definition` is not a valid Draft7 schema. @@ -56,6 +61,8 @@ def parse_jsonschema_definition(schema_definition: str) -> Draft7Validator: # TODO: Annotations dictate Mapping[str, Any] here, but we have unit tests that # use bool values and fail if we assert isinstance(_, dict).
Draft7Validator.check_schema(schema) # type: ignore[arg-type] + if resolver: + return Draft7Validator(schema, resolver=resolver) # type: ignore[arg-type] return Draft7Validator(schema) # type: ignore[arg-type] @@ -126,7 +133,7 @@ def __init__( def to_dict(self) -> JsonObject: if self.schema_type is SchemaType.PROTOBUF: raise InvalidSchema("Protobuf do not support to_dict serialization") - return json_decode(self.schema_str, Dict[str, Any]) + return json_decode(self.schema_str, dict[str, Any]) def fingerprint(self) -> str: if self._fingerprint_cached is None: @@ -194,6 +201,40 @@ def schema(self) -> Draft7Validator | AvroSchema | ProtobufSchema: return parsed_typed_schema.schema +def json_resolver(schema_str: str, dependencies: Mapping[str, Dependency] | None = None) -> RefResolver | None: + # RefResolver is deprecated but it is still used in karapace code, + # see normalize_schema_rec() function in src/karapace/compatibility/jsonschema/utils.py + # When karapace JSON support is updated, this code must be rewritten to use + # referencing.Registry instead of RefResolver + schema_store: dict = {} + stack: list[tuple[str, Mapping[str, Dependency] | None]] = [(schema_str, dependencies)] + if dependencies is None: + return None + while stack: + current_schema_str, current_dependencies = stack.pop() + if current_dependencies: + stack.append((current_schema_str, None)) + for dependency in current_dependencies.values(): + stack.append((dependency.schema.schema_str, dependency.schema.dependencies)) + else: + schema_json = json_decode(current_schema_str) + if isinstance(schema_json, dict): + schema_store[schema_json["$id"]] = schema_json + else: + # In the case of schemas with references, we only support schemas with a canonical structure, + # which must include a $id in the reference.
+ raise InvalidSchema + main_schema_json = json_decode(schema_str) + if not isinstance(main_schema_json, dict): + # In the case of schemas with references, we only support schemas with a canonical structure, which must + # contain an $id tag within the reference. Simple main schemas of types such as bool, int, str, etc., + # are not supported. + raise InvalidSchema + + resolver = RefResolver.from_schema(main_schema_json, store=schema_store) + return resolver + + def parse( schema_type: SchemaType, schema_str: str, @@ -220,7 +261,7 @@ def parse( elif schema_type is SchemaType.JSONSCHEMA: try: - parsed_schema = parse_jsonschema_definition(schema_str) + parsed_schema = parse_jsonschema_definition(schema_str, resolver=json_resolver(schema_str, dependencies)) # TypeError - Raised when the user forgets to encode the schema as a string. except (TypeError, JSONDecodeError, SchemaError, AssertionError) as e: raise InvalidSchema from e diff --git a/src/karapace/schema_reader.py b/src/karapace/schema_reader.py index cd04944dc..58d2ca982 100644 --- a/src/karapace/schema_reader.py +++ b/src/karapace/schema_reader.py @@ -21,7 +21,8 @@ UnknownTopicOrPartitionError, ) from avro.schema import Schema as AvroSchema -from confluent_kafka import Message, TopicPartition +from collections.abc import Mapping, Sequence +from confluent_kafka import Message, TopicCollection, TopicPartition from contextlib import closing, ExitStack from enum import Enum from jsonschema.validators import Draft7Validator @@ -45,8 +46,9 @@ from karapace.typing import JsonObject, SchemaId, Subject, Version from karapace.utils import json_decode, JSONDecodeError, shutdown from threading import Event, Thread -from typing import Final, Mapping, Sequence +from typing import Final +import asyncio import json import logging import time @@ -61,6 +63,11 @@ KAFKA_CLIENT_CREATION_TIMEOUT_SECONDS: Final = 2.0 SCHEMA_TOPIC_CREATION_TIMEOUT_SECONDS: Final = 5.0 +# If handle_messages throws at least 
UNHEALTHY_CONSECUTIVE_ERRORS +# for UNHEALTHY_TIMEOUT_SECONDS the SchemaReader will be reported unhealthy +UNHEALTHY_TIMEOUT_SECONDS: Final = 10.0 +UNHEALTHY_CONSECUTIVE_ERRORS: Final = 3 + # For good startup performance the consumption of multiple # records for each consume round is essential. # Consumer default is 1 message for each consume call and after @@ -175,6 +182,9 @@ def __init__( self.start_time = time.monotonic() self.startup_previous_processed_offset = 0 + self.consecutive_unexpected_errors: int = 0 + self.consecutive_unexpected_errors_start: float = 0 + def close(self) -> None: LOG.info("Closing schema_reader") self._stop_schema_reader.set() @@ -248,15 +258,44 @@ def run(self) -> None: self.offset = self._get_beginning_offset() try: self.handle_messages() + self.consecutive_unexpected_errors = 0 except ShutdownException: self._stop_schema_reader.set() shutdown() except KafkaUnavailableError: + self.consecutive_unexpected_errors += 1 LOG.warning("Kafka cluster is unavailable or broker can't be resolved.") except Exception as e: # pylint: disable=broad-except self.stats.unexpected_exception(ex=e, where="schema_reader_loop") + self.consecutive_unexpected_errors += 1 + if self.consecutive_unexpected_errors == 1: + self.consecutive_unexpected_errors_start = time.monotonic() LOG.warning("Unexpected exception in schema reader loop - %s", e) + async def is_healthy(self) -> bool: + if ( + self.consecutive_unexpected_errors >= UNHEALTHY_CONSECUTIVE_ERRORS + and (duration := time.monotonic() - self.consecutive_unexpected_errors_start) >= UNHEALTHY_TIMEOUT_SECONDS + ): + LOG.warning( + "Health check failed with %s consecutive errors in %s seconds", self.consecutive_unexpected_errors, duration + ) + return False + + try: + # Explicitly check if topic exists. + # This needs to be done because in case of missing topic the consumer will not repeat the error + # on consecutive consume calls and instead will return empty list.
+ assert self.admin_client is not None + topic = self.config["topic_name"] + res = self.admin_client.describe_topics(TopicCollection([topic])) + await asyncio.wrap_future(res[topic]) + except Exception as e: # pylint: disable=broad-except + LOG.warning("Health check failed with %r", e) + return False + + return True + def _get_beginning_offset(self) -> int: assert self.consumer is not None, "Thread must be started" @@ -552,7 +591,19 @@ def _handle_msg_schema(self, key: dict, value: dict | None) -> None: parsed_schema: Draft7Validator | AvroSchema | ProtobufSchema | None = None resolved_dependencies: dict[str, Dependency] | None = None - if schema_type_parsed in [SchemaType.AVRO, SchemaType.JSONSCHEMA]: + if schema_type_parsed == SchemaType.JSONSCHEMA: + try: + if schema_references: + candidate_references = [reference_from_mapping(reference_data) for reference_data in schema_references] + resolved_references, resolved_dependencies = self.resolve_references(candidate_references) + schema_str = json.dumps(json.loads(schema_str), sort_keys=True) + except json.JSONDecodeError as e: + LOG.warning("Schema is not valid JSON") + raise e + except InvalidReferences as e: + LOG.exception("Invalid JSON references") + raise e + elif schema_type_parsed == SchemaType.AVRO: try: schema_str = json.dumps(json.loads(schema_str), sort_keys=True) except json.JSONDecodeError as exc: diff --git a/src/karapace/schema_references.py b/src/karapace/schema_references.py index 0eae47141..900568349 100644 --- a/src/karapace/schema_references.py +++ b/src/karapace/schema_references.py @@ -7,11 +7,12 @@ from __future__ import annotations +from collections.abc import Mapping from karapace.dataclasses import default_dataclass from karapace.typing import JsonData, JsonObject, SchemaId, Subject, Version -from typing import cast, List, Mapping, NewType, TypeVar +from typing import cast, NewType, TypeVar -Referents = NewType("Referents", List[SchemaId]) +Referents = NewType("Referents", list[SchemaId]) 
T = TypeVar("T") diff --git a/src/karapace/schema_registry.py b/src/karapace/schema_registry.py index 6594663ad..67f58fddd 100644 --- a/src/karapace/schema_registry.py +++ b/src/karapace/schema_registry.py @@ -4,9 +4,12 @@ """ from __future__ import annotations +from avro.compatibility import SchemaCompatibilityResult, SchemaCompatibilityType +from collections.abc import Sequence from contextlib import AsyncExitStack, closing -from karapace.compatibility import check_compatibility, CompatibilityModes +from karapace.compatibility import CompatibilityModes from karapace.compatibility.jsonschema.checks import is_incompatible +from karapace.compatibility.schema_compatibility import SchemaCompatibility from karapace.config import Config from karapace.coordinator.master_coordinator import MasterCoordinator from karapace.dependency import Dependency @@ -29,7 +32,6 @@ from karapace.schema_reader import KafkaSchemaReader from karapace.schema_references import LatestVersionReference, Reference from karapace.typing import JsonObject, Mode, SchemaId, Subject, Version -from typing import Sequence import asyncio import logging @@ -281,7 +283,7 @@ async def subject_version_referencedby_get( return list(referenced_by) return [] - def _resolve_and_parse(self, schema: TypedSchema) -> ParsedTypedSchema: + def resolve_and_parse(self, schema: TypedSchema) -> ParsedTypedSchema: references, dependencies = self.resolve_references(schema.references) if schema.references else (None, None) return ParsedTypedSchema.parse( schema_type=schema.schema_type, @@ -325,12 +327,8 @@ async def write_new_schema_local( ) else: # First check if any of the existing schemas for the subject match - live_schema_versions = { - version_id: schema_version - for version_id, schema_version in all_schema_versions.items() - if schema_version.deleted is False - } - if not live_schema_versions: # Previous ones have been deleted by the user. 
+ live_versions = self.get_live_versions_sorted(all_schema_versions) + if not live_versions: # Previous ones have been deleted by the user. version = self.database.get_next_version(subject=subject) schema_id = self.database.get_schema_id(new_schema) LOG.debug( @@ -351,32 +349,17 @@ async def write_new_schema_local( ) return schema_id - compatibility_mode = self.get_compatibility_mode(subject=subject) + result = self.check_schema_compatibility(new_schema, subject) - # Run a compatibility check between on file schema(s) and the one being submitted now - # the check is either towards the latest one or against all previous ones in case of - # transitive mode - schema_versions = sorted(live_schema_versions) - if compatibility_mode.is_transitive(): - check_against = schema_versions - else: - check_against = [schema_versions[-1]] - - for old_version in check_against: - parsed_old_schema = self._resolve_and_parse(all_schema_versions[old_version].schema) - result = check_compatibility( - old_schema=parsed_old_schema, - new_schema=new_schema, - compatibility_mode=compatibility_mode, + if is_incompatible(result): + LOG.warning( + "Incompatible schema: %s, incompatibilities: %s", result.compatibility, result.incompatibilities + ) + compatibility_mode = self.get_compatibility_mode(subject=subject) + raise IncompatibleSchema( + f"Incompatible schema, compatibility_mode={compatibility_mode.value}. 
" + f"Incompatibilities: {', '.join(result.messages)[:300]}" ) - if is_incompatible(result): - message = set(result.messages).pop() if result.messages else "" - LOG.warning( - "Incompatible schema: %s, incompatibilities: %s", result.compatibility, result.incompatibilities - ) - raise IncompatibleSchema( - f"Incompatible schema, compatibility_mode={compatibility_mode.value} {message}" - ) # We didn't find an existing schema and the schema is compatible so go and create one version = self.database.get_next_version(subject=subject) @@ -465,3 +448,48 @@ def send_delete_subject_message(self, subject: Subject, version: Version) -> Non key = {"subject": subject, "magic": 0, "keytype": "DELETE_SUBJECT"} value = {"subject": subject, "version": version.value} self.producer.send_message(key=key, value=value) + + def check_schema_compatibility( + self, + new_schema: ValidatedTypedSchema, + subject: Subject, + ) -> SchemaCompatibilityResult: + result = SchemaCompatibilityResult(SchemaCompatibilityType.compatible) + + compatibility_mode = self.get_compatibility_mode(subject=subject) + all_schema_versions: dict[Version, SchemaVersion] = self.database.find_subject_schemas( + subject=subject, include_deleted=True + ) + live_versions = self.get_live_versions_sorted(all_schema_versions) + + if not live_versions: + old_versions = [] + elif compatibility_mode.is_transitive(): + # Check against all versions + old_versions = live_versions + else: + # Only check against latest version + old_versions = [live_versions[-1]] + + for old_version in old_versions: + old_parsed_schema = self.resolve_and_parse(all_schema_versions[old_version].schema) + + result = SchemaCompatibility.check_compatibility( + old_schema=old_parsed_schema, + new_schema=new_schema, + compatibility_mode=compatibility_mode, + ) + + if is_incompatible(result): + return result + + return result + + @staticmethod + def get_live_versions_sorted(all_schema_versions: dict[Version, SchemaVersion]) -> list[Version]: + 
live_schema_versions = { + version_id: schema_version + for version_id, schema_version in all_schema_versions.items() + if schema_version.deleted is False + } + return sorted(live_schema_versions) diff --git a/src/karapace/schema_registry_apis.py b/src/karapace/schema_registry_apis.py index 5a9196087..713e96846 100644 --- a/src/karapace/schema_registry_apis.py +++ b/src/karapace/schema_registry_apis.py @@ -9,8 +9,9 @@ from enum import Enum, unique from http import HTTPStatus from karapace.auth import HTTPAuthorizer, Operation, User -from karapace.compatibility import check_compatibility, CompatibilityModes +from karapace.compatibility import CompatibilityModes from karapace.compatibility.jsonschema.checks import is_incompatible +from karapace.compatibility.schema_compatibility import SchemaCompatibility from karapace.config import Config from karapace.errors import ( IncompatibleSchema, @@ -28,7 +29,7 @@ SubjectSoftDeletedException, VersionNotFoundException, ) -from karapace.karapace import KarapaceBase +from karapace.karapace import HealthCheck, KarapaceBase from karapace.protobuf.exception import ProtobufUnresolvedDependencyException from karapace.rapu import HTTPRequest, JSON_CONTENT_TYPE, SERVER_NAME from karapace.schema_models import ParsedTypedSchema, SchemaType, SchemaVersion, TypedSchema, ValidatedTypedSchema, Versioner @@ -98,7 +99,7 @@ def __init__(self, config: Config) -> None: self.app.on_startup.append(self._create_forward_client) self.health_hooks.append(self.schema_registry_health) - async def schema_registry_health(self) -> JsonObject: + async def schema_registry_health(self) -> HealthCheck: resp = {} if self._auth is not None: resp["schema_registry_authfile_timestamp"] = self._auth.authfile_last_modified @@ -115,7 +116,12 @@ async def schema_registry_health(self) -> JsonObject: resp["schema_registry_primary_url"] = cs.primary_url resp["schema_registry_coordinator_running"] = cs.is_running resp["schema_registry_coordinator_generation_id"] = 
cs.group_generation_id - return resp + + healthy = True + if not await self.schema_registry.schema_reader.is_healthy(): + healthy = False + + return HealthCheck(status=resp, healthy=healthy) async def _start_schema_registry(self, app: aiohttp.web.Application) -> None: # pylint: disable=unused-argument """Callback for aiohttp.Application.on_startup""" @@ -375,63 +381,12 @@ def _invalid_version(self, content_type, version): ) async def compatibility_check( - self, content_type: str, *, subject: str, version: str, request: HTTPRequest, user: User | None = None + self, content_type: str, *, subject: Subject, version: str, request: HTTPRequest, user: User | None = None ) -> None: """Check for schema compatibility""" self._check_authorization(user, Operation.Read, f"Subject:{subject}") - body = request.json - schema_type = self._validate_schema_type(content_type=content_type, data=body) - references = self._validate_references(content_type, schema_type, body) - try: - references, new_schema_dependencies = self.schema_registry.resolve_references(references) - new_schema = ValidatedTypedSchema.parse( - schema_type=schema_type, - schema_str=body["schema"], - references=references, - dependencies=new_schema_dependencies, - use_protobuf_formatter=self.config["use_protobuf_formatter"], - ) - except InvalidSchema: - self.r( - body={ - "error_code": SchemaErrorCodes.INVALID_SCHEMA.value, - "message": f"Invalid {schema_type} schema", - }, - content_type=content_type, - status=HTTPStatus.UNPROCESSABLE_ENTITY, - ) - try: - old = self.schema_registry.subject_version_get(subject=subject, version=Versioner.V(version)) - except InvalidVersion: - self._invalid_version(content_type, version) - except (VersionNotFoundException, SchemasNotFoundException, SubjectNotFoundException): - self.r( - body={ - "error_code": SchemaErrorCodes.VERSION_NOT_FOUND.value, - "message": f"Version {version} not found.", - }, - content_type=content_type, - status=HTTPStatus.NOT_FOUND, - ) - old_schema_type = 
self._validate_schema_type(content_type=content_type, data=old) - try: - old_references = old.get("references", None) - old_dependencies = None - if old_references: - old_references, old_dependencies = self.schema_registry.resolve_references(old_references) - old_schema = ParsedTypedSchema.parse(old_schema_type, old["schema"], old_references, old_dependencies) - except InvalidSchema: - self.r( - body={ - "error_code": SchemaErrorCodes.INVALID_SCHEMA.value, - "message": f"Found an invalid {old_schema_type} schema registered", - }, - content_type=content_type, - status=HTTPStatus.UNPROCESSABLE_ENTITY, - ) - try: compatibility_mode = self.schema_registry.get_compatibility_mode(subject=subject) except ValueError as ex: @@ -446,13 +401,18 @@ async def compatibility_check( status=HTTPStatus.INTERNAL_SERVER_ERROR, ) - result = check_compatibility( - old_schema=old_schema, - new_schema=new_schema, - compatibility_mode=compatibility_mode, - ) + new_schema = self.get_new_schema(request.json, content_type) + old_schema = self.get_old_schema(subject, Versioner.V(version), content_type) + if compatibility_mode.is_transitive(): + # Ignore the schema version provided in the rest api call (`version`) + # Instead check against all previous versions (including `version` if existing) + result = self.schema_registry.check_schema_compatibility(new_schema, subject) + else: + # Check against the schema version provided in the rest api call (`version`) + result = SchemaCompatibility.check_compatibility(old_schema, new_schema, compatibility_mode) + if is_incompatible(result): - self.r({"is_compatible": False}, content_type) + self.r({"is_compatible": False, "messages": list(result.messages)}, content_type) self.r({"is_compatible": True}, content_type) async def schemas_list(self, content_type: str, *, request: HTTPRequest, user: User | None = None): @@ -1056,7 +1016,7 @@ def _validate_references( content_type=content_type, status=HTTPStatus.BAD_REQUEST, ) - if references and schema_type != 
SchemaType.PROTOBUF: + if references and schema_type != SchemaType.PROTOBUF and schema_type != SchemaType.JSONSCHEMA: self.r( body={ "error_code": SchemaErrorCodes.REFERENCES_SUPPORT_NOT_IMPLEMENTED.value, @@ -1365,3 +1325,57 @@ def no_master_error(self, content_type: str) -> None: content_type=content_type, status=HTTPStatus.INTERNAL_SERVER_ERROR, ) + + def get_new_schema(self, body: JsonObject, content_type: str) -> ValidatedTypedSchema: + schema_type = self._validate_schema_type(content_type=content_type, data=body) + references = self._validate_references(content_type, schema_type, body) + try: + references, new_schema_dependencies = self.schema_registry.resolve_references(references) + return ValidatedTypedSchema.parse( + schema_type=schema_type, + schema_str=body["schema"], + references=references, + dependencies=new_schema_dependencies, + use_protobuf_formatter=self.config["use_protobuf_formatter"], + ) + except InvalidSchema: + self.r( + body={ + "error_code": SchemaErrorCodes.INVALID_SCHEMA.value, + "message": f"Invalid {schema_type} schema", + }, + content_type=content_type, + status=HTTPStatus.UNPROCESSABLE_ENTITY, + ) + + def get_old_schema(self, subject: Subject, version: Version, content_type: str) -> ParsedTypedSchema: + try: + old = self.schema_registry.subject_version_get(subject=subject, version=version) + except InvalidVersion: + self._invalid_version(content_type, version) + except (VersionNotFoundException, SchemasNotFoundException, SubjectNotFoundException): + self.r( + body={ + "error_code": SchemaErrorCodes.VERSION_NOT_FOUND.value, + "message": f"Version {version} not found.", + }, + content_type=content_type, + status=HTTPStatus.NOT_FOUND, + ) + old_schema_type = self._validate_schema_type(content_type=content_type, data=old) + try: + old_references = old.get("references", None) + old_dependencies = None + if old_references: + old_references, old_dependencies = self.schema_registry.resolve_references(old_references) + old_schema = 
ParsedTypedSchema.parse(old_schema_type, old["schema"], old_references, old_dependencies) + return old_schema + except InvalidSchema: + self.r( + body={ + "error_code": SchemaErrorCodes.INVALID_SCHEMA.value, + "message": f"Found an invalid {old_schema_type} schema registered", + }, + content_type=content_type, + status=HTTPStatus.UNPROCESSABLE_ENTITY, + ) diff --git a/src/karapace/sentry/sentry_client.py b/src/karapace/sentry/sentry_client.py index 59143ef04..c4dc99d33 100644 --- a/src/karapace/sentry/sentry_client.py +++ b/src/karapace/sentry/sentry_client.py @@ -4,8 +4,8 @@ """ from __future__ import annotations +from collections.abc import Mapping from karapace.sentry.sentry_client_api import KarapaceSentryConfig, SentryClientAPI -from typing import Mapping # The Sentry SDK is optional, omit pylint import error import sentry_sdk @@ -41,6 +41,8 @@ def _initialize_sentry(self) -> None: # Don't send library logged errors to Sentry as there is also proper return value or raised exception to calling code from sentry_sdk.integrations.logging import ignore_logger + ignore_logger("aiokafka") + ignore_logger("aiokafka.*") ignore_logger("kafka") ignore_logger("kafka.*") diff --git a/src/karapace/sentry/sentry_client_api.py b/src/karapace/sentry/sentry_client_api.py index 22f4482d4..4ca9575c8 100644 --- a/src/karapace/sentry/sentry_client_api.py +++ b/src/karapace/sentry/sentry_client_api.py @@ -4,7 +4,7 @@ """ from __future__ import annotations -from typing import Mapping +from collections.abc import Mapping from typing_extensions import TypeAlias KarapaceSentryConfig: TypeAlias = "Mapping[str, object] | None" diff --git a/src/karapace/serialization.py b/src/karapace/serialization.py index 81c51cabc..36509855e 100644 --- a/src/karapace/serialization.py +++ b/src/karapace/serialization.py @@ -7,6 +7,7 @@ from aiohttp import BasicAuth from avro.io import BinaryDecoder, BinaryEncoder, DatumReader, DatumWriter from cachetools import TTLCache +from collections.abc import 
MutableMapping from functools import lru_cache from google.protobuf.message import DecodeError from jsonschema import ValidationError @@ -20,7 +21,7 @@ from karapace.schema_references import LatestVersionReference, Reference, reference_from_mapping from karapace.typing import NameStrategy, SchemaId, Subject, SubjectType, Version from karapace.utils import json_decode, json_encode -from typing import Any, Callable, MutableMapping +from typing import Any, Callable from urllib.parse import quote import asyncio diff --git a/src/karapace/statsd.py b/src/karapace/statsd.py index 3c32e09d8..39d6a3153 100644 --- a/src/karapace/statsd.py +++ b/src/karapace/statsd.py @@ -10,10 +10,11 @@ """ from __future__ import annotations +from collections.abc import Iterator from contextlib import contextmanager from karapace.config import Config from karapace.sentry import get_sentry_client -from typing import Any, Final, Iterator +from typing import Any, Final import datetime import logging diff --git a/src/karapace/typing.py b/src/karapace/typing.py index 77058cce2..1268db001 100644 --- a/src/karapace/typing.py +++ b/src/karapace/typing.py @@ -4,15 +4,16 @@ """ from __future__ import annotations +from collections.abc import Mapping, Sequence from enum import Enum, unique from karapace.errors import InvalidVersion -from typing import Any, ClassVar, Dict, List, Mapping, NewType, Sequence, Union +from typing import Any, ClassVar, NewType, Union from typing_extensions import TypeAlias import functools -JsonArray: TypeAlias = List["JsonData"] -JsonObject: TypeAlias = Dict[str, "JsonData"] +JsonArray: TypeAlias = list["JsonData"] +JsonObject: TypeAlias = dict[str, "JsonData"] JsonScalar: TypeAlias = Union[str, int, float, None] JsonData: TypeAlias = Union[JsonScalar, JsonObject, JsonArray] @@ -23,8 +24,8 @@ Subject = NewType("Subject", str) VersionTag = Union[str, int] -SchemaMetadata = NewType("SchemaMetadata", Dict[str, Any]) -SchemaRuleSet = NewType("SchemaRuleSet", Dict[str, Any]) 
+SchemaMetadata = NewType("SchemaMetadata", dict[str, Any]) +SchemaRuleSet = NewType("SchemaRuleSet", dict[str, Any]) # note: the SchemaID is a unique id among all the schemas (and each version should be assigned to a different id) # basically the same SchemaID refer always to the same TypedSchema. diff --git a/src/karapace/utils.py b/src/karapace/utils.py index 071b3e9d3..10db7bfdb 100644 --- a/src/karapace/utils.py +++ b/src/karapace/utils.py @@ -246,19 +246,6 @@ def log( self.logger.exception("Error in logging") -def remove_prefix(string: str, prefix: str) -> str: - """ - Not available in python 3.8. - """ - i = 0 - while i < len(string) and i < len(prefix): - if string[i] != prefix[i]: - return string - i += 1 - - return string[i:] - - def shutdown(): """ Send a SIGTERM into the current running application process, which should initiate shutdown logic. diff --git a/stubs/confluent_kafka/__init__.pyi b/stubs/confluent_kafka/__init__.pyi index 175569fb4..e27cf4880 100644 --- a/stubs/confluent_kafka/__init__.pyi +++ b/stubs/confluent_kafka/__init__.pyi @@ -8,6 +8,7 @@ from .cimpl import ( TIMESTAMP_CREATE_TIME, TIMESTAMP_LOG_APPEND_TIME, TIMESTAMP_NOT_AVAILABLE, + TopicCollection, TopicPartition, ) @@ -22,4 +23,5 @@ __all__ = ( "TIMESTAMP_LOG_APPEND_TIME", "TIMESTAMP_NOT_AVAILABLE", "TopicPartition", + "TopicCollection", ) diff --git a/stubs/confluent_kafka/admin/__init__.pyi b/stubs/confluent_kafka/admin/__init__.pyi index 02abcc033..1dafa51b8 100644 --- a/stubs/confluent_kafka/admin/__init__.pyi +++ b/stubs/confluent_kafka/admin/__init__.pyi @@ -4,7 +4,7 @@ from ._listoffsets import ListOffsetsResultInfo, OffsetSpec from ._metadata import BrokerMetadata, ClusterMetadata, PartitionMetadata, TopicMetadata from ._resource import ResourceType from concurrent.futures import Future -from confluent_kafka import IsolationLevel, TopicPartition +from confluent_kafka import IsolationLevel, TopicCollection, TopicPartition from typing import Callable __all__ = ( @@ -52,3 
+52,4 @@ class AdminClient: def describe_configs( self, resources: list[ConfigResource], request_timeout: float = -1 ) -> dict[ConfigResource, Future[dict[str, ConfigEntry]]]: ... + def describe_topics(self, topics: TopicCollection) -> dict[str, Future]: ... diff --git a/stubs/confluent_kafka/cimpl.pyi b/stubs/confluent_kafka/cimpl.pyi index 6936d10f0..ed163e5fc 100644 --- a/stubs/confluent_kafka/cimpl.pyi +++ b/stubs/confluent_kafka/cimpl.pyi @@ -48,6 +48,13 @@ class TopicPartition: self.leader_epoch: int | None self.error: KafkaError | None +class TopicCollection: + def __init__( + self, + topic_names: list[str], + ) -> None: + self.topic_names: list[str] + class Message: def offset(self) -> int: ... def timestamp(self) -> tuple[int, int]: ... diff --git a/tests/conftest.py b/tests/conftest.py index 99ba55809..d62663633 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,7 +5,7 @@ from avro.compatibility import SchemaCompatibilityResult from pathlib import Path from tempfile import mkstemp -from typing import List, Optional +from typing import Optional import json import os @@ -15,19 +15,19 @@ pytest_plugins = "aiohttp.pytest_plugin" KAFKA_BOOTSTRAP_SERVERS_OPT = "--kafka-bootstrap-servers" KAFKA_VERION_OPT = "--kafka-version" -KAFKA_VERSION = "2.7.0" +KAFKA_VERSION = "3.4.1" LOG_DIR_OPT = "--log-dir" VERSION_REGEX = "([0-9]+[.])*[0-9]+" -def pytest_assertrepr_compare(op, left, right) -> Optional[List[str]]: +def pytest_assertrepr_compare(op, left, right) -> Optional[list[str]]: if isinstance(left, SchemaCompatibilityResult) and isinstance(right, SchemaCompatibilityResult) and op in ("==", "!="): lines = ["Comparing SchemaCompatibilityResult instances:"] def pad(depth: int, *msg: str) -> str: return " " * depth + " ".join(msg) - def list_details(header: str, depth: int, items: List[str]) -> None: + def list_details(header: str, depth: int, items: list[str]) -> None: qty = len(items) if qty == 1: @@ -56,7 +56,7 @@ def compatibility_details(header: str, 
depth: int, obj: SchemaCompatibilityResul return None -def split_by_comma(arg: str) -> List[str]: +def split_by_comma(arg: str) -> list[str]: return arg.split(",") diff --git a/tests/integration/backup/test_avro_export.py b/tests/integration/backup/test_avro_export.py index 951cc2e57..041023580 100644 --- a/tests/integration/backup/test_avro_export.py +++ b/tests/integration/backup/test_avro_export.py @@ -12,7 +12,7 @@ from pathlib import Path from tests.integration.utils.cluster import RegistryDescription from tests.integration.utils.kafka_server import KafkaServers -from typing import Any, Dict +from typing import Any import base64 import json @@ -73,7 +73,7 @@ EXPECTED_COMPATIBILITY_CHANGE = {"compatibilityLevel": "NONE"} -async def insert_data(c: Client, schemaType: str, subject: str, data: Dict[str, Any]) -> None: +async def insert_data(c: Client, schemaType: str, subject: str, data: dict[str, Any]) -> None: schema_string = json.dumps(data) res = await c.post( f"subjects/{subject}/versions", @@ -83,7 +83,7 @@ async def insert_data(c: Client, schemaType: str, subject: str, data: Dict[str, assert "id" in res.json() -async def insert_compatibility_level_change(c: Client, subject: str, data: Dict[str, Any]) -> None: +async def insert_compatibility_level_change(c: Client, subject: str, data: dict[str, Any]) -> None: res = await c.put( f"config/{subject}", json=data, diff --git a/tests/integration/backup/test_v3_backup.py b/tests/integration/backup/test_v3_backup.py index 8e01365ed..6f2e5df35 100644 --- a/tests/integration/backup/test_v3_backup.py +++ b/tests/integration/backup/test_v3_backup.py @@ -4,7 +4,8 @@ """ from __future__ import annotations -from aiokafka.errors import UnknownTopicOrPartitionError +from aiokafka.errors import InvalidReplicationFactorError, UnknownTopicOrPartitionError +from collections.abc import Iterator from confluent_kafka import Message, TopicPartition from confluent_kafka.admin import NewTopic from dataclasses import fields @@ -27,7 
+28,7 @@ from tempfile import mkdtemp from tests.integration.utils.cluster import RegistryDescription from tests.integration.utils.kafka_server import KafkaServers -from typing import Iterator, NoReturn +from typing import NoReturn from unittest.mock import patch import datetime @@ -118,14 +119,13 @@ def test_roundtrip_from_kafka_state( admin_client.update_topic_config(new_topic.topic, {"max.message.bytes": "999"}) # Populate topic. - producer.send( + first_record_fut = producer.send( new_topic.topic, key=b"bar", value=b"foo", partition=0, - timestamp=1683474641, ) - producer.send( + second_record_fut = producer.send( new_topic.topic, key=b"foo", value=b"bar", @@ -134,10 +134,12 @@ def test_roundtrip_from_kafka_state( ("some-header", b"some header value"), ("other-header", b"some other header value"), ], - timestamp=1683474657, ) producer.flush() + first_message_timestamp = first_record_fut.result(timeout=5).timestamp()[1] + second_message_timestamp = second_record_fut.result(timeout=5).timestamp()[1] + topic_config = get_topic_configurations(admin_client, new_topic.topic, {ConfigSource.DYNAMIC_TOPIC_CONFIG}) # Execute backup creation. @@ -211,7 +213,7 @@ def test_roundtrip_from_kafka_state( # Note: This might be unreliable due to not using idempotent producer, i.e. we have # no guarantee against duplicates currently. 
assert first_record.offset() == 0 - assert first_record.timestamp()[1] == 1683474641 + assert first_record.timestamp()[1] == first_message_timestamp assert first_record.timestamp()[0] == Timestamp.CREATE_TIME assert first_record.key() == b"bar" assert first_record.value() == b"foo" @@ -222,7 +224,7 @@ def test_roundtrip_from_kafka_state( assert second_record.topic() == new_topic.topic assert second_record.partition() == partition assert second_record.offset() == 1 - assert second_record.timestamp()[1] == 1683474657 + assert second_record.timestamp()[1] == second_message_timestamp assert second_record.timestamp()[0] == Timestamp.CREATE_TIME assert second_record.key() == b"foo" assert second_record.value() == b"bar" @@ -696,6 +698,56 @@ def __exit__(self, exc_type, exc_value, exc_traceback): ) +def test_backup_restoration_override_replication_factor( + admin_client: KafkaAdminClient, + kafka_servers: KafkaServers, + producer: KafkaProducer, + new_topic: NewTopic, +) -> None: + backup_directory = Path(__file__).parent.parent.resolve() / "test_data" / "backup_v3_single_partition" / new_topic.topic + metadata_path = backup_directory / f"{new_topic.topic}.metadata" + config = set_config_defaults( + { + "bootstrap_uri": kafka_servers.bootstrap_servers, + } + ) + + # pupulate the topic and create a backup + for i in range(10): + producer.send( + new_topic.topic, + key=f"message-key-{i}", + value=f"message-value-{i}-" + 1000 * "X", + ) + producer.flush() + api.create_backup( + config=config, + backup_location=backup_directory, + topic_name=TopicName(new_topic.topic), + version=BackupVersion.V3, + replication_factor=6, + ) + + # make sure topic doesn't exist beforehand. 
+ _delete_topic(admin_client, new_topic.topic) + + # assert that the restore would fail without the replication factor override + with pytest.raises(InvalidReplicationFactorError): + api.restore_backup( + config=config, + backup_location=metadata_path, + topic_name=TopicName(new_topic.topic), + ) + + # finally restore the backup with override + api.restore_backup( + config=config, + backup_location=metadata_path, + topic_name=TopicName(new_topic.topic), + override_replication_factor=1, + ) + + def no_color_env() -> dict[str, str]: env = os.environ.copy() try: diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index ecc52470a..1673445ba 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -9,6 +9,7 @@ from _pytest.fixtures import SubRequest from aiohttp.pytest_plugin import AiohttpClient from aiohttp.test_utils import TestClient +from collections.abc import AsyncGenerator, AsyncIterator, Iterator from confluent_kafka.admin import NewTopic from contextlib import ExitStack from dataclasses import asdict @@ -35,7 +36,6 @@ from tests.integration.utils.synchronization import lock_path_for from tests.integration.utils.zookeeper import configure_and_start_zk from tests.utils import repeat_until_successful_request -from typing import AsyncGenerator, AsyncIterator, Iterator from urllib.parse import urlparse import asyncio @@ -79,7 +79,7 @@ def fixture_kafka_description(request: SubRequest) -> KafkaDescription: kafka_tgz=RUNTIME_DIR / kafka_tgz, install_dir=kafka_dir, download_url=kafka_url, - protocol_version="2.7", + protocol_version="3.4.1", ) diff --git a/tests/integration/schema_registry/test_jsonschema_references.py b/tests/integration/schema_registry/test_jsonschema_references.py new file mode 100644 index 000000000..9b1616046 --- /dev/null +++ b/tests/integration/schema_registry/test_jsonschema_references.py @@ -0,0 +1,1078 @@ +"""Copyright (c) 2023 Aiven Ltd See LICENSE for details This version of the tests applies 
the same schemas as in +test_jsonschema.py, but these schemas are embedded within an additional helper schema, which is then referenced by +another schema. This setup allows us to test the behavior of JSON schema references""" +from jsonschema import Draft7Validator +from karapace.client import Client +from karapace.compatibility import CompatibilityModes +from karapace.schema_reader import SchemaType +from tests.schemas.json_schemas import ( + A_DINT_B_DINT_OBJECT_SCHEMA, + A_DINT_B_INT_OBJECT_SCHEMA, + A_DINT_B_NUM_C_DINT_OBJECT_SCHEMA, + A_DINT_B_NUM_OBJECT_SCHEMA, + A_DINT_OBJECT_SCHEMA, + A_INT_B_DINT_OBJECT_SCHEMA, + A_INT_B_DINT_REQUIRED_OBJECT_SCHEMA, + A_INT_B_INT_OBJECT_SCHEMA, + A_INT_B_INT_REQUIRED_OBJECT_SCHEMA, + A_INT_OBJECT_SCHEMA, + A_INT_OPEN_OBJECT_SCHEMA, + A_OBJECT_SCHEMA, + ARRAY_OF_INT_SCHEMA, + ARRAY_OF_NUMBER_SCHEMA, + ARRAY_OF_POSITIVE_INTEGER, + ARRAY_OF_POSITIVE_INTEGER_THROUGH_REF, + ARRAY_OF_STRING_SCHEMA, + ARRAY_SCHEMA, + B_DINT_OPEN_OBJECT_SCHEMA, + B_INT_OBJECT_SCHEMA, + B_INT_OPEN_OBJECT_SCHEMA, + B_NUM_C_DINT_OPEN_OBJECT_SCHEMA, + B_NUM_C_INT_OBJECT_SCHEMA, + B_NUM_C_INT_OPEN_OBJECT_SCHEMA, + BOOLEAN_SCHEMA, + EMPTY_OBJECT_SCHEMA, + EMPTY_SCHEMA, + ENUM_AB_SCHEMA, + ENUM_ABC_SCHEMA, + ENUM_BC_SCHEMA, + EXCLUSIVE_MAXIMUM_DECREASED_INTEGER_SCHEMA, + EXCLUSIVE_MAXIMUM_DECREASED_NUMBER_SCHEMA, + EXCLUSIVE_MAXIMUM_INTEGER_SCHEMA, + EXCLUSIVE_MAXIMUM_NUMBER_SCHEMA, + EXCLUSIVE_MINIMUM_INCREASED_INTEGER_SCHEMA, + EXCLUSIVE_MINIMUM_INCREASED_NUMBER_SCHEMA, + EXCLUSIVE_MINIMUM_INTEGER_SCHEMA, + EXCLUSIVE_MINIMUM_NUMBER_SCHEMA, + INT_SCHEMA, + MAX_ITEMS_DECREASED_SCHEMA, + MAX_ITEMS_SCHEMA, + MAX_LENGTH_DECREASED_SCHEMA, + MAX_LENGTH_SCHEMA, + MAX_PROPERTIES_DECREASED_SCHEMA, + MAX_PROPERTIES_SCHEMA, + MAXIMUM_DECREASED_INTEGER_SCHEMA, + MAXIMUM_DECREASED_NUMBER_SCHEMA, + MAXIMUM_INTEGER_SCHEMA, + MAXIMUM_NUMBER_SCHEMA, + MIN_ITEMS_INCREASED_SCHEMA, + MIN_ITEMS_SCHEMA, + MIN_LENGTH_INCREASED_SCHEMA, + MIN_LENGTH_SCHEMA, + 
MIN_PATTERN_SCHEMA, + MIN_PATTERN_STRICT_SCHEMA, + MIN_PROPERTIES_INCREASED_SCHEMA, + MIN_PROPERTIES_SCHEMA, + MINIMUM_INCREASED_INTEGER_SCHEMA, + MINIMUM_INCREASED_NUMBER_SCHEMA, + MINIMUM_INTEGER_SCHEMA, + MINIMUM_NUMBER_SCHEMA, + NOT_OF_EMPTY_SCHEMA, + NOT_OF_TRUE_SCHEMA, + NUMBER_SCHEMA, + OBJECT_SCHEMA, + ONEOF_ARRAY_A_DINT_B_NUM_SCHEMA, + ONEOF_ARRAY_B_NUM_C_DINT_OPEN_SCHEMA, + ONEOF_ARRAY_B_NUM_C_INT_SCHEMA, + ONEOF_INT_SCHEMA, + ONEOF_NUMBER_SCHEMA, + ONEOF_STRING_INT_SCHEMA, + ONEOF_STRING_SCHEMA, + PATTERN_PROPERTY_ASTAR_OBJECT_SCHEMA, + PROPERTY_NAMES_ASTAR_OBJECT_SCHEMA, + STRING_SCHEMA, + TUPLE_OF_INT_INT_OPEN_SCHEMA, + TUPLE_OF_INT_INT_SCHEMA, + TUPLE_OF_INT_OPEN_SCHEMA, + TUPLE_OF_INT_SCHEMA, + TUPLE_OF_INT_WITH_ADDITIONAL_INT_SCHEMA, + TYPES_STRING_INT_SCHEMA, + TYPES_STRING_SCHEMA, +) +from tests.utils import new_random_name + +import json + + +async def debugging_details( + newer: Draft7Validator, + older: Draft7Validator, + client: Client, + subject: str, +) -> str: + newer_schema = json.dumps(newer.schema) + older_schema = json.dumps(older.schema) + config_res = await client.get(f"config/{subject}?defaultToGlobal=true") + config = config_res.json() + return f"subject={subject} newer={newer_schema} older={older_schema} compatibility={config}" + + +async def sainty_check(client: Client, subject: str, compatibility_mode: CompatibilityModes) -> None: + # Sanity check. The compatibility must be explicitly set because any + # difference can result in unexpected errors. 
+ subject_config_res = await client.get(f"config/{subject}_dependency?defaultToGlobal=true") + subject_config = subject_config_res.json() + assert subject_config["compatibilityLevel"] == compatibility_mode.value + subject_config_res = await client.get(f"config/{subject}?defaultToGlobal=true") + subject_config = subject_config_res.json() + assert subject_config["compatibilityLevel"] == compatibility_mode.value + + +async def not_schemas_are_compatible( + newer: Draft7Validator, + older: Draft7Validator, + client: Client, + compatibility_mode: CompatibilityModes, +) -> None: + subject = new_random_name("subject") + + # sanity check + subject_res = await client.get(f"subjects/{subject}/versions") + assert subject_res.status_code == 404, "random subject should no exist {subject}" + subject_res = await client.get(f"subjects/{subject}_dependency/versions") + assert subject_res.status_code == 404, "random subject should no exist {subject}" + + template_schema = { + "$id": "https://example.com/dependency.schema.json", + "title": "Dependency", + } + older_schema = dict(template_schema) + older_schema.update(older.schema) + older_dependency_res = await client.post( + f"subjects/{subject}_dependency/versions", + json={ + "schema": json.dumps(older_schema), + "schemaType": SchemaType.JSONSCHEMA.value, + }, + ) + assert older_dependency_res.status_code == 200, await debugging_details(newer, older, client, f"{subject}_dependency") + assert "id" in older_dependency_res.json(), await debugging_details(newer, older, client, f"{subject}_dependency") + + main_schema = { + "$id": "https://example.com/main.schema.json", + "title": "Main", + "type": "object", + "properties": { + "x1": {"type": "string"}, + "x2": {"type": "string"}, + "x3": {"$ref": "https://example.com/dependency.schema.json"}, + }, + "required": ["x1", "x2", "x3"], + } + + main_res = await client.post( + f"subjects/{subject}/versions", + json={ + "schema": json.dumps(main_schema), + "schemaType": 
SchemaType.JSONSCHEMA.value, + "references": [{"name": "dependency.schema.json", "subject": f"{subject}_dependency", "version": 1}], + }, + ) + + assert main_res.status_code == 200, await debugging_details(newer, older, client, f"{subject}_dependency") + assert "id" in main_res.json(), await debugging_details(newer, older, client, f"{subject}_dependency") + + # enforce the target compatibility mode. not using the global setting + # because that interfere with parallel runs. + subject_config_res = await client.put(f"config/{subject}", json={"compatibility": compatibility_mode.value}) + assert subject_config_res.status_code == 200 + subject_config_res = await client.put(f"config/{subject}_dependency", json={"compatibility": compatibility_mode.value}) + assert subject_config_res.status_code == 200 + + newer_schema = dict(template_schema) + newer_schema.update(newer.schema) + newer_res = await client.post( + f"subjects/{subject}_dependency/versions", + json={ + "schema": json.dumps(newer_schema), + "schemaType": SchemaType.JSONSCHEMA.value, + }, + ) + assert newer_res.status_code != 200, await debugging_details(newer, older, client, f"{subject}_dependency") + + await sainty_check(client, subject, compatibility_mode) + + +async def schemas_are_compatible( + client: Client, + newer: Draft7Validator, + older: Draft7Validator, + compatibility_mode: CompatibilityModes, +) -> None: + subject = new_random_name("subject") + + # sanity check + subject_res = await client.get(f"subjects/{subject}/versions") + assert subject_res.status_code == 404, "random subject should no exist {subject}" + subject_res = await client.get(f"subjects/{subject}_dependency/versions") + assert subject_res.status_code == 404, "random subject should no exist {subject}" + + template_schema = { + "$id": "https://example.com/dependency.schema.json", + "title": "Dependency", + } + older_schema = dict(template_schema) + older_schema.update(older.schema) + older_dependency_res = await client.post( + 
f"subjects/{subject}_dependency/versions", + json={ + "schema": json.dumps(older_schema), + "schemaType": SchemaType.JSONSCHEMA.value, + }, + ) + assert older_dependency_res.status_code == 200, await debugging_details(newer, older, client, f"{subject}_dependency") + assert "id" in older_dependency_res.json(), await debugging_details(newer, older, client, f"{subject}_dependency") + + main_schema = { + "$id": "https://example.com/main.schema.json", + "title": "Main", + "type": "object", + "properties": { + "x1": {"type": "string"}, + "x2": {"type": "string"}, + "x3": {"$ref": "https://example.com/dependency.schema.json"}, + }, + "required": ["x1", "x2", "x3"], + } + + main_res = await client.post( + f"subjects/{subject}/versions", + json={ + "schema": json.dumps(main_schema), + "schemaType": SchemaType.JSONSCHEMA.value, + "references": [{"name": "dependency.schema.json", "subject": f"{subject}_dependency", "version": 1}], + }, + ) + + assert main_res.status_code == 200, await debugging_details(newer, older, client, f"{subject}_dependency") + assert "id" in main_res.json(), await debugging_details(newer, older, client, f"{subject}_dependency") + + # enforce the target compatibility mode. not using the global setting + # because that interfere with parallel runs. 
+ subject_config_res = await client.put(f"config/{subject}", json={"compatibility": compatibility_mode.value}) + assert subject_config_res.status_code == 200 + subject_config_res = await client.put(f"config/{subject}_dependency", json={"compatibility": compatibility_mode.value}) + assert subject_config_res.status_code == 200 + + newer_schema = dict(template_schema) + newer_schema.update(newer.schema) + newer_res = await client.post( + f"subjects/{subject}_dependency/versions", + json={ + "schema": json.dumps(newer_schema), + "schemaType": SchemaType.JSONSCHEMA.value, + }, + ) + assert newer_res.status_code == 200, await debugging_details(newer, older, client, f"{subject}_dependency") + # Because the IDs are global, and the same schema is used in multiple + # tests, their order is unknown. + assert older_dependency_res.json()["id"] != newer_res.json()["id"], await debugging_details( + newer, older, client, f"{subject}_dependency" + ) + + await sainty_check(client, subject, compatibility_mode) + + +async def schemas_are_backward_compatible( + reader: Draft7Validator, + writer: Draft7Validator, + client: Client, +) -> None: + await schemas_are_compatible( + # For backwards compatibility the newer schema is the reader + newer=reader, + older=writer, + client=client, + compatibility_mode=CompatibilityModes.BACKWARD, + ) + + +async def not_schemas_are_backward_compatible( + reader: Draft7Validator, + writer: Draft7Validator, + client: Client, +) -> None: + await not_schemas_are_compatible( + # For backwards compatibility the newer schema is the reader + newer=reader, + older=writer, + client=client, + compatibility_mode=CompatibilityModes.BACKWARD, + ) + + +async def test_schemaregistry_schemaregistry_extra_optional_field_with_open_model_is_compatible( + registry_async_client: Client, +) -> None: + # - the newer is an open model, the extra field produced by the older is + # automatically accepted + await schemas_are_backward_compatible( + reader=OBJECT_SCHEMA, + 
writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=EMPTY_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + + # - the older is a closed model, so the field `b` was never produced, which + # means that the older never produced an invalid value. + # - the newer's `b` field is optional, so the absenced of the field is not + # a problem, and `a` is ignored because of the open model + await schemas_are_backward_compatible( + reader=B_INT_OPEN_OBJECT_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + + # - if the model is closed, then `a` must also be accepted + await schemas_are_backward_compatible( + reader=A_INT_B_INT_OBJECT_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + + # Examples a bit more complex + await schemas_are_backward_compatible( + reader=A_DINT_B_NUM_C_DINT_OBJECT_SCHEMA, + writer=A_DINT_B_NUM_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=B_NUM_C_DINT_OPEN_OBJECT_SCHEMA, + writer=A_DINT_B_NUM_C_DINT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=B_NUM_C_INT_OPEN_OBJECT_SCHEMA, + writer=A_DINT_B_NUM_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=B_NUM_C_DINT_OPEN_OBJECT_SCHEMA, + writer=A_DINT_B_NUM_OBJECT_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_schemaregistry_extra_field_with_closed_model_is_incompatible( + registry_async_client: Client, +) -> None: + await not_schemas_are_backward_compatible( + reader=NOT_OF_TRUE_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=NOT_OF_EMPTY_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=B_INT_OBJECT_SCHEMA, + 
writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=B_NUM_C_INT_OBJECT_SCHEMA, + writer=A_DINT_B_NUM_OBJECT_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=B_NUM_C_INT_OBJECT_SCHEMA, + writer=A_DINT_B_NUM_C_DINT_OBJECT_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_schemaregistry_missing_required_field_is_incompatible(registry_async_client: Client) -> None: + await not_schemas_are_backward_compatible( + reader=A_INT_B_INT_REQUIRED_OBJECT_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=A_INT_OBJECT_SCHEMA, + writer=A_INT_B_DINT_REQUIRED_OBJECT_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_giving_a_default_value_for_a_non_required_field_is_compatible( + registry_async_client: Client, +) -> None: + await schemas_are_backward_compatible( + reader=OBJECT_SCHEMA, + writer=A_DINT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=EMPTY_SCHEMA, + writer=A_DINT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=B_DINT_OPEN_OBJECT_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=A_INT_B_DINT_OBJECT_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=A_DINT_B_INT_OBJECT_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=B_NUM_C_DINT_OPEN_OBJECT_SCHEMA, + writer=A_DINT_B_NUM_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=A_DINT_B_DINT_OBJECT_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( 
+ reader=A_DINT_B_DINT_OBJECT_SCHEMA, + writer=EMPTY_OBJECT_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_from_closed_to_open_is_incompatible(registry_async_client: Client) -> None: + await not_schemas_are_backward_compatible( + reader=B_NUM_C_INT_OBJECT_SCHEMA, + writer=B_NUM_C_DINT_OPEN_OBJECT_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_union_with_incompatible_elements(registry_async_client: Client) -> None: + await not_schemas_are_backward_compatible( + reader=ONEOF_ARRAY_B_NUM_C_INT_SCHEMA, + writer=ONEOF_ARRAY_A_DINT_B_NUM_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_union_with_compatible_elements(registry_async_client: Client) -> None: + await schemas_are_backward_compatible( + reader=ONEOF_ARRAY_B_NUM_C_DINT_OPEN_SCHEMA, + writer=ONEOF_ARRAY_A_DINT_B_NUM_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_array_and_tuples_are_incompatible(registry_async_client: Client) -> None: + await not_schemas_are_backward_compatible( + reader=TUPLE_OF_INT_OPEN_SCHEMA, + writer=ARRAY_OF_INT_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=ARRAY_OF_INT_SCHEMA, + writer=TUPLE_OF_INT_OPEN_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_schema_compatibility_successes(registry_async_client: Client) -> None: + # allowing a broader set of values is compatible + await schemas_are_backward_compatible( + reader=NUMBER_SCHEMA, + writer=INT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=ARRAY_OF_NUMBER_SCHEMA, + writer=ARRAY_OF_INT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=TUPLE_OF_INT_OPEN_SCHEMA, + writer=TUPLE_OF_INT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=TUPLE_OF_INT_WITH_ADDITIONAL_INT_SCHEMA, + 
writer=TUPLE_OF_INT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=ENUM_ABC_SCHEMA, + writer=ENUM_AB_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=ONEOF_STRING_INT_SCHEMA, + writer=ONEOF_STRING_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=ONEOF_STRING_INT_SCHEMA, + writer=STRING_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=A_INT_OPEN_OBJECT_SCHEMA, + writer=A_INT_B_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + + # requiring less values is compatible + await schemas_are_backward_compatible( + reader=TUPLE_OF_INT_OPEN_SCHEMA, + writer=TUPLE_OF_INT_INT_OPEN_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=TUPLE_OF_INT_OPEN_SCHEMA, + writer=TUPLE_OF_INT_INT_SCHEMA, + client=registry_async_client, + ) + + # equivalences + await schemas_are_backward_compatible( + reader=ONEOF_STRING_SCHEMA, + writer=STRING_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=STRING_SCHEMA, + writer=ONEOF_STRING_SCHEMA, + client=registry_async_client, + ) + + # new non-required fields is compatible + await schemas_are_backward_compatible( + reader=A_INT_OBJECT_SCHEMA, + writer=EMPTY_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=A_INT_B_INT_OBJECT_SCHEMA, + writer=A_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_type_narrowing_incompabilities(registry_async_client: Client) -> None: + await not_schemas_are_backward_compatible( + reader=INT_SCHEMA, + writer=NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=ARRAY_OF_INT_SCHEMA, + writer=ARRAY_OF_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + 
reader=ENUM_AB_SCHEMA, + writer=ENUM_ABC_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=ENUM_BC_SCHEMA, + writer=ENUM_ABC_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=ONEOF_INT_SCHEMA, + writer=ONEOF_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=ONEOF_STRING_SCHEMA, + writer=ONEOF_STRING_INT_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=INT_SCHEMA, + writer=ONEOF_STRING_INT_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_type_mismatch_incompabilities(registry_async_client: Client) -> None: + await not_schemas_are_backward_compatible( + reader=BOOLEAN_SCHEMA, + writer=INT_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=INT_SCHEMA, + writer=BOOLEAN_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=STRING_SCHEMA, + writer=BOOLEAN_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=STRING_SCHEMA, + writer=INT_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=ARRAY_OF_INT_SCHEMA, + writer=ARRAY_OF_STRING_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=TUPLE_OF_INT_INT_SCHEMA, + writer=TUPLE_OF_INT_OPEN_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=TUPLE_OF_INT_INT_OPEN_SCHEMA, + writer=TUPLE_OF_INT_OPEN_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=INT_SCHEMA, + writer=ENUM_AB_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=ENUM_AB_SCHEMA, + writer=INT_SCHEMA, + client=registry_async_client, + ) + + +async def 
test_schemaregistry_schema_restrict_attributes_is_incompatible(registry_async_client: Client) -> None: + await not_schemas_are_backward_compatible( + writer=STRING_SCHEMA, + reader=MAX_LENGTH_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=MAX_LENGTH_SCHEMA, + reader=MAX_LENGTH_DECREASED_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + writer=STRING_SCHEMA, + reader=MIN_LENGTH_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=MIN_LENGTH_SCHEMA, + reader=MIN_LENGTH_INCREASED_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + writer=STRING_SCHEMA, + reader=MIN_PATTERN_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=MIN_PATTERN_SCHEMA, + reader=MIN_PATTERN_STRICT_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + writer=INT_SCHEMA, + reader=MAXIMUM_INTEGER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=INT_SCHEMA, + reader=MAXIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=NUMBER_SCHEMA, + reader=MAXIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=MAXIMUM_NUMBER_SCHEMA, + reader=MAXIMUM_DECREASED_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=MAXIMUM_INTEGER_SCHEMA, + reader=MAXIMUM_DECREASED_INTEGER_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + writer=INT_SCHEMA, + reader=MINIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=NUMBER_SCHEMA, + reader=MINIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + 
writer=MINIMUM_NUMBER_SCHEMA, + reader=MINIMUM_INCREASED_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=MINIMUM_INTEGER_SCHEMA, + reader=MINIMUM_INCREASED_INTEGER_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + writer=INT_SCHEMA, + reader=EXCLUSIVE_MAXIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=NUMBER_SCHEMA, + reader=EXCLUSIVE_MAXIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=EXCLUSIVE_MAXIMUM_NUMBER_SCHEMA, + reader=EXCLUSIVE_MAXIMUM_DECREASED_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=EXCLUSIVE_MAXIMUM_NUMBER_SCHEMA, + reader=EXCLUSIVE_MAXIMUM_DECREASED_INTEGER_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + writer=NUMBER_SCHEMA, + reader=EXCLUSIVE_MINIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=INT_SCHEMA, + reader=EXCLUSIVE_MINIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=EXCLUSIVE_MINIMUM_NUMBER_SCHEMA, + reader=EXCLUSIVE_MINIMUM_INCREASED_NUMBER_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=EXCLUSIVE_MINIMUM_INTEGER_SCHEMA, + reader=EXCLUSIVE_MINIMUM_INCREASED_INTEGER_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + writer=OBJECT_SCHEMA, + reader=MAX_PROPERTIES_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=MAX_PROPERTIES_SCHEMA, + reader=MAX_PROPERTIES_DECREASED_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + writer=OBJECT_SCHEMA, + reader=MIN_PROPERTIES_SCHEMA, + client=registry_async_client, + ) + await 
not_schemas_are_backward_compatible( + writer=MIN_PROPERTIES_SCHEMA, + reader=MIN_PROPERTIES_INCREASED_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + writer=ARRAY_SCHEMA, + reader=MAX_ITEMS_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=MAX_ITEMS_SCHEMA, + reader=MAX_ITEMS_DECREASED_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + writer=ARRAY_SCHEMA, + reader=MIN_ITEMS_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + writer=MIN_ITEMS_SCHEMA, + reader=MIN_ITEMS_INCREASED_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_schema_broadenning_attributes_is_compatible(registry_async_client: Client) -> None: + await schemas_are_backward_compatible( + writer=MAX_LENGTH_SCHEMA, + reader=STRING_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=MAX_LENGTH_DECREASED_SCHEMA, + reader=MAX_LENGTH_SCHEMA, + client=registry_async_client, + ) + + await schemas_are_backward_compatible( + writer=MIN_LENGTH_SCHEMA, + reader=STRING_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=MIN_LENGTH_INCREASED_SCHEMA, + reader=MIN_LENGTH_SCHEMA, + client=registry_async_client, + ) + + await schemas_are_backward_compatible( + writer=MIN_PATTERN_SCHEMA, + reader=STRING_SCHEMA, + client=registry_async_client, + ) + + await schemas_are_backward_compatible( + writer=MAXIMUM_INTEGER_SCHEMA, + reader=INT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=MAXIMUM_NUMBER_SCHEMA, + reader=NUMBER_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=MAXIMUM_DECREASED_NUMBER_SCHEMA, + reader=MAXIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + + await schemas_are_backward_compatible( + writer=MINIMUM_INTEGER_SCHEMA, + 
reader=INT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=MINIMUM_NUMBER_SCHEMA, + reader=NUMBER_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=MINIMUM_INCREASED_NUMBER_SCHEMA, + reader=MINIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + + await schemas_are_backward_compatible( + writer=EXCLUSIVE_MAXIMUM_INTEGER_SCHEMA, + reader=INT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=EXCLUSIVE_MAXIMUM_NUMBER_SCHEMA, + reader=NUMBER_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=EXCLUSIVE_MAXIMUM_DECREASED_NUMBER_SCHEMA, + reader=EXCLUSIVE_MAXIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + + await schemas_are_backward_compatible( + writer=EXCLUSIVE_MINIMUM_NUMBER_SCHEMA, + reader=NUMBER_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=EXCLUSIVE_MINIMUM_INTEGER_SCHEMA, + reader=INT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=EXCLUSIVE_MINIMUM_INCREASED_NUMBER_SCHEMA, + reader=EXCLUSIVE_MINIMUM_NUMBER_SCHEMA, + client=registry_async_client, + ) + + await schemas_are_backward_compatible( + writer=MAX_PROPERTIES_SCHEMA, + reader=OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=MAX_PROPERTIES_DECREASED_SCHEMA, + reader=MAX_PROPERTIES_SCHEMA, + client=registry_async_client, + ) + + await schemas_are_backward_compatible( + writer=MIN_PROPERTIES_SCHEMA, + reader=OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=MIN_PROPERTIES_INCREASED_SCHEMA, + reader=MIN_PROPERTIES_SCHEMA, + client=registry_async_client, + ) + + await schemas_are_backward_compatible( + writer=MAX_ITEMS_SCHEMA, + reader=ARRAY_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + 
writer=MAX_ITEMS_DECREASED_SCHEMA, + reader=MAX_ITEMS_SCHEMA, + client=registry_async_client, + ) + + await schemas_are_backward_compatible( + writer=MIN_ITEMS_SCHEMA, + reader=ARRAY_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + writer=MIN_ITEMS_INCREASED_SCHEMA, + reader=MIN_ITEMS_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_pattern_properties(registry_async_client: Client): + await schemas_are_backward_compatible( + reader=OBJECT_SCHEMA, + writer=PATTERN_PROPERTY_ASTAR_OBJECT_SCHEMA, + client=registry_async_client, + ) + # In backward compatibility mode it is allowed to delete fields + await schemas_are_backward_compatible( + reader=A_OBJECT_SCHEMA, + writer=PATTERN_PROPERTY_ASTAR_OBJECT_SCHEMA, + client=registry_async_client, + ) + # In backward compatibility mode it is allowed to add optional fields + await schemas_are_backward_compatible( + reader=PATTERN_PROPERTY_ASTAR_OBJECT_SCHEMA, + writer=A_OBJECT_SCHEMA, + client=registry_async_client, + ) + + # - older accepts any value for `a` + # - newer requires it to be an `int`, therefore the other values became + # invalid + await not_schemas_are_backward_compatible( + reader=A_INT_OBJECT_SCHEMA, + writer=PATTERN_PROPERTY_ASTAR_OBJECT_SCHEMA, + client=registry_async_client, + ) + + # - older has property `b` + # - newer only accepts properties matching the regex `a*` + await not_schemas_are_backward_compatible( + reader=B_INT_OBJECT_SCHEMA, + writer=PATTERN_PROPERTY_ASTAR_OBJECT_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_object_properties(registry_async_client: Client): + await not_schemas_are_backward_compatible( + reader=A_OBJECT_SCHEMA, + writer=OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=OBJECT_SCHEMA, + writer=A_OBJECT_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + reader=A_INT_OBJECT_SCHEMA, + 
writer=OBJECT_SCHEMA, + client=registry_async_client, + ) + + await not_schemas_are_backward_compatible( + reader=B_INT_OBJECT_SCHEMA, + writer=OBJECT_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_property_names(registry_async_client: Client): + await schemas_are_backward_compatible( + reader=OBJECT_SCHEMA, + writer=PROPERTY_NAMES_ASTAR_OBJECT_SCHEMA, + client=registry_async_client, + ) + await not_schemas_are_backward_compatible( + reader=A_OBJECT_SCHEMA, + writer=PROPERTY_NAMES_ASTAR_OBJECT_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=PROPERTY_NAMES_ASTAR_OBJECT_SCHEMA, + writer=A_OBJECT_SCHEMA, + client=registry_async_client, + ) + + # - older accepts any value for `a` + # - newer requires it to be an `int`, therefore the other values became + # invalid + await not_schemas_are_backward_compatible( + reader=A_INT_OBJECT_SCHEMA, + writer=PROPERTY_NAMES_ASTAR_OBJECT_SCHEMA, + client=registry_async_client, + ) + + # - older has property `b` + # - newer only accepts properties matching the regex `a*` + await schemas_are_backward_compatible( + reader=PROPERTY_NAMES_ASTAR_OBJECT_SCHEMA, + writer=B_INT_OBJECT_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_type_with_list(registry_async_client: Client): + # "type": [] is treated as a shortcut for anyOf + await schemas_are_backward_compatible( + reader=STRING_SCHEMA, + writer=TYPES_STRING_SCHEMA, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=TYPES_STRING_INT_SCHEMA, + writer=TYPES_STRING_SCHEMA, + client=registry_async_client, + ) + + +async def test_schemaregistry_ref(registry_async_client: Client): + await schemas_are_backward_compatible( + reader=ARRAY_OF_POSITIVE_INTEGER, + writer=ARRAY_OF_POSITIVE_INTEGER_THROUGH_REF, + client=registry_async_client, + ) + await schemas_are_backward_compatible( + reader=ARRAY_OF_POSITIVE_INTEGER_THROUGH_REF, + 
writer=ARRAY_OF_POSITIVE_INTEGER, + client=registry_async_client, + ) diff --git a/tests/integration/test_dependencies_compatibility_protobuf.py b/tests/integration/test_dependencies_compatibility_protobuf.py index 2bacbdf7b..725611b5c 100644 --- a/tests/integration/test_dependencies_compatibility_protobuf.py +++ b/tests/integration/test_dependencies_compatibility_protobuf.py @@ -183,7 +183,7 @@ async def test_protobuf_schema_compatibility_dependencies(registry_async_client: json={"schemaType": "PROTOBUF", "schema": evolved_schema, "references": evolved_references}, ) assert res.status_code == 200 - assert res.json() == {"is_compatible": False} + assert res.json().get("is_compatible") is False @pytest.mark.parametrize("trail", ["", "/"]) @@ -271,7 +271,7 @@ async def test_protobuf_schema_compatibility_dependencies1(registry_async_client json={"schemaType": "PROTOBUF", "schema": evolved_schema, "references": evolved_references}, ) assert res.status_code == 200 - assert res.json() == {"is_compatible": False} + assert res.json().get("is_compatible") is False # Do compatibility check when message field is altered from referenced type to google type @@ -339,7 +339,7 @@ async def test_protobuf_schema_compatibility_dependencies1g(registry_async_clien json={"schemaType": "PROTOBUF", "schema": evolved_schema}, ) assert res.status_code == 200 - assert res.json() == {"is_compatible": False} + assert res.json().get("is_compatible") is False # Do compatibility check when message field is altered from google type to referenced type @@ -407,7 +407,7 @@ async def test_protobuf_schema_compatibility_dependencies1g_otherway(registry_as json={"schemaType": "PROTOBUF", "schema": evolved_schema, "references": container_references}, ) assert res.status_code == 200 - assert res.json() == {"is_compatible": False} + assert res.json().get("is_compatible") is False @pytest.mark.parametrize("trail", ["", "/"]) @@ -491,7 +491,7 @@ async def 
test_protobuf_schema_compatibility_dependencies2(registry_async_client json={"schemaType": "PROTOBUF", "schema": evolved_schema, "references": evolved_references}, ) assert res.status_code == 200 - assert res.json() == {"is_compatible": False} + assert res.json().get("is_compatible") is False SIMPLE_SCHEMA = """\ diff --git a/tests/integration/test_health_check.py b/tests/integration/test_health_check.py new file mode 100644 index 000000000..c4958651e --- /dev/null +++ b/tests/integration/test_health_check.py @@ -0,0 +1,27 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" + +from karapace.client import Client +from karapace.kafka.admin import KafkaAdminClient +from tenacity import retry, stop_after_delay, wait_fixed +from tests.integration.utils.cluster import RegistryDescription + +import http + + +async def test_health_check( + registry_cluster: RegistryDescription, registry_async_client: Client, admin_client: KafkaAdminClient +) -> None: + res = await registry_async_client.get("/_health") + assert res.ok + + admin_client.delete_topic(registry_cluster.schemas_topic) + + @retry(stop=stop_after_delay(10), wait=wait_fixed(1), reraise=True) + async def check_health(): + res = await registry_async_client.get("/_health") + assert res.status_code == http.HTTPStatus.SERVICE_UNAVAILABLE, "should report unhealthy after topic has been deleted" + + await check_health() diff --git a/tests/integration/test_json_references.py b/tests/integration/test_json_references.py new file mode 100644 index 000000000..69084729c --- /dev/null +++ b/tests/integration/test_json_references.py @@ -0,0 +1,229 @@ +""" +karapace - json schema (with references) tests + +Copyright (c) 2023 Aiven Ltd +See LICENSE for details +""" + +from karapace.client import Client, Result +from tests.utils import create_subject_name_factory + +import json + +baseurl = "http://localhost:8081" + +# country.schema.json +SCHEMA_COUNTRY = { + "$id": "https://example.com/country.schema.json", + 
"title": "Country", + "type": "object", + "description": "A country of registration", + "properties": {"name": {"type": "string"}, "code": {"type": "string"}}, + "required": ["name", "code"], +} + +# address.schema.json +SCHEMA_ADDRESS = { + "$id": "https://example.com/address.schema.json", + "title": "Address", + "type": "object", + "properties": { + "street": {"type": "string"}, + "city": {"type": "string"}, + "postalCode": {"type": "string"}, + "country": {"$ref": "https://example.com/country.schema.json"}, + }, + "required": ["street", "city", "postalCode", "country"], +} + +# job.schema.json +SCHEMA_JOB = { + "$id": "https://example.com/job.schema.json", + "title": "Job", + "type": "object", + "properties": {"title": {"type": "string"}, "salary": {"type": "number"}}, + "required": ["title", "salary"], +} + +# person.schema.json +SCHEMA_PERSON = { + "$id": "https://example.com/person.schema.json", + "title": "Person", + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "address": {"$ref": "https://example.com/address.schema.json"}, + "job": {"$ref": "https://example.com/job.schema.json"}, + }, + "required": ["name", "age", "address", "job"], +} + +SCHEMA_PERSON_AGE_INT_LONG = { + "$id": "https://example.com/person.schema.json", + "title": "Person", + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "address": {"$ref": "https://example.com/address.schema.json"}, + "job": {"$ref": "https://example.com/job.schema.json"}, + }, + "required": ["name", "age", "address", "job"], +} + +SCHEMA_PERSON_AGE_LONG_STRING = { + "$id": "https://example.com/person.schema.json", + "title": "Person", + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "string"}, + "address": {"$ref": "https://example.com/address.schema.json"}, + "job": {"$ref": "https://example.com/job.schema.json"}, + }, + "required": ["name", "age", "address", "job"], +} + 
+SCHEMA_ADDRESS_INCOMPATIBLE = { + "$id": "https://example.com/address2.schema.json", + "title": "ChangedAddress", + "type": "object", + "properties": { + "street2": {"type": "string"}, + "city": {"type": "string"}, + "postalCode": {"type": "string"}, + "country": {"$ref": "https://example.com/country.schema.json"}, + }, + "required": ["street", "city", "postalCode", "country"], +} + + +def address_references(subject_prefix: str) -> list: + return [{"name": "country.schema.json", "subject": f"{subject_prefix}country", "version": 1}] + + +def person_references(subject_prefix: str) -> list: + return [ + {"name": "address.schema.json", "subject": f"{subject_prefix}address", "version": 1}, + {"name": "job.schema.json", "subject": f"{subject_prefix}job", "version": 1}, + ] + + +def stored_person_subject(subject_prefix: str, subject_id: int) -> dict: + return { + "id": subject_id, + "references": [ + {"name": "address.schema.json", "subject": f"{subject_prefix}address", "version": 1}, + {"name": "job.schema.json", "subject": f"{subject_prefix}job", "version": 1}, + ], + "schema": SCHEMA_PERSON, + "schemaType": "JSON", + "subject": f"{subject_prefix}person", + "version": 1, + } + + +async def basic_json_references_fill_test(registry_async_client: Client, subject_prefix: str) -> Result: + res = await registry_async_client.post( + f"subjects/{subject_prefix}country/versions", json={"schemaType": "JSON", "schema": json.dumps(SCHEMA_COUNTRY)} + ) + assert res.status_code == 200 + assert "id" in res.json() + + res = await registry_async_client.post( + f"subjects/{subject_prefix}address/versions", + json={"schemaType": "JSON", "schema": json.dumps(SCHEMA_ADDRESS), "references": address_references(subject_prefix)}, + ) + assert res.status_code == 200 + assert "id" in res.json() + address_id = res.json()["id"] + + # Check if the schema has now been registered under the subject + + res = await registry_async_client.post( + f"subjects/{subject_prefix}address", + json={"schemaType": 
"JSON", "schema": json.dumps(SCHEMA_ADDRESS), "references": address_references(subject_prefix)}, + ) + assert res.status_code == 200 + assert "subject" in res.json() + assert "id" in res.json() + assert address_id == res.json()["id"] + assert "version" in res.json() + assert "schema" in res.json() + + res = await registry_async_client.post( + f"subjects/{subject_prefix}job/versions", json={"schemaType": "JSON", "schema": json.dumps(SCHEMA_JOB)} + ) + assert res.status_code == 200 + assert "id" in res.json() + res = await registry_async_client.post( + f"subjects/{subject_prefix}person/versions", + json={"schemaType": "JSON", "schema": json.dumps(SCHEMA_PERSON), "references": person_references(subject_prefix)}, + ) + assert res.status_code == 200 + assert "id" in res.json() + return res + + +async def test_basic_json_references(registry_async_client: Client) -> None: + subject_prefix = create_subject_name_factory("basic-json-references-")() + res = await basic_json_references_fill_test(registry_async_client, subject_prefix) + person_id = res.json()["id"] + res = await registry_async_client.get(f"subjects/{subject_prefix}country/versions/latest") + assert res.status_code == 200 + res = await registry_async_client.get(f"subjects/{subject_prefix}person/versions/latest") + assert res.status_code == 200 + r = res.json() + r["schema"] = json.loads(r["schema"]) + assert r == stored_person_subject(subject_prefix, person_id) + + +async def test_json_references_compatibility(registry_async_client: Client) -> None: + subject_prefix = create_subject_name_factory("json-references-compatibility-")() + await basic_json_references_fill_test(registry_async_client, subject_prefix) + + res = await registry_async_client.post( + f"compatibility/subjects/{subject_prefix}person/versions/latest", + json={ + "schemaType": "JSON", + "schema": json.dumps(SCHEMA_PERSON_AGE_INT_LONG), + "references": person_references(subject_prefix), + }, + ) + assert res.status_code == 200 + assert res.json() 
== {"is_compatible": True} + res = await registry_async_client.post( + f"compatibility/subjects/{subject_prefix}person/versions/latest", + json={ + "schemaType": "JSON", + "schema": json.dumps(SCHEMA_PERSON_AGE_LONG_STRING), + "references": person_references(subject_prefix), + }, + ) + assert res.status_code == 200 + assert res.json() == { + "is_compatible": False, + "messages": ["type Instance.STRING is not compatible with type Instance.INTEGER"], + } + + +async def test_json_incompatible_name_references(registry_async_client: Client) -> None: + subject_prefix = create_subject_name_factory("json-references-incompatible-name-")() + await basic_json_references_fill_test(registry_async_client, subject_prefix) + res = await registry_async_client.post( + f"subjects/{subject_prefix}address/versions", + json={ + "schemaType": "JSON", + "schema": json.dumps(SCHEMA_ADDRESS_INCOMPATIBLE), + "references": address_references(subject_prefix), + }, + ) + assert res.status_code == 409 + msg = ( + "Incompatible schema, compatibility_mode=BACKWARD. Incompatibilities: Restricting acceptable values of " + "properties is an incompatible change. The following properties street2 accepted any value because of the " + "lack of validation (the object schema had neither patternProperties nor additionalProperties), " + "now these values are restricted." 
+ ) + assert res.json()["message"] == msg diff --git a/tests/integration/test_karapace.py b/tests/integration/test_karapace.py index c6352ecfd..281cd7338 100644 --- a/tests/integration/test_karapace.py +++ b/tests/integration/test_karapace.py @@ -2,13 +2,13 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ +from collections.abc import Iterator from contextlib import closing, contextmanager, ExitStack from karapace.config import set_config_defaults from pathlib import Path from tests.integration.utils.kafka_server import KafkaServers from tests.integration.utils.process import stop_process from tests.utils import popen_karapace_all -from typing import Iterator import json import socket diff --git a/tests/integration/test_rest.py b/tests/integration/test_rest.py index e4949b43d..ee504366b 100644 --- a/tests/integration/test_rest.py +++ b/tests/integration/test_rest.py @@ -4,6 +4,7 @@ """ from __future__ import annotations +from collections.abc import Mapping from dataclasses import dataclass from karapace.client import Client from karapace.kafka.admin import KafkaAdminClient @@ -23,7 +24,7 @@ test_objects_avro_evolution, wait_for_topics, ) -from typing import Any, Mapping +from typing import Any import asyncio import base64 diff --git a/tests/integration/test_rest_consumer.py b/tests/integration/test_rest_consumer.py index 1c5f6083a..f0003dbdd 100644 --- a/tests/integration/test_rest_consumer.py +++ b/tests/integration/test_rest_consumer.py @@ -167,6 +167,16 @@ async def test_subscription(rest_async_client, admin_client, producer, trail): res = await rest_async_client.post(assign_path, headers=REST_HEADERS["json"], json=assign_payload) assert res.status_code == 409, "Expecting status code 409 on assign after subscribe on the same consumer instance" + # topics parameter is expected to be array, 4xx error returned + res = await rest_async_client.post(sub_path, json={"topics": topic_name}, headers=REST_HEADERS["json"]) + assert res.status_code == 422, 
"Expecting status code 422 on subscription update with invalid topics param" + + # topic pattern parameter is expected to be a string, 4xx error returned + res = await rest_async_client.post( + sub_path, json={"topic_pattern": ["not", "a", "string"]}, headers=REST_HEADERS["json"] + ) + assert res.status_code == 422, "Expecting status code 422 on subscription update with invalid topics param" + @pytest.mark.parametrize("trail", ["", "/"]) async def test_seek(rest_async_client, admin_client, trail): diff --git a/tests/integration/test_schema.py b/tests/integration/test_schema.py index dd0502c89..668bec657 100644 --- a/tests/integration/test_schema.py +++ b/tests/integration/test_schema.py @@ -21,7 +21,6 @@ create_subject_name_factory, repeat_until_successful_request, ) -from typing import List, Tuple import asyncio import json @@ -333,7 +332,8 @@ async def test_compatibility_endpoint(registry_async_client: Client, trail: str) json={"schema": json.dumps(schema)}, ) assert res.status_code == 200 - assert res.json() == {"is_compatible": False} + assert res.json().get("is_compatible") is False + assert res.json().get("messages") == ["reader type: string not compatible with writer type: int"] @pytest.mark.parametrize("trail", ["", "/"]) @@ -537,7 +537,7 @@ def _test_cases(): json={"schema": json.dumps(schema)}, ) assert res.status_code == 200 - assert res.json() == {"is_compatible": expected} + assert res.json().get("is_compatible") == expected @pytest.mark.parametrize("trail", ["", "/"]) @@ -1060,7 +1060,7 @@ async def test_transitive_compatibility(registry_async_client: Client) -> None: assert res_json["error_code"] == 409 -async def assert_schema_versions(client: Client, trail: str, schema_id: int, expected: List[Tuple[str, int]]) -> None: +async def assert_schema_versions(client: Client, trail: str, schema_id: int, expected: list[tuple[str, int]]) -> None: """ Calls /schemas/ids/{schema_id}/versions and asserts the expected results were in the response. 
""" @@ -1084,7 +1084,7 @@ async def assert_schema_versions_failed(client: Client, trail: str, schema_id: i async def register_schema( registry_async_client: Client, trail: str, subject: str, schema_str: str, schema_type: SchemaType = SchemaType.AVRO -) -> Tuple[int, int]: +) -> tuple[int, int]: # Register to get the id payload = {"schema": schema_str} if schema_type == SchemaType.JSONSCHEMA: @@ -3244,7 +3244,7 @@ async def test_schema_non_compliant_name_in_existing( json={"schema": json.dumps(evolved_schema)}, ) assert res.status_code == 200 - assert not res.json().get("is_compatible") + assert res.json().get("is_compatible") is False # Post evolved schema, should not be compatible and rejected. res = await registry_async_client.post( @@ -3254,7 +3254,10 @@ async def test_schema_non_compliant_name_in_existing( assert res.status_code == 409 assert res.json() == { "error_code": 409, - "message": "Incompatible schema, compatibility_mode=BACKWARD expected: compliant_name_test.test-schema", + "message": ( + "Incompatible schema, compatibility_mode=BACKWARD. " + "Incompatibilities: expected: compliant_name_test.test-schema" + ), } # Send compatibility configuration for subject that disabled backwards compatibility. 
diff --git a/tests/integration/test_schema_compatibility.py b/tests/integration/test_schema_compatibility.py new file mode 100644 index 000000000..d71237d7c --- /dev/null +++ b/tests/integration/test_schema_compatibility.py @@ -0,0 +1,235 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" +from __future__ import annotations + +from collections.abc import Coroutine +from dataclasses import dataclass +from karapace.client import Client +from karapace.typing import JsonObject, Subject +from tests.base_testcase import BaseTestCase +from typing import Any, Callable, Final + +import json +import logging +import pytest + +SchemaRegitrationFunc = Callable[[Client, Subject], Coroutine[Any, Any, None]] + +LOG = logging.getLogger(__name__) + +schema_int: Final[JsonObject] = {"type": "record", "name": "schema_name", "fields": [{"type": "int", "name": "field_name"}]} +schema_long: Final[JsonObject] = { + "type": "record", + "name": "schema_name", + "fields": [{"type": "long", "name": "field_name"}], +} +schema_string: Final[JsonObject] = { + "type": "record", + "name": "schema_name", + "fields": [{"type": "string", "name": "field_name"}], +} +schema_double: Final[JsonObject] = { + "type": "record", + "name": "schema_name", + "fields": [{"type": "double", "name": "field_name"}], +} + + +@dataclass +class SchemaCompatibilityTestCase(BaseTestCase): + new_schema: str + compatibility_mode: str + register_baseline_schemas: SchemaRegitrationFunc + expected_is_compatible: bool | None + expected_status_code: int + expected_incompatibilities: list[str] | None + + +async def _register_baseline_schemas_no_incompatibilities(registry_async_client: Client, subject: Subject) -> None: + res = await registry_async_client.post( + f"subjects/{subject}/versions", + json={"schemaType": "AVRO", "schema": json.dumps(schema_int)}, + ) + assert res.status_code == 200 + + # Changing type from int to long is compatible + res = await registry_async_client.post( + 
f"subjects/{subject}/versions", + json={"schemaType": "AVRO", "schema": json.dumps(schema_long)}, + ) + assert res.status_code == 200 + + +async def _register_baseline_schemas_with_incompatibilities(registry_async_client: Client, subject: Subject) -> None: + # Allow registering non backward compatible schemas + await _set_compatibility_mode(registry_async_client, subject, "NONE") + + res = await registry_async_client.post( + f"subjects/{subject}/versions", + json={"schemaType": "AVRO", "schema": json.dumps(schema_string)}, + ) + assert res.status_code == 200 + + # Changing type from string to double is incompatible + res = await registry_async_client.post( + f"subjects/{subject}/versions", + json={"schemaType": "AVRO", "schema": json.dumps(schema_double)}, + ) + assert res.status_code == 200 + + +async def _register_baseline_schemas_with_incompatibilities_and_a_deleted_schema( + registry_async_client: Client, subject: Subject +) -> None: + await _register_baseline_schemas_with_incompatibilities(registry_async_client, subject) + + # Register schema + # Changing type from double to long is incompatible + res = await registry_async_client.post( + f"subjects/{subject}/versions", + json={"schemaType": "AVRO", "schema": json.dumps(schema_long)}, + ) + assert res.status_code == 200 + + # And delete it + res = await registry_async_client.delete(f"subjects/{subject}/versions/latest") + assert res.status_code == 200 + assert res.json() == 3 + + +async def _register_no_baseline_schemas( + registry_async_client: Client, subject: Subject # pylint: disable=unused-argument +) -> None: + pass + + +async def _set_compatibility_mode(registry_async_client: Client, subject: Subject, compatibility_mode: str) -> None: + res = await registry_async_client.put(f"config/{subject}", json={"compatibility": compatibility_mode}) + assert res.status_code == 200 + + +@pytest.mark.parametrize( + "test_case", + [ + # Case 0 + # New schema compatible with all baseline ones (int --> long, long --> 
long) + # Non-transitive mode + # --> No incompatibilities are found + SchemaCompatibilityTestCase( + test_name="case0", + compatibility_mode="BACKWARD", + register_baseline_schemas=_register_baseline_schemas_no_incompatibilities, + new_schema=json.dumps(schema_long), + expected_is_compatible=True, + expected_status_code=200, + expected_incompatibilities=None, + ), + # Case 1 + # Same as previous case, but in transitive mode + # --> No incompatibilities are found + SchemaCompatibilityTestCase( + test_name="case1", + compatibility_mode="BACKWARD_TRANSITIVE", + register_baseline_schemas=_register_baseline_schemas_no_incompatibilities, + new_schema=json.dumps(schema_long), + expected_is_compatible=True, + expected_status_code=200, + expected_incompatibilities=None, + ), + # Case 2 + # New schema incompatible with both baseline schemas (string --> int, double --> int) + # Non-transitive mode + # --> Incompatibilities are found only against last baseline schema (double --> int) + SchemaCompatibilityTestCase( + test_name="case2", + compatibility_mode="BACKWARD", + register_baseline_schemas=_register_baseline_schemas_with_incompatibilities, + new_schema=json.dumps(schema_int), + expected_is_compatible=False, + expected_status_code=200, + expected_incompatibilities=["reader type: int not compatible with writer type: double"], + ), + # Case 3 + # Same as previous case, but in transitive mode + # --> Incompatibilities are found in the first baseline schema (string --> int) + SchemaCompatibilityTestCase( + test_name="case3", + compatibility_mode="BACKWARD_TRANSITIVE", + register_baseline_schemas=_register_baseline_schemas_with_incompatibilities, + new_schema=json.dumps(schema_int), + expected_is_compatible=False, + expected_status_code=200, + expected_incompatibilities=["reader type: int not compatible with writer type: string"], + ), + # Case 4 + # Same as case 2, but with a deleted schema among baseline ones + # Non-transitive mode + # --> The deleted schema is ignored + # 
--> Incompatibilities are found only against last baseline schema (double --> int) + SchemaCompatibilityTestCase( + test_name="case4", + compatibility_mode="BACKWARD", + register_baseline_schemas=_register_baseline_schemas_with_incompatibilities_and_a_deleted_schema, + new_schema=json.dumps(schema_int), + expected_is_compatible=False, + expected_status_code=200, + expected_incompatibilities=["reader type: int not compatible with writer type: double"], + ), + # Case 5 + # Same as case 3, but with a deleted schema among baseline ones + # --> The deleted schema is ignored + # --> Incompatibilities are found in the first baseline schema (string --> int) + SchemaCompatibilityTestCase( + test_name="case5", + compatibility_mode="BACKWARD_TRANSITIVE", + register_baseline_schemas=_register_baseline_schemas_with_incompatibilities_and_a_deleted_schema, + new_schema=json.dumps(schema_int), + expected_is_compatible=False, + expected_status_code=200, + expected_incompatibilities=["reader type: int not compatible with writer type: string"], + ), + # Case 6 + # A new schema and no baseline schemas + # Non-transitive mode + # --> No incompatibilities are found + # --> Status code is 404 because `latest` version to check against does not exist + SchemaCompatibilityTestCase( + test_name="case6", + compatibility_mode="BACKWARD", + register_baseline_schemas=_register_no_baseline_schemas, + new_schema=json.dumps(schema_int), + expected_is_compatible=None, + expected_status_code=404, + expected_incompatibilities=None, + ), + # Case 7 + # Same as previous case, but in transitive mode + # --> No incompatibilities are found + # --> Status code is 404 because `latest` version to check against does not exist + SchemaCompatibilityTestCase( + test_name="case7", + compatibility_mode="BACKWARD_TRANSITIVE", + register_baseline_schemas=_register_no_baseline_schemas, + new_schema=json.dumps(schema_int), + expected_is_compatible=None, + expected_status_code=404, + expected_incompatibilities=None, + 
), + ], +) +async def test_schema_compatibility(test_case: SchemaCompatibilityTestCase, registry_async_client: Client) -> None: + subject = Subject(f"subject_{test_case.test_name}") + + await test_case.register_baseline_schemas(registry_async_client, subject) + await _set_compatibility_mode(registry_async_client, subject, test_case.compatibility_mode) + + LOG.info("Validating new schema: %s", test_case.new_schema) + res = await registry_async_client.post( + f"compatibility/subjects/{subject}/versions/latest", json={"schema": test_case.new_schema} + ) + + assert res.status_code == test_case.expected_status_code + assert res.json().get("is_compatible") == test_case.expected_is_compatible + assert res.json().get("messages") == test_case.expected_incompatibilities diff --git a/tests/integration/test_schema_coordinator.py b/tests/integration/test_schema_coordinator.py index e6ccbc699..5af1d17c0 100644 --- a/tests/integration/test_schema_coordinator.py +++ b/tests/integration/test_schema_coordinator.py @@ -18,13 +18,14 @@ SyncGroupResponse_v0 as SyncGroupResponse, ) from aiokafka.util import create_future, create_task +from collections.abc import AsyncGenerator, Iterator from karapace.coordinator.schema_coordinator import Assignment, SchemaCoordinator, SchemaCoordinatorGroupRebalance from karapace.utils import json_encode from karapace.version import __version__ from tenacity import retry, stop_after_delay, TryAgain, wait_fixed from tests.integration.utils.kafka_server import KafkaServers from tests.utils import new_random_name -from typing import AsyncGenerator, Final, Iterator +from typing import Final from unittest import mock import aiokafka.errors as Errors diff --git a/tests/integration/test_schema_protobuf.py b/tests/integration/test_schema_protobuf.py index 9eae2b994..55825fb92 100644 --- a/tests/integration/test_schema_protobuf.py +++ b/tests/integration/test_schema_protobuf.py @@ -12,7 +12,7 @@ from karapace.typing import JsonData, SchemaMetadata, SchemaRuleSet 
from tests.base_testcase import BaseTestCase from tests.utils import create_subject_name_factory -from typing import List, Optional, Union +from typing import Optional, Union import logging import pytest @@ -472,7 +472,7 @@ class TestCaseSchema: schema_type: SchemaType schema_str: str subject: str - references: Optional[List[JsonData]] = None + references: Optional[list[JsonData]] = None expected: int = 200 expected_msg: str = "" expected_error_code: Optional[int] = None @@ -501,7 +501,7 @@ class TestCaseHardDeleteSchema(TestCaseDeleteSchema): @dataclass class ReferenceTestCase(BaseTestCase): - schemas: List[Union[TestCaseSchema, TestCaseDeleteSchema]] + schemas: list[Union[TestCaseSchema, TestCaseDeleteSchema]] # Base case @@ -1123,8 +1123,8 @@ async def test_protobuf_error(registry_async_client: Client) -> None: expected=409, expected_msg=( # ACTUALLY THERE NO MESSAGE_DROP!!! - "Incompatible schema, compatibility_mode=BACKWARD " - "Incompatible modification Modification.MESSAGE_DROP found" + "Incompatible schema, compatibility_mode=BACKWARD. 
" + "Incompatibilities: Incompatible modification Modification.MESSAGE_DROP found" ), ) print(f"Adding new schema, subject: '{testdata.subject}'\n{testdata.schema_str}") diff --git a/tests/integration/test_schema_reader.py b/tests/integration/test_schema_reader.py index 738f76498..4d00a5581 100644 --- a/tests/integration/test_schema_reader.py +++ b/tests/integration/test_schema_reader.py @@ -18,7 +18,6 @@ from tests.integration.utils.kafka_server import KafkaServers from tests.schemas.json_schemas import FALSE_SCHEMA, TRUE_SCHEMA from tests.utils import create_group_name_factory, create_subject_name_factory, new_random_name, new_topic -from typing import List, Tuple import asyncio import pytest @@ -204,7 +203,7 @@ async def test_regression_config_for_inexisting_object_should_not_throw( @dataclass class DetectKeyFormatCase(BaseTestCase): - raw_msgs: List[Tuple[bytes, bytes]] + raw_msgs: list[tuple[bytes, bytes]] expected: KeyMode diff --git a/tests/integration/test_schema_registry_auth.py b/tests/integration/test_schema_registry_auth.py index 5f780f3ce..89832355f 100644 --- a/tests/integration/test_schema_registry_auth.py +++ b/tests/integration/test_schema_registry_auth.py @@ -15,7 +15,6 @@ test_objects_avro, wait_for_topics, ) -from typing import List from urllib.parse import quote import aiohttp @@ -205,7 +204,7 @@ async def test_sr_ids(registry_async_retry_client_auth: RetryRestClient) -> None async def test_sr_auth_forwarding( - registry_async_auth_pair: List[str], registry_async_retry_client_auth: RetryRestClient + registry_async_auth_pair: list[str], registry_async_retry_client_auth: RetryRestClient ) -> None: auth = aiohttp.BasicAuth("admin", "admin") diff --git a/tests/integration/utils/cluster.py b/tests/integration/utils/cluster.py index 04560b453..0e992499e 100644 --- a/tests/integration/utils/cluster.py +++ b/tests/integration/utils/cluster.py @@ -4,6 +4,7 @@ """ from __future__ import annotations +from collections.abc import AsyncIterator from 
contextlib import asynccontextmanager, ExitStack from dataclasses import dataclass from karapace.config import Config, set_config_defaults, write_config @@ -11,7 +12,6 @@ from tests.integration.utils.network import allocate_port from tests.integration.utils.process import stop_process, wait_for_port_subprocess from tests.utils import new_random_name, popen_karapace_all -from typing import AsyncIterator @dataclass(frozen=True) diff --git a/tests/integration/utils/network.py b/tests/integration/utils/network.py index ef9439e1d..506751012 100644 --- a/tests/integration/utils/network.py +++ b/tests/integration/utils/network.py @@ -2,8 +2,8 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ +from collections.abc import Iterator from contextlib import closing, contextmanager -from typing import Iterator import socket diff --git a/tests/integration/utils/process.py b/tests/integration/utils/process.py index 4c9c93076..e3c36e412 100644 --- a/tests/integration/utils/process.py +++ b/tests/integration/utils/process.py @@ -5,7 +5,7 @@ from karapace.utils import Expiration from subprocess import Popen from tests.integration.utils.network import port_is_listening -from typing import List, Optional +from typing import Optional import os import signal @@ -45,7 +45,7 @@ def stop_process(proc: Optional[Popen]) -> None: pass -def get_java_process_configuration(java_args: List[str]) -> List[str]: +def get_java_process_configuration(java_args: list[str]) -> list[str]: command = [ "/usr/bin/java", "-server", diff --git a/tests/integration/utils/zookeeper.py b/tests/integration/utils/zookeeper.py index cd4aaa00b..5dffcfeca 100644 --- a/tests/integration/utils/zookeeper.py +++ b/tests/integration/utils/zookeeper.py @@ -7,10 +7,9 @@ from tests.integration.utils.config import KafkaDescription, ZKConfig from tests.integration.utils.process import get_java_process_configuration from tests.utils import write_ini -from typing import List -def zk_java_args(cfg_path: Path, 
kafka_description: KafkaDescription) -> List[str]: +def zk_java_args(cfg_path: Path, kafka_description: KafkaDescription) -> list[str]: msg = f"Couldn't find kafka installation at {kafka_description.install_dir} to run integration tests." assert kafka_description.install_dir.exists(), msg java_args = [ diff --git a/tests/schemas/protobuf.py b/tests/schemas/protobuf.py index 1bd3f05d5..afbb3f890 100644 --- a/tests/schemas/protobuf.py +++ b/tests/schemas/protobuf.py @@ -261,3 +261,21 @@ "lzdGVyLk1ldGFkYXRhEhYKDmNvbXBhbnlfbnVtYmVyGAIgASgJGhYKCE1ldGFkYXRhEgoK" "AmlkGAEgASgJYgZwcm90bzM=" ) + +schema_protobuf_optionals_bin = ( + "Cgp0ZXN0LnByb3RvIqYBCgpEaW1lbnNpb25zEhEKBHNpemUYASABKAFIAIgBARISCgV3aWR0aBgCIAEoAUgBiAEBEhMKBmhlaWdodBgDIAEo" + + "AUgCiAEBEhMKBmxlbmd0aBgEIAEoAUgDiAEBEhMKBndlaWdodBgFIAEoAUgEiAEBQgcKBV9zaXplQggKBl93aWR0aEIJCgdfaGVpZ2h0Qg" + + "kKB19sZW5ndGhCCQoHX3dlaWdodGIGcHJvdG8z" +) + +schema_protobuf_optionals = """\ +syntax = "proto3"; + +message Dimensions { + optional double size = 1; + optional double width = 2; + optional double height = 3; + optional double length = 4; + optional double weight = 5; +} +""" diff --git a/tests/unit/anonymize_schemas/test_anonymize_avro.py b/tests/unit/anonymize_schemas/test_anonymize_avro.py index 9b99e16c5..2dc8870dc 100644 --- a/tests/unit/anonymize_schemas/test_anonymize_avro.py +++ b/tests/unit/anonymize_schemas/test_anonymize_avro.py @@ -5,7 +5,7 @@ See LICENSE for details """ from karapace.anonymize_schemas.anonymize_avro import anonymize -from typing import Dict, Union +from typing import Union import json import pytest @@ -565,6 +565,6 @@ [EMPTY_STR, EMPTY_STR], ], ) -def test_anonymize(test_schema: str, expected_schema: Union[str, Dict[str, str]]): +def test_anonymize(test_schema: str, expected_schema: Union[str, dict[str, str]]): res = anonymize(test_schema) assert res == expected_schema diff --git a/tests/unit/avro_dataclasses/test_introspect.py b/tests/unit/avro_dataclasses/test_introspect.py index 
e9765609a..b816cf128 100644 --- a/tests/unit/avro_dataclasses/test_introspect.py +++ b/tests/unit/avro_dataclasses/test_introspect.py @@ -2,11 +2,12 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ +from collections.abc import Mapping, Sequence from dataclasses import dataclass, Field, field, fields from enum import Enum from karapace.avro_dataclasses.introspect import field_schema, record_schema, UnsupportedAnnotation from karapace.avro_dataclasses.schema import FieldSchema -from typing import Final, Mapping, Optional, Sequence, Tuple +from typing import Final, Optional import datetime import pytest @@ -35,8 +36,8 @@ class ValidRecord: optional_bytes_field: Optional[bytes] enum_field: Symbols dt_field: datetime.datetime - int_array: Tuple[int, ...] - nested_values: Tuple[Nested, ...] + int_array: tuple[int, ...] + nested_values: tuple[Nested, ...] enum_field_default: Symbols = Symbols.a int_field_default: int = 123 @@ -49,9 +50,9 @@ class ValidRecord: @dataclass class InvalidRecord: any_tuple: tuple - homogenous_short_tuple: Tuple[int] - homogenous_bi_tuple: Tuple[int, int] - homogenous_tri_tuple: Tuple[int, int, int] + homogenous_short_tuple: tuple[int] + homogenous_bi_tuple: tuple[int, int] + homogenous_tri_tuple: tuple[int, int, int] any_list: list any_sequence: Sequence diff --git a/tests/unit/avro_dataclasses/test_models.py b/tests/unit/avro_dataclasses/test_models.py index 85eff7df5..4161b84a5 100644 --- a/tests/unit/avro_dataclasses/test_models.py +++ b/tests/unit/avro_dataclasses/test_models.py @@ -4,7 +4,7 @@ """ from dataclasses import dataclass, field from karapace.avro_dataclasses.models import AvroModel -from typing import List, Optional, Tuple +from typing import Optional import datetime import enum @@ -21,7 +21,7 @@ class Symbol(enum.Enum): @dataclass(frozen=True) class NestedModel: bool_field: bool - values: Tuple[int, ...] + values: tuple[int, ...] 
@dataclass(frozen=True) @@ -29,14 +29,14 @@ class RecordModel(AvroModel): symbol: Symbol height: int = field(metadata={"type": "long"}) name: str - nested: Tuple[NestedModel, ...] + nested: tuple[NestedModel, ...] dt: datetime.datetime id: uuid.UUID @dataclass(frozen=True) class HasList(AvroModel): - values: List[NestedModel] + values: list[NestedModel] @dataclass(frozen=True) diff --git a/tests/unit/backup/backends/v3/conftest.py b/tests/unit/backup/backends/v3/conftest.py index 18fc33c88..412a3f2ac 100644 --- a/tests/unit/backup/backends/v3/conftest.py +++ b/tests/unit/backup/backends/v3/conftest.py @@ -2,8 +2,8 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ +from collections.abc import Iterator from contextlib import closing -from typing import Iterator import contextlib import io diff --git a/tests/unit/compatibility/test_compatibility.py b/tests/unit/compatibility/test_compatibility.py index 641f7df06..af41aae99 100644 --- a/tests/unit/compatibility/test_compatibility.py +++ b/tests/unit/compatibility/test_compatibility.py @@ -3,17 +3,44 @@ See LICENSE for details """ from avro.compatibility import SchemaCompatibilityType -from karapace.compatibility import check_compatibility, CompatibilityModes +from karapace.compatibility import CompatibilityModes +from karapace.compatibility.schema_compatibility import SchemaCompatibility from karapace.schema_models import SchemaType, ValidatedTypedSchema import json -def test_schema_type_can_change_when_mode_none(): +def test_schema_type_can_change_when_mode_none() -> None: avro_str = json.dumps({"type": "record", "name": "Record1", "fields": [{"name": "field1", "type": "int"}]}) - json_str = '{"type":"array"}' + json_str = '{"type": "array"}' avro_schema = ValidatedTypedSchema.parse(SchemaType.AVRO, avro_str) json_schema = ValidatedTypedSchema.parse(SchemaType.JSONSCHEMA, json_str) - result = check_compatibility(old_schema=avro_schema, new_schema=json_schema, compatibility_mode=CompatibilityModes.NONE) + 
result = SchemaCompatibility.check_compatibility( + old_schema=avro_schema, new_schema=json_schema, compatibility_mode=CompatibilityModes.NONE + ) assert result.compatibility is SchemaCompatibilityType.compatible + + +def test_schema_compatible_in_transitive_mode() -> None: + old_json = '{"type": "array", "name": "name_old"}' + new_json = '{"type": "array", "name": "name_new"}' + old_schema = ValidatedTypedSchema.parse(SchemaType.JSONSCHEMA, old_json) + new_schema = ValidatedTypedSchema.parse(SchemaType.JSONSCHEMA, new_json) + + result = SchemaCompatibility.check_compatibility( + old_schema=old_schema, new_schema=new_schema, compatibility_mode=CompatibilityModes.FULL_TRANSITIVE + ) + assert result.compatibility is SchemaCompatibilityType.compatible + + +def test_schema_incompatible_in_transitive_mode() -> None: + old_json = '{"type": "array"}' + new_json = '{"type": "integer"}' + old_schema = ValidatedTypedSchema.parse(SchemaType.JSONSCHEMA, old_json) + new_schema = ValidatedTypedSchema.parse(SchemaType.JSONSCHEMA, new_json) + + result = SchemaCompatibility.check_compatibility( + old_schema=old_schema, new_schema=new_schema, compatibility_mode=CompatibilityModes.FULL_TRANSITIVE + ) + assert result.compatibility is SchemaCompatibilityType.incompatible diff --git a/tests/unit/test_in_memory_database.py b/tests/unit/test_in_memory_database.py index aa25adf56..a3720940d 100644 --- a/tests/unit/test_in_memory_database.py +++ b/tests/unit/test_in_memory_database.py @@ -5,6 +5,7 @@ from __future__ import annotations from collections import defaultdict +from collections.abc import Iterable, Sequence from confluent_kafka.cimpl import KafkaError from karapace.config import DEFAULTS from karapace.constants import DEFAULT_SCHEMA_TOPIC @@ -17,7 +18,7 @@ from karapace.schema_references import Reference, Referents from karapace.typing import SchemaId, Version from pathlib import Path -from typing import Final, Iterable, Sequence +from typing import Final TEST_DATA_FOLDER: Final = 
Path("tests/unit/test_data/") diff --git a/tests/unit/test_json_resolver.py b/tests/unit/test_json_resolver.py new file mode 100644 index 000000000..86048dc35 --- /dev/null +++ b/tests/unit/test_json_resolver.py @@ -0,0 +1,43 @@ +""" +Copyright (c) 2023 Aiven Ltd +See LICENSE for details +""" + +from jsonschema import RefResolver +from karapace.dependency import Dependency +from karapace.schema_models import json_resolver, parse_jsonschema_definition, ValidatedTypedSchema +from karapace.schema_type import SchemaType +from karapace.typing import Subject, Version + + +def test_json_registry_no_dependencies(): + """Test case when there are no dependencies.""" + schema_str = '{"$id": "http://example.com/schema.json"}' + result = json_resolver(schema_str) + assert result is None + + +def test_json_registry_with_single_dependency(): + """Test json_registry with a single dependency.""" + schema_str = '{"$id": "http://example.com/schema.json"}' + dependency_schema = '{"$id": "http://example.com/dependency-schema.json"}' + + # Using the Dependency class from Karapace + dependencies = { + "dep1": Dependency( + name="dep1", + subject=Subject("subj"), + version=Version(1), + target_schema=ValidatedTypedSchema( + schema_type=SchemaType.JSONSCHEMA, + schema_str=dependency_schema, + schema=parse_jsonschema_definition(dependency_schema), + ), + ) + } + + result = json_resolver(schema_str, dependencies) + assert isinstance(result, RefResolver) + assert result.store + assert result.store.get("http://example.com/schema.json") + assert result.store.get("http://example.com/dependency-schema.json") diff --git a/tests/unit/test_protobuf_binary_serialization.py b/tests/unit/test_protobuf_binary_serialization.py index 6950066d3..99bfe375e 100644 --- a/tests/unit/test_protobuf_binary_serialization.py +++ b/tests/unit/test_protobuf_binary_serialization.py @@ -16,6 +16,8 @@ schema_protobuf_nested_message4_bin_protoc, schema_protobuf_oneof, schema_protobuf_oneof_bin, + 
schema_protobuf_optionals, + schema_protobuf_optionals_bin, schema_protobuf_order_after, schema_protobuf_order_after_bin, schema_protobuf_plain, @@ -89,6 +91,7 @@ (schema_protobuf_references, schema_protobuf_references_bin), (schema_protobuf_references2, schema_protobuf_references2_bin), (schema_protobuf_complex, schema_protobuf_complex_bin), + (schema_protobuf_optionals, schema_protobuf_optionals_bin), ], ) def test_schema_deserialize(schema_plain, schema_serialized): @@ -125,6 +128,7 @@ def test_protoc_serialized_schema_deserialize(schema_plain, schema_serialized): schema_protobuf_references, schema_protobuf_references2, schema_protobuf_complex, + schema_protobuf_optionals, ], ) def test_simple_schema_serialize(schema): diff --git a/tests/unit/test_schema_models.py b/tests/unit/test_schema_models.py index 392738335..313f77daf 100644 --- a/tests/unit/test_schema_models.py +++ b/tests/unit/test_schema_models.py @@ -10,13 +10,13 @@ from karapace.schema_models import parse_avro_schema_definition, SchemaVersion, TypedSchema, Versioner from karapace.schema_type import SchemaType from karapace.typing import Version, VersionTag -from typing import Any, Callable, Dict, Optional +from typing import Any, Callable, Optional import operator import pytest # Schema versions factory fixture type -SVFCallable = Callable[[None], Callable[[int, Dict[str, Any]], Dict[int, SchemaVersion]]] +SVFCallable = Callable[[None], Callable[[int, dict[str, Any]], dict[int, SchemaVersion]]] class TestVersion: @@ -90,8 +90,8 @@ def schema_versions_factory( self, avro_schema: str, avro_schema_parsed: AvroSchema, - ) -> Callable[[Version, Dict[str, Any]], Dict[Version, SchemaVersion]]: - def schema_versions(version: Version, schema_version_data: Optional[Dict[str, Any]] = None): + ) -> Callable[[Version, dict[str, Any]], dict[Version, SchemaVersion]]: + def schema_versions(version: Version, schema_version_data: Optional[dict[str, Any]] = None): schema_version_data = schema_version_data or dict() 
base_schema_version_data = dict( subject="test-topic", diff --git a/tests/unit/test_schema_reader.py b/tests/unit/test_schema_reader.py index 5d625931b..552fa0be7 100644 --- a/tests/unit/test_schema_reader.py +++ b/tests/unit/test_schema_reader.py @@ -6,7 +6,7 @@ """ from _pytest.logging import LogCaptureFixture -from concurrent.futures import ThreadPoolExecutor +from concurrent.futures import Future, ThreadPoolExecutor from confluent_kafka import Message from dataclasses import dataclass from karapace.config import DEFAULTS @@ -25,9 +25,10 @@ ) from karapace.schema_type import SchemaType from karapace.typing import SchemaId, Version +from pytest import MonkeyPatch from tests.base_testcase import BaseTestCase from tests.utils import schema_protobuf_invalid_because_corrupted, schema_protobuf_with_invalid_ref -from typing import Callable, List, Tuple +from typing import Callable, Optional from unittest.mock import Mock import confluent_kafka @@ -325,6 +326,84 @@ def test_handle_msg_delete_subject_logs(caplog: LogCaptureFixture) -> None: assert log.message == "Hard delete: version: Version(2) for subject: 'test-subject' did not exist, should have" +@dataclass +class HealthCheckTestCase(BaseTestCase): + current_time: float + consecutive_unexpected_errors: int + consecutive_unexpected_errors_start: float + healthy: bool + check_topic_error: Optional[Exception] = None + + +@pytest.mark.parametrize( + "testcase", + [ + HealthCheckTestCase( + test_name="No errors", + current_time=0, + consecutive_unexpected_errors=0, + consecutive_unexpected_errors_start=0, + healthy=True, + ), + HealthCheckTestCase( + test_name="10 errors in 5 seconds", + current_time=5, + consecutive_unexpected_errors=10, + consecutive_unexpected_errors_start=0, + healthy=True, + ), + HealthCheckTestCase( + test_name="1 error in 20 seconds", + current_time=20, + consecutive_unexpected_errors=1, + consecutive_unexpected_errors_start=0, + healthy=True, + ), + HealthCheckTestCase( + test_name="3 errors in 
10 seconds", + current_time=10, + consecutive_unexpected_errors=3, + consecutive_unexpected_errors_start=0, + healthy=False, + ), + HealthCheckTestCase( + test_name="check topic error", + current_time=5, + consecutive_unexpected_errors=1, + consecutive_unexpected_errors_start=0, + healthy=False, + check_topic_error=Exception("Somethings wrong"), + ), + ], +) +async def test_schema_reader_health_check(testcase: HealthCheckTestCase, monkeypatch: MonkeyPatch) -> None: + offset_watcher = OffsetWatcher() + key_formatter_mock = Mock() + admin_client_mock = Mock() + + emtpy_future = Future() + if testcase.check_topic_error: + emtpy_future.set_exception(testcase.check_topic_error) + else: + emtpy_future.set_result(None) + admin_client_mock.describe_topics.return_value = {DEFAULTS["topic_name"]: emtpy_future} + + schema_reader = KafkaSchemaReader( + config=DEFAULTS, + offset_watcher=offset_watcher, + key_formatter=key_formatter_mock, + master_coordinator=None, + database=InMemoryDatabase(), + ) + + monkeypatch.setattr(time, "monotonic", lambda: testcase.current_time) + schema_reader.admin_client = admin_client_mock + schema_reader.consecutive_unexpected_errors = testcase.consecutive_unexpected_errors + schema_reader.consecutive_unexpected_errors_start = testcase.consecutive_unexpected_errors_start + + assert await schema_reader.is_healthy() == testcase.healthy + + @dataclass class KafkaMessageHandlingErrorTestCase(BaseTestCase): key: bytes @@ -336,8 +415,8 @@ class KafkaMessageHandlingErrorTestCase(BaseTestCase): @pytest.fixture(name="schema_reader_with_consumer_messages_factory") -def fixture_schema_reader_with_consumer_messages_factory() -> Callable[[Tuple[List[Message]]], KafkaSchemaReader]: - def factory(consumer_messages: Tuple[List[Message]]) -> KafkaSchemaReader: +def fixture_schema_reader_with_consumer_messages_factory() -> Callable[[tuple[list[Message]]], KafkaSchemaReader]: + def factory(consumer_messages: tuple[list[Message]]) -> KafkaSchemaReader: 
key_formatter_mock = Mock(spec=KeyFormatter) consumer_mock = Mock(spec=KafkaConsumer) @@ -507,7 +586,7 @@ def factory(key: bytes, value: bytes, offset: int = 1) -> Message: def test_message_error_handling( caplog: LogCaptureFixture, test_case: KafkaMessageHandlingErrorTestCase, - schema_reader_with_consumer_messages_factory: Callable[[Tuple[List[Message]]], KafkaSchemaReader], + schema_reader_with_consumer_messages_factory: Callable[[tuple[list[Message]]], KafkaSchemaReader], message_factory: Callable[[bytes, bytes, int], Message], ) -> None: message = message_factory(key=test_case.key, value=test_case.value) @@ -528,7 +607,7 @@ def test_message_error_handling( def test_message_error_handling_with_invalid_reference_schema_protobuf( caplog: LogCaptureFixture, - schema_reader_with_consumer_messages_factory: Callable[[Tuple[List[Message]]], KafkaSchemaReader], + schema_reader_with_consumer_messages_factory: Callable[[tuple[list[Message]]], KafkaSchemaReader], message_factory: Callable[[bytes, bytes, int], Message], ) -> None: # Given an invalid schema (corrupted) diff --git a/tests/unit/test_schema_registry_api.py b/tests/unit/test_schema_registry_api.py index 6d850f5fc..7fcecd47e 100644 --- a/tests/unit/test_schema_registry_api.py +++ b/tests/unit/test_schema_registry_api.py @@ -14,7 +14,7 @@ import pytest -async def test_validate_schema_request_body(): +async def test_validate_schema_request_body() -> None: controller = KarapaceSchemaRegistryController(config=set_config_defaults(DEFAULTS)) controller._validate_schema_request_body( # pylint: disable=W0212 @@ -30,7 +30,7 @@ async def test_validate_schema_request_body(): assert str(exc_info.value) == "HTTPResponse 422" -async def test_forward_when_not_ready(): +async def test_forward_when_not_ready() -> None: with patch("karapace.schema_registry_apis.KarapaceSchemaRegistry") as schema_registry_class: schema_reader_mock = Mock(spec=KafkaSchemaReader) ready_property_mock = PropertyMock(return_value=False) diff --git 
a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 51633376c..9a3a33d73 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -3,37 +3,12 @@ See LICENSE for details """ from _pytest.logging import LogCaptureFixture -from karapace.utils import remove_prefix, shutdown +from karapace.utils import shutdown from unittest.mock import patch import logging -def test_remove_prefix_basic() -> None: - result = remove_prefix("hello world", "hello ") - assert result == "world" - - -def test_remove_prefix_empty_prefix() -> None: - result = remove_prefix("hello world", "") - assert result == "hello world" - - -def test_remove_prefix_prefix_not_in_string() -> None: - result = remove_prefix("hello world", "hey ") - assert result == "hello world" - - -def test_remove_prefix_multiple_occurrences_of_prefix() -> None: - result = remove_prefix("hello hello world", "hello ") - assert result == "hello world" - - -def test_remove_prefix_empty_string() -> None: - result = remove_prefix("", "hello ") - assert result == "" - - def test_shutdown(caplog: LogCaptureFixture) -> None: with caplog.at_level(logging.WARNING, logger="karapace.utils"): with patch("karapace.utils.signal") as mock_signal: diff --git a/tests/utils.py b/tests/utils.py index 3757e0739..191fba348 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -10,7 +10,7 @@ from karapace.utils import Expiration from pathlib import Path from subprocess import Popen -from typing import Any, Callable, IO, List, Union +from typing import Any, Callable, IO, Union from urllib.parse import quote import asyncio @@ -264,7 +264,7 @@ def new_topic(admin_client: KafkaAdminClient, prefix: str = "topic", *, num_part return topic_name -async def wait_for_topics(rest_async_client: Client, topic_names: List[str], timeout: float, sleep: float) -> None: +async def wait_for_topics(rest_async_client: Client, topic_names: list[str], timeout: float, sleep: float) -> None: for topic in topic_names: expiration = 
Expiration.from_timeout(timeout=timeout) topic_found = False diff --git a/website/README.rst b/website/README.rst index 67e55aeef..c333ba578 100644 --- a/website/README.rst +++ b/website/README.rst @@ -6,7 +6,7 @@ A static HTML site, generated with Sphinx. You can find the website source in th Dependencies ------------ -You need Python 3.8+. Install the dependencies with ``pip``:: +You need Python 3.9+. Install the dependencies with ``pip``:: pip install -r requirements.txt diff --git a/website/source/quickstart.rst b/website/source/quickstart.rst index 6e6ecdba6..f640e68d2 100644 --- a/website/source/quickstart.rst +++ b/website/source/quickstart.rst @@ -60,6 +60,10 @@ Test the compatibility of a schema with the latest schema under subject "test-ke $KARAPACE_REGISTRY_URI/compatibility/subjects/test-key/versions/latest {"is_compatible":true} +NOTE: if the subject's compatibility mode is transitive (BACKWARD_TRANSITIVE, FORWARD_TRANSITIVE or FULL_TRANSITIVE) then the +compatibility is checked not only against the latest schema, but also against all previous schemas, as it would be done +when trying to register the new schema through the `subjects/<subject>/versions` endpoint. + Get current global backwards compatibility setting value:: $ curl -X GET $KARAPACE_REGISTRY_URI/config