diff --git a/.dockerignore b/.dockerignore index 3766c24b..fe58a30e 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,3 +1,4 @@ scripts venv data +**.DS_Store diff --git a/.env b/.env index 343c3c9a..8c6792ae 100644 --- a/.env +++ b/.env @@ -10,3 +10,13 @@ KEYCLOAK_ADMIN_PASSWORD=password KEYCLOAK_CLIENT_SECRET="QJiOGn09eCEfnqAmcPP2l4vMU8grlmVQ" REDIRECT_URIS=http://${HOSTNAME}/docs/oauth2-redirect POST_LOGOUT_REDIRECT_URIS=http://${HOSTNAME}/aiod-auth/realms/aiod/protocol/openid-connect/logout + +#ELASTICSEARCH +ES_USER=elastic +ES_PASSWORD=changeme +ES_DISCOVERY_TYPE=single-node +ES_ROLE="edit_aiod_resources" +ES_JAVA_OPTS="-Xmx256m -Xms256m" + +#LOGSTASH +LS_JAVA_OPTS="-Xmx256m -Xms256m" diff --git a/.gitignore b/.gitignore index 2910b82d..85c76209 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,18 @@ # Project Specific + # data/ is intended for database data from the mysql container data/ +# Generated Logstash configuration +logstash/config/config/logstash.yml +logstash/config/config/pipelines.yml +logstash/config/pipeline +logstash/config/sql + + + + + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -113,6 +124,7 @@ venv/ ENV/ env.bak/ venv.bak/ +**.DS_Store # Spyder project settings .spyderproject @@ -135,4 +147,4 @@ dmypy.json # Pyre type checker .pyre/ -.vscode \ No newline at end of file +.vscode diff --git a/README.md b/README.md index 2a4dfad0..7cd7fafa 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,11 @@ For development: - Additional 'mysqlclient' dependencies. Please have a look at [their installation instructions] (https://github.com/PyMySQL/mysqlclient#install). +## Production environment + +For production environments, Elastic recommends -Xms4g and -Xmx8g for the JVM heap settings.\ +These parameters can be defined in the .env file. +See the [Logstash JVM settings guide](https://www.elastic.co/guide/en/logstash/current/jvm-settings.html). 
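As a minimal sketch, a production `.env` could override the 256m development defaults shown above via the existing `ES_JAVA_OPTS` and `LS_JAVA_OPTS` variables; the 4g/8g values below are illustrative and should be tuned to the memory available on the host:

```
# Illustrative production heap sizes (adjust to the host's available RAM)
ES_JAVA_OPTS="-Xms4g -Xmx8g"
LS_JAVA_OPTS="-Xms4g -Xmx8g"
```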
## Installation diff --git a/connectors/fill-examples.sh b/connectors/fill-examples.sh index df4600ee..3140b614 100755 --- a/connectors/fill-examples.sh +++ b/connectors/fill-examples.sh @@ -16,10 +16,6 @@ python3 connectors/synchronization.py \ -c connectors.example.example.ExampleEducationalResourceConnector \ -w /opt/connectors/data/example/educational_resource -python3 connectors/synchronization.py \ - -c connectors.example.example.ExampleEventConnector \ - -w /opt/connectors/data/example/event - python3 connectors/synchronization.py \ -c connectors.example.example.ExampleExperimentConnector \ -w /opt/connectors/data/example/experiment @@ -40,6 +36,10 @@ python3 connectors/synchronization.py \ -c connectors.example.example.ExamplePersonConnector \ -w /opt/connectors/data/example/person +python3 connectors/synchronization.py \ + -c connectors.example.example.ExampleEventConnector \ + -w /opt/connectors/data/example/event + python3 connectors/synchronization.py \ -c connectors.example.example.ExampleProjectConnector \ -w /opt/connectors/data/example/project @@ -92,4 +92,4 @@ python3 connectors/synchronization.py \ python3 connectors/synchronization.py \ -c connectors.example.enum.EnumConnectorStatus \ - -w /opt/connectors/data/enum/status \ No newline at end of file + -w /opt/connectors/data/enum/status diff --git a/data/elasticsearch/.gitkeep b/data/elasticsearch/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/docker-compose.yaml b/docker-compose.yaml index 8dbfc548..74e720c0 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,3 +1,4 @@ + version: '3.9' services: @@ -45,8 +46,7 @@ services: depends_on: app: condition: service_healthy - - + deletion: build: context: deletion @@ -167,3 +167,66 @@ services: depends_on: app: condition: service_healthy + + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:8.8.2 + container_name: elasticsearch + env_file: .env + environment: + - ES_JAVA_OPTS=$ES_JAVA_OPTS + - ELASTIC_USER=$ES_USER + - ELASTIC_PASSWORD=$ES_PASSWORD + - discovery.type=$ES_DISCOVERY_TYPE + ports: + - 9200:9200 + - 9300:9300 + volumes: + - type: bind + source: ./es/elasticsearch.yml + target: /usr/share/elasticsearch/config/elasticsearch.yml + read_only: true + - ./data/elasticsearch:/usr/share/elasticsearch/data + healthcheck: + test: ["CMD-SHELL", "curl -u $ES_USER:$ES_PASSWORD --silent --fail localhost:9200/_cluster/health || exit 1"] + interval: 5s + timeout: 30s + retries: 30 + + es_logstash_setup: + image: ai4eu_server + container_name: es_logstash_setup + env_file: .env + environment: + - MYSQL_ROOT_PASSWORD=$MYSQL_ROOT_PASSWORD + - ES_USER=$ES_USER + - ES_PASSWORD=$ES_PASSWORD + volumes: + - ./src:/app + - ./logstash:/logstash + command: > + /bin/bash -c "python setup/logstash_setup/generate_logstash_config_files.py && + python setup/es_setup/generate_elasticsearch_indices.py" + restart: "no" + depends_on: + elasticsearch: + condition: service_healthy + logstash: + build: + context: logstash/ + dockerfile: Dockerfile + container_name: logstash + env_file: .env + environment: + - LS_JAVA_OPTS=$LS_JAVA_OPTS + ports: + - 5044:5044 + - 5000:5000/tcp + - 5000:5000/udp + - 9600:9600 + volumes: + - ./logstash/config/config:/usr/share/logstash/config:ro + - ./logstash/config/pipeline:/usr/share/logstash/pipeline:ro + - ./logstash/config/sql:/usr/share/logstash/sql:ro + depends_on: + es_logstash_setup: + condition: service_completed_successfully diff --git a/es/elasticsearch.yml b/es/elasticsearch.yml new file mode 100644 index 
00000000..b06c1d21 --- /dev/null +++ b/es/elasticsearch.yml @@ -0,0 +1,13 @@ +--- +## Default Elasticsearch configuration from Elasticsearch base image. +## https://github.com/elastic/elasticsearch/blob/master/distribution/docker/src/docker/config/elasticsearch.yml +# +cluster.name: "docker-cluster" +network.host: 0.0.0.0 + +## X-Pack settings +## see https://www.elastic.co/guide/en/elasticsearch/reference/current/setup-xpack.html +# +xpack.license.self_generated.type: basic +xpack.security.enabled: true +xpack.monitoring.collection.enabled: true diff --git a/logstash/Dockerfile b/logstash/Dockerfile new file mode 100644 index 00000000..432bdf4b --- /dev/null +++ b/logstash/Dockerfile @@ -0,0 +1,13 @@ +# https://www.docker.elastic.co/ +FROM docker.elastic.co/logstash/logstash:8.11.0 + +# Download MySQL JDBC driver to connect Logstash to MySQL +RUN curl -Lo "mysql-connector-j-8.2.0.tar.gz" "https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-j-8.2.0.tar.gz" \ + && tar -xf "mysql-connector-j-8.2.0.tar.gz" "mysql-connector-j-8.2.0/mysql-connector-j-8.2.0.jar" \ + && mv "mysql-connector-j-8.2.0/mysql-connector-j-8.2.0.jar" "mysql-connector-j.jar" \ + && rm -r "mysql-connector-j-8.2.0" "mysql-connector-j-8.2.0.tar.gz" + +ENTRYPOINT ["/usr/local/bin/docker-entrypoint"] + +# Add your logstash plugins setup here +# Example: RUN logstash-plugin install logstash-filter-json diff --git a/logstash/config/config/jvm.options b/logstash/config/config/jvm.options new file mode 100644 index 00000000..9b1b6616 --- /dev/null +++ b/logstash/config/config/jvm.options @@ -0,0 +1,72 @@ +## JVM configuration + +# Xms represents the initial size of total heap space +# Xmx represents the maximum size of total heap space + +-Xms1g +-Xmx1g + +################################################################ +## Expert settings +################################################################ +## +## All settings below this section are considered +## expert settings. Don't tamper with them unless +## you understand what you are doing +## +################################################################ + +## GC configuration +11-13:-XX:+UseConcMarkSweepGC +11-13:-XX:CMSInitiatingOccupancyFraction=75 +11-13:-XX:+UseCMSInitiatingOccupancyOnly + +## Locale +# Set the locale language +#-Duser.language=en + +# Set the locale country +#-Duser.country=US + +# Set the locale variant, if any +#-Duser.variant= + +## basic + +# set the I/O temp directory +#-Djava.io.tmpdir=$HOME + +# set to headless, just in case +-Djava.awt.headless=true + +# ensure UTF-8 encoding by default (e.g. 
filenames) +-Dfile.encoding=UTF-8 + +# use our provided JNA always versus the system one +#-Djna.nosys=true + +# Turn on JRuby invokedynamic +-Djruby.compile.invokedynamic=true + +## heap dumps + +# generate a heap dump when an allocation from the Java heap fails +# heap dumps are created in the working directory of the JVM +-XX:+HeapDumpOnOutOfMemoryError + +# specify an alternative path for heap dumps +# ensure the directory exists and has sufficient space +#-XX:HeapDumpPath=${LOGSTASH_HOME}/heapdump.hprof + +## GC logging +#-Xlog:gc*,gc+age=trace,safepoint:file=@loggc@:utctime,pid,tags:filecount=32,filesize=64m + +# log GC status to a file with time stamps +# ensure the directory exists +#-Xloggc:${LS_GC_LOG_FILE} + +# Entropy source for randomness +-Djava.security.egd=file:/dev/urandom + +# Copy the logging context from parent threads to children +-Dlog4j2.isThreadContextMapInheritable=true \ No newline at end of file diff --git a/logstash/config/config/log4j2.file.properties b/logstash/config/config/log4j2.file.properties new file mode 100644 index 00000000..234b23db --- /dev/null +++ b/logstash/config/config/log4j2.file.properties @@ -0,0 +1,147 @@ +status = error +name = LogstashPropertiesConfig + +appender.console.type = Console +appender.console.name = plain_console +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = [%d{ISO8601}][%-5p][%-25c]%notEmpty{[%X{pipeline.id}]}%notEmpty{[%X{plugin.id}]} %m%n + +appender.json_console.type = Console +appender.json_console.name = json_console +appender.json_console.layout.type = JSONLayout +appender.json_console.layout.compact = true +appender.json_console.layout.eventEol = true + +appender.rolling.type = RollingFile +appender.rolling.name = plain_rolling +appender.rolling.fileName = ${sys:ls.logs}/logstash-plain.log +appender.rolling.filePattern = ${sys:ls.logs}/logstash-plain-%d{yyyy-MM-dd}-%i.log.gz +appender.rolling.policies.type = Policies +appender.rolling.policies.time.type = TimeBasedTriggeringPolicy +appender.rolling.policies.time.interval = 1 +appender.rolling.policies.time.modulate = true +appender.rolling.layout.type = PatternLayout +appender.rolling.layout.pattern = [%d{ISO8601}][%-5p][%-25c]%notEmpty{[%X{pipeline.id}]}%notEmpty{[%X{plugin.id}]} %m%n +appender.rolling.policies.size.type = SizeBasedTriggeringPolicy +appender.rolling.policies.size.size = 100MB +appender.rolling.strategy.type = DefaultRolloverStrategy +appender.rolling.strategy.max = 30 +appender.rolling.avoid_pipelined_filter.type = PipelineRoutingFilter + +appender.json_rolling.type = RollingFile +appender.json_rolling.name = json_rolling +appender.json_rolling.fileName = ${sys:ls.logs}/logstash-json.log +appender.json_rolling.filePattern = ${sys:ls.logs}/logstash-json-%d{yyyy-MM-dd}-%i.log.gz +appender.json_rolling.policies.type = Policies +appender.json_rolling.policies.time.type = TimeBasedTriggeringPolicy +appender.json_rolling.policies.time.interval = 1 +appender.json_rolling.policies.time.modulate = true +appender.json_rolling.layout.type = JSONLayout +appender.json_rolling.layout.compact = true +appender.json_rolling.layout.eventEol = true +appender.json_rolling.policies.size.type = SizeBasedTriggeringPolicy +appender.json_rolling.policies.size.size = 100MB +appender.json_rolling.strategy.type = DefaultRolloverStrategy +appender.json_rolling.strategy.max = 30 +appender.json_rolling.avoid_pipelined_filter.type = PipelineRoutingFilter + +appender.routing.type = PipelineRouting +appender.routing.name = pipeline_routing_appender 
+appender.routing.pipeline.type = RollingFile +appender.routing.pipeline.name = appender-${ctx:pipeline.id} +appender.routing.pipeline.fileName = ${sys:ls.logs}/pipeline_${ctx:pipeline.id}.log +appender.routing.pipeline.filePattern = ${sys:ls.logs}/pipeline_${ctx:pipeline.id}.%i.log.gz +appender.routing.pipeline.layout.type = PatternLayout +appender.routing.pipeline.layout.pattern = [%d{ISO8601}][%-5p][%-25c] %m%n +appender.routing.pipeline.policy.type = SizeBasedTriggeringPolicy +appender.routing.pipeline.policy.size = 100MB +appender.routing.pipeline.strategy.type = DefaultRolloverStrategy +appender.routing.pipeline.strategy.max = 30 + +rootLogger.level = ${sys:ls.log.level} +rootLogger.appenderRef.console.ref = ${sys:ls.log.format}_console +rootLogger.appenderRef.rolling.ref = ${sys:ls.log.format}_rolling +rootLogger.appenderRef.routing.ref = pipeline_routing_appender + +# Slowlog + +appender.console_slowlog.type = Console +appender.console_slowlog.name = plain_console_slowlog +appender.console_slowlog.layout.type = PatternLayout +appender.console_slowlog.layout.pattern = [%d{ISO8601}][%-5p][%-25c] %m%n + +appender.json_console_slowlog.type = Console +appender.json_console_slowlog.name = json_console_slowlog +appender.json_console_slowlog.layout.type = JSONLayout +appender.json_console_slowlog.layout.compact = true +appender.json_console_slowlog.layout.eventEol = true + +appender.rolling_slowlog.type = RollingFile +appender.rolling_slowlog.name = plain_rolling_slowlog +appender.rolling_slowlog.fileName = ${sys:ls.logs}/logstash-slowlog-plain.log +appender.rolling_slowlog.filePattern = ${sys:ls.logs}/logstash-slowlog-plain-%d{yyyy-MM-dd}-%i.log.gz +appender.rolling_slowlog.policies.type = Policies +appender.rolling_slowlog.policies.time.type = TimeBasedTriggeringPolicy +appender.rolling_slowlog.policies.time.interval = 1 +appender.rolling_slowlog.policies.time.modulate = true +appender.rolling_slowlog.layout.type = PatternLayout +appender.rolling_slowlog.layout.pattern = [%d{ISO8601}][%-5p][%-25c] %m%n +appender.rolling_slowlog.policies.size.type = SizeBasedTriggeringPolicy +appender.rolling_slowlog.policies.size.size = 100MB +appender.rolling_slowlog.strategy.type = DefaultRolloverStrategy +appender.rolling_slowlog.strategy.max = 30 + +appender.json_rolling_slowlog.type = RollingFile +appender.json_rolling_slowlog.name = json_rolling_slowlog +appender.json_rolling_slowlog.fileName = ${sys:ls.logs}/logstash-slowlog-json.log +appender.json_rolling_slowlog.filePattern = ${sys:ls.logs}/logstash-slowlog-json-%d{yyyy-MM-dd}-%i.log.gz +appender.json_rolling_slowlog.policies.type = Policies +appender.json_rolling_slowlog.policies.time.type = TimeBasedTriggeringPolicy +appender.json_rolling_slowlog.policies.time.interval = 1 +appender.json_rolling_slowlog.policies.time.modulate = true +appender.json_rolling_slowlog.layout.type = JSONLayout +appender.json_rolling_slowlog.layout.compact = true +appender.json_rolling_slowlog.layout.eventEol = true +appender.json_rolling_slowlog.policies.size.type = SizeBasedTriggeringPolicy +appender.json_rolling_slowlog.policies.size.size = 100MB +appender.json_rolling_slowlog.strategy.type = DefaultRolloverStrategy +appender.json_rolling_slowlog.strategy.max = 30 + +logger.slowlog.name = slowlog +logger.slowlog.level = trace +logger.slowlog.appenderRef.console_slowlog.ref = ${sys:ls.log.format}_console_slowlog +logger.slowlog.appenderRef.rolling_slowlog.ref = ${sys:ls.log.format}_rolling_slowlog +logger.slowlog.additivity = false + +logger.licensereader.name = 
logstash.licensechecker.licensereader +logger.licensereader.level = error + +# Silence http-client by default +logger.apache_http_client.name = org.apache.http +logger.apache_http_client.level = fatal + +# Deprecation log +appender.deprecation_rolling.type = RollingFile +appender.deprecation_rolling.name = deprecation_plain_rolling +appender.deprecation_rolling.fileName = ${sys:ls.logs}/logstash-deprecation.log +appender.deprecation_rolling.filePattern = ${sys:ls.logs}/logstash-deprecation-%d{yyyy-MM-dd}-%i.log.gz +appender.deprecation_rolling.policies.type = Policies +appender.deprecation_rolling.policies.time.type = TimeBasedTriggeringPolicy +appender.deprecation_rolling.policies.time.interval = 1 +appender.deprecation_rolling.policies.time.modulate = true +appender.deprecation_rolling.layout.type = PatternLayout +appender.deprecation_rolling.layout.pattern = [%d{ISO8601}][%-5p][%-25c]%notEmpty{[%X{pipeline.id}]}%notEmpty{[%X{plugin.id}]} %m%n +appender.deprecation_rolling.policies.size.type = SizeBasedTriggeringPolicy +appender.deprecation_rolling.policies.size.size = 100MB +appender.deprecation_rolling.strategy.type = DefaultRolloverStrategy +appender.deprecation_rolling.strategy.max = 30 + +logger.deprecation.name = org.logstash.deprecation, deprecation +logger.deprecation.level = WARN +logger.deprecation.appenderRef.deprecation_rolling.ref = deprecation_plain_rolling +logger.deprecation.additivity = false + +logger.deprecation_root.name = deprecation +logger.deprecation_root.level = WARN +logger.deprecation_root.appenderRef.deprecation_rolling.ref = deprecation_plain_rolling +logger.deprecation_root.additivity = false diff --git a/logstash/config/config/log4j2.properties b/logstash/config/config/log4j2.properties new file mode 100644 index 00000000..663a0158 --- /dev/null +++ b/logstash/config/config/log4j2.properties @@ -0,0 +1,16 @@ +status = error +name = LogstashPropertiesConfig + +appender.console.type = Console +appender.console.name = plain_console +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = [%d{ISO8601}][%-5p][%-25c]%notEmpty{[%X{pipeline.id}]}%notEmpty{[%X{plugin.id}]} %m%n + +appender.json_console.type = Console +appender.json_console.name = json_console +appender.json_console.layout.type = JSONLayout +appender.json_console.layout.compact = true +appender.json_console.layout.eventEol = true + +rootLogger.level = ${sys:ls.log.level} +rootLogger.appenderRef.console.ref = ${sys:ls.log.format}_console diff --git a/logstash/config/config/logstash-sample.conf b/logstash/config/config/logstash-sample.conf new file mode 100644 index 00000000..2fa9229d --- /dev/null +++ b/logstash/config/config/logstash-sample.conf @@ -0,0 +1,17 @@ +# Sample Logstash configuration for creating a simple +# Beats -> Logstash -> Elasticsearch pipeline. 
+ +input { + beats { + port => 5044 + } +} + +output { + elasticsearch { + hosts => ["http://localhost:9200"] + index => "%{[@metadata][beat]}-%{[@metadata][version]}-%{+YYYY.MM.dd}" + #user => "elastic" + #password => "changeme" + } +} diff --git a/logstash/config/config/pipelines.yml b/logstash/config/config/pipelines.yml new file mode 100644 index 00000000..c0007336 --- /dev/null +++ b/logstash/config/config/pipelines.yml @@ -0,0 +1,5 @@ +- pipeline.id: init-table-pipeline + path.config: "/usr/share/logstash/pipeline/init_table.conf" + +- pipeline.id: sync-table-pipeline + path.config: "/usr/share/logstash/pipeline/sync_table.conf" diff --git a/logstash/config/config/startup.options b/logstash/config/config/startup.options new file mode 100644 index 00000000..3829fdb9 --- /dev/null +++ b/logstash/config/config/startup.options @@ -0,0 +1,53 @@ +################################################################################ +# These settings are ONLY used by $LS_HOME/bin/system-install to create a custom +# startup script for Logstash and is not used by Logstash itself. It should +# automagically use the init system (systemd, upstart, sysv, etc.) that your +# Linux distribution uses. +# +# After changing anything here, you need to re-run $LS_HOME/bin/system-install +# as root to push the changes to the init script. +################################################################################ + +# Override Java location +#JAVACMD=/usr/bin/java + +# Set a home directory +LS_HOME=/usr/share/logstash + +# logstash settings directory, the path which contains logstash.yml +LS_SETTINGS_DIR=/etc/logstash + +# Arguments to pass to logstash +LS_OPTS="--path.settings ${LS_SETTINGS_DIR}" + +# Arguments to pass to java +LS_JAVA_OPTS="" + +# pidfiles aren't used the same way for upstart and systemd; this is for sysv users. +LS_PIDFILE=/var/run/logstash.pid + +# user and group id to be invoked as +LS_USER=logstash +LS_GROUP=logstash + +# Enable GC logging by uncommenting the appropriate lines in the GC logging +# section in jvm.options +LS_GC_LOG_FILE=/var/log/logstash/gc.log + +# Open file limit +LS_OPEN_FILES=16384 + +# Nice level +LS_NICE=19 + +# Change these to have the init script named and described differently +# This is useful when running multiple instances of Logstash on the same +# physical box or vm +SERVICE_NAME="logstash" +SERVICE_DESCRIPTION="logstash" + +# If you need to run a command or script before launching Logstash, put it +# between the lines beginning with `read` and `EOM`, and uncomment those lines. 
+### +## read -r -d '' PRESTART << EOM +## EOM diff --git a/logstash/config/pipeline/.gitkeep b/logstash/config/pipeline/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/logstash/config/sql/.gitkeep b/logstash/config/sql/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/pyproject.toml b/pyproject.toml index a96076b8..7a8617b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,8 @@ dependencies = [ "xmltodict==0.13.0", "python-multipart==0.0.6", "mysql-connector-python==8.2.0", + "elasticsearch==8.10.1", + "jinja2==3.1.2", ] readme = "README.md" diff --git a/src/connectors/example/resources/resource/events.json b/src/connectors/example/resources/resource/events.json index ea54cc98..67972cdc 100644 --- a/src/connectors/example/resources/resource/events.json +++ b/src/connectors/example/resources/resource/events.json @@ -90,6 +90,190 @@ "registration_link": "https://example.com/registration-form", "status": "scheduled", "mode": "offline" + }, + { + "platform": "example", + "platform_resource_identifier": "2", + "name": "Name of the Event 2", + "description": {"plain": "A description."}, + "same_as": "https://www.example.com/resource/this_resource", + "date_published": "2022-01-01T15:15:00.000", + "version": "1.1.0", + "pid": "https://doi.org/10.1000/182", + "aiod_entry": { + "editor": [], + "status": "draft" + }, + "alternate_name": [ + "alias 1", + "alias 2" + ], + "application_area": [ + "Fraud Prevention", + "Voice Assistance", + "Disease Classification" + ], + "citation": [], + "contact": [], + "creator": [], + "distribution": [ + { + "checksum": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "checksum_algorithm": "sha256", + "copyright": "2010-2020 Example Company. All rights reserved.", + "content_url": "https://www.example.com/case_study/file.pdf", + "content_size_kb": 10000, + "date_published": "2022-01-01T15:15:00.000", + "description": "Description of this file.", + "encoding_format": "application/pdf", + "name": "Name of this file.", + "technology_readiness_level": 1 + } + ], + "has_part": [], + "industrial_sector": [ + "Finance", + "eCommerce", + "Healthcare" + ], + "is_part_of": [], + "keyword": [ + "keyword1", + "keyword2" + ], + "location": [ + { + "address": {"country": "NED", "street": "Street Name 10", "postal_code": "1234AB"}, + "geo": {"latitude": 37.42242, "longitude": -122.08585, "elevation_millimeters": 2000} + } + ], + "relevant_link": ["https://www.example.com/a_relevant_link", "https://www.example.com/another_relevant_link"], + "license": "https://creativecommons.org/share-your-work/public-domain/cc0/", + "media": [ + { + "checksum": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "checksum_algorithm": "sha256", + "copyright": "2010-2020 Example Company. All rights reserved.", + "content_url": "https://www.example.com/image.jpeg", + "content_size_kb": 10000, + "date_published": "2022-01-01T15:15:00.000", + "description": "Description of this file.", + "encoding_format": "image/jpeg", + "name": "Name of this file." + } + ], + "note": [ + { + "value": "A brief record of points or ideas about this AI resource." + } + ], + "research_area": [ + "Explainable AI", + "Physical AI" + ], + "scientific_domain": [ + "Anomaly Detection", + "Voice Recognition", + "Computer Vision." + ], + "start_date": "2021-02-03T15:15:00", + "end_date": "2022-02-03T15:15:00", + "schedule": "10:00-10:30: Opening. 
10:30-11:00 ...", + "registration_link": "https://example.com/registration-form", + "status": "scheduled", + "mode": "offline" + }, + { + "platform": "example", + "platform_resource_identifier": "3", + "name": "Name of the Event 3", + "description": {"plain": "A description."}, + "same_as": "https://www.example.com/resource/this_resource", + "date_published": "2022-01-01T15:15:00.000", + "version": "1.1.0", + "pid": "https://doi.org/10.1000/182", + "aiod_entry": { + "editor": [], + "status": "draft" + }, + "alternate_name": [ + "alias 1", + "alias 2" + ], + "application_area": [ + "Fraud Prevention", + "Voice Assistance", + "Disease Classification" + ], + "citation": [], + "contact": [], + "creator": [], + "distribution": [ + { + "checksum": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "checksum_algorithm": "sha256", + "copyright": "2010-2020 Example Company. All rights reserved.", + "content_url": "https://www.example.com/case_study/file.pdf", + "content_size_kb": 10000, + "date_published": "2022-01-01T15:15:00.000", + "description": "Description of this file.", + "encoding_format": "application/pdf", + "name": "Name of this file.", + "technology_readiness_level": 1 + } + ], + "has_part": [], + "industrial_sector": [ + "Finance", + "eCommerce", + "Healthcare" + ], + "is_part_of": [], + "keyword": [ + "keyword1", + "keyword2" + ], + "location": [ + { + "address": {"country": "NED", "street": "Street Name 10", "postal_code": "1234AB"}, + "geo": {"latitude": 37.42242, "longitude": -122.08585, "elevation_millimeters": 2000} + } + ], + "relevant_link": ["https://www.example.com/a_relevant_link", "https://www.example.com/another_relevant_link"], + "license": "https://creativecommons.org/share-your-work/public-domain/cc0/", + "media": [ + { + "checksum": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "checksum_algorithm": "sha256", + "copyright": "2010-2020 Example Company. All rights reserved.", + "content_url": "https://www.example.com/image.jpeg", + "content_size_kb": 10000, + "date_published": "2022-01-01T15:15:00.000", + "description": "Description of this file.", + "encoding_format": "image/jpeg", + "name": "Name of this file." + } + ], + "note": [ + { + "value": "A brief record of points or ideas about this AI resource." + } + ], + "research_area": [ + "Explainable AI", + "Physical AI" + ], + "scientific_domain": [ + "Anomaly Detection", + "Voice Recognition", + "Computer Vision." + ], + "start_date": "2021-02-03T15:15:00", + "end_date": "2022-02-03T15:15:00", + "schedule": "10:00-10:30: Opening. 
10:30-11:00 ...", + "registration_link": "https://example.com/registration-form", + "status": "scheduled", + "mode": "offline" } ] diff --git a/src/connectors/synchronization.py b/src/connectors/synchronization.py index 5bd223d2..4d4ae8ec 100644 --- a/src/connectors/synchronization.py +++ b/src/connectors/synchronization.py @@ -17,6 +17,7 @@ from database.session import DbSession from database.setup import _create_or_fetch_related_objects, _get_existing_resource from routers import ResourceRouter, resource_routers, enum_routers +from setup_logger import setup_logger RELATIVE_PATH_STATE_JSON = pathlib.Path("state.json") RELATIVE_PATH_ERROR_CSV = pathlib.Path("errors.csv") @@ -125,11 +126,7 @@ def main(): shutil.rmtree(working_dir) working_dir.mkdir(parents=True, exist_ok=True) - logging.basicConfig( - level=logging.INFO, - format="%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s", - datefmt="%Y-%m-%d %H:%M:%S", - ) + setup_logger() sys.excepthook = exception_handler module_path = ".".join(args.connector.split(".")[0:-1]) diff --git a/src/database/model/concept/aiod_entry.py b/src/database/model/concept/aiod_entry.py index d3b9f879..d7dded41 100644 --- a/src/database/model/concept/aiod_entry.py +++ b/src/database/model/concept/aiod_entry.py @@ -32,7 +32,7 @@ class AIoDEntryORM(AIoDEntryBase, table=True): # type: ignore [call-arg] link_model=many_to_many_link_factory("aiod_entry", "person", table_prefix="editor"), ) status_identifier: int | None = Field(foreign_key=Status.__tablename__ + ".identifier") - status: Status = Relationship() + status: Status | None = Relationship() # date_modified is updated in the resource_router date_modified: datetime | None = Field(default_factory=datetime.utcnow) @@ -40,7 +40,7 @@ class AIoDEntryORM(AIoDEntryBase, table=True): # type: ignore [call-arg] class RelationshipConfig: editor: list[int] = ManyToMany() # No deletion triggers: "orphan" Persons should be kept - status: str = ManyToOne( + status: str | None = ManyToOne( example="draft", identifier_name="status_identifier", deserializer=FindByNameDeserializer(Status), @@ -53,7 +53,7 @@ class AIoDEntryCreate(AIoDEntryBase): default_factory=list, schema_extra={"example": []}, ) - status: str = Field( + status: str | None = Field( description="Status of the entry (published, draft, rejected)", schema_extra={"example": "published"}, default="draft", @@ -66,7 +66,7 @@ class AIoDEntryRead(AIoDEntryBase): default_factory=list, schema_extra={"example": []}, ) - status: str = Field( + status: str | None = Field( description="Status of the entry (published, draft, rejected)", schema_extra={"example": "published"}, default="draft", diff --git a/src/database/model/platform/platform_names.py b/src/database/model/platform/platform_names.py index 1a74161e..dadb61d4 100644 --- a/src/database/model/platform/platform_names.py +++ b/src/database/model/platform/platform_names.py @@ -13,3 +13,4 @@ class PlatformName(str, enum.Enum): openml = "openml" huggingface = "huggingface" zenodo = "zenodo" + ai4experiments = "ai4experiments" diff --git a/src/main.py b/src/main.py index e0b1f3a1..8beaa096 100644 --- a/src/main.py +++ b/src/main.py @@ -22,6 +22,8 @@ from database.session import EngineSingleton, DbSession from database.setup import drop_or_create_database from routers import resource_routers, parent_routers, enum_routers +from routers import search_routers +from setup_logger import setup_logger def _parse_args() -> argparse.Namespace: @@ -81,12 +83,14 @@ def counts() -> dict: + 
routers.other_routers + parent_routers.router_list + enum_routers.router_list + + search_routers.router_list ): app.include_router(router.create(url_prefix)) def create_app() -> FastAPI: """Create the FastAPI application, complete with routes.""" + setup_logger() args = _parse_args() app = FastAPI( openapi_url=f"{args.url_prefix}/openapi.json", diff --git a/src/routers/search_router.py b/src/routers/search_router.py new file mode 100644 index 00000000..1e0180b5 --- /dev/null +++ b/src/routers/search_router.py @@ -0,0 +1,161 @@ +import abc +from typing import TypeVar, Generic, Any, Type, Annotated + +from fastapi import APIRouter, HTTPException, Query +from pydantic import BaseModel +from sqlmodel import SQLModel, select +from starlette import status + +from database.model.concept.aiod_entry import AIoDEntryRead +from database.model.concept.concept import AIoDConcept +from database.model.platform.platform import Platform +from database.model.resource_read_and_create import resource_read +from database.session import DbSession +from .resource_router import _wrap_as_http_exception +from .search_routers.elasticsearch import ElasticsearchSingleton + +SORT = {"identifier": "asc"} +LIMIT_MAX = 1000 + +RESOURCE = TypeVar("RESOURCE", bound=AIoDConcept) + + +class SearchResult(BaseModel, Generic[RESOURCE]): + total_hits: int + resources: list + limit: int + offset: int + + +class SearchRouter(Generic[RESOURCE], abc.ABC): + """ + Providing search functionality in ElasticSearch + """ + + @property + @abc.abstractmethod + def es_index(self) -> str: + """The name of the elasticsearch index""" + + @property + @abc.abstractmethod + def resource_name_plural(self) -> str: + """The name of the resource (plural)""" + + @property + def key_translations(self) -> dict[str, str]: + """If an attribute is called differently in elasticsearch than in our + metadata model, you can define a translation dictionary here. The key + should be the name in elasticsearch, the value the name in our data + model.""" + return {} + + @property + @abc.abstractmethod + def resource_class(self) -> RESOURCE: + """The resource class""" + + @property + @abc.abstractmethod + def indexed_fields(self) -> set[str]: + """The set of indexed fields""" + + def create(self, url_prefix: str) -> APIRouter: + router = APIRouter() + read_class = resource_read(self.resource_class) # type: ignore + + @router.get(f"{url_prefix}/search/{self.resource_name_plural}/v1", tags=["search"]) + def search( + platforms: Annotated[list[str] | None, Query()] = None, + search_query: str = "", + search_fields: Annotated[list[str] | None, Query()] = None, + limit: Annotated[int | None, Query(ge=1, le=LIMIT_MAX)] = 10, + offset: Annotated[int | None, Query(ge=0)] = 0, + get_all: bool = True, + ) -> SearchResult[read_class]: # type: ignore + f""" + Search for {self.resource_name_plural}. 
+ """ + try: + with DbSession() as session: + query = select(Platform) + database_platforms = session.scalars(query).all() + platform_names = {p.name for p in database_platforms} + except Exception as e: + raise _wrap_as_http_exception(e) + + if platforms and not set(platforms).issubset(platform_names): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"The available platforms are: {platform_names}", + ) + fields = search_fields if search_fields else self.indexed_fields + if not set(fields).issubset(self.indexed_fields): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"The available search fields for this entity " + f"are: {self.indexed_fields}", + ) + query_matches = [{"match": {f: search_query}} for f in fields] + query = {"bool": {"should": query_matches, "minimum_should_match": 1}} + if platforms: + platform_matches = [{"match": {"platform": p}} for p in platforms] + query["bool"]["must"] = { + "bool": {"should": platform_matches, "minimum_should_match": 1} + } + result = ElasticsearchSingleton().client.search( + index=self.es_index, query=query, from_=offset, size=limit, sort=SORT + ) + total_hits = result["hits"]["total"]["value"] + if get_all: + resources: list[SQLModel] = [ + self._db_query(read_class, self.resource_class, hit["_source"]["identifier"]) + for hit in result["hits"]["hits"] + ] + else: + resources: list[Type[RESOURCE]] = [ # type: ignore + self._cast_resource(read_class, hit["_source"]) + for hit in result["hits"]["hits"] + ] + return SearchResult[RESOURCE]( # type: ignore + total_hits=total_hits, + resources=resources, + limit=limit, + offset=offset, + ) + + return router + + def _db_query( + self, + read_class: Type[SQLModel], + resource_class: RESOURCE, + identifier: int, + ) -> SQLModel: + try: + with DbSession() as session: + query = select(resource_class).where(resource_class.identifier == identifier) + resource = session.scalars(query).first() + if not resource: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Resource not found in the database.", + ) + return read_class.from_orm(resource) + except Exception as e: + raise _wrap_as_http_exception(e) + + def _cast_resource( + self, read_class: Type[SQLModel], resource_dict: dict[str, Any] + ) -> Type[RESOURCE]: + kwargs = { + self.key_translations.get(key, key): val + for key, val in resource_dict.items() + if key != "type" and not key.startswith("@") + } + resource = read_class(**kwargs) + resource.aiod_entry = AIoDEntryRead( + date_modified=resource_dict["date_modified"], status=None + ) + resource.description = {"plain": resource_dict["plain"], "html": resource_dict["html"]} + return resource diff --git a/src/routers/search_routers/__init__.py b/src/routers/search_routers/__init__.py new file mode 100644 index 00000000..7dcceb07 --- /dev/null +++ b/src/routers/search_routers/__init__.py @@ -0,0 +1,22 @@ +from .search_router_datasets import SearchRouterDatasets +from .search_router_events import SearchRouterEvents +from .search_router_experiments import SearchRouterExperiments +from .search_router_ml_models import SearchRouterMLModels +from .search_router_news import SearchRouterNews +from .search_router_organisations import SearchRouterOrganisations +from .search_router_projects import SearchRouterProjects +from .search_router_publications import SearchRouterPublications +from .search_router_services import SearchRouterServices +from ..search_router import SearchRouter + +router_list: list[SearchRouter] = [ + SearchRouterDatasets(), 
+ SearchRouterEvents(), + SearchRouterExperiments(), + SearchRouterMLModels(), + SearchRouterNews(), + SearchRouterOrganisations(), + SearchRouterProjects(), + SearchRouterPublications(), + SearchRouterServices(), +] diff --git a/src/routers/search_routers/elasticsearch.py b/src/routers/search_routers/elasticsearch.py new file mode 100644 index 00000000..7e423e91 --- /dev/null +++ b/src/routers/search_routers/elasticsearch.py @@ -0,0 +1,24 @@ +import os + +from elasticsearch import Elasticsearch + + +class ElasticsearchSingleton: + """ + Making sure the Elasticsearch client is created only once, and easy to patch for + unittests. + """ + + __monostate = None + + def __init__(self): + if not ElasticsearchSingleton.__monostate: + ElasticsearchSingleton.__monostate = self.__dict__ + user = os.getenv("ES_USER", "") + pw = os.getenv("ES_PASSWORD", "") + self.client = Elasticsearch("http://elasticsearch:9200", basic_auth=(user, pw)) + else: + self.__dict__ = ElasticsearchSingleton.__monostate + + def patch(self, elasticsearch: Elasticsearch): + self.__monostate["client"] = elasticsearch # type:ignore diff --git a/src/routers/search_routers/search_router_datasets.py b/src/routers/search_routers/search_router_datasets.py new file mode 100644 index 00000000..b5a6d8cb --- /dev/null +++ b/src/routers/search_routers/search_router_datasets.py @@ -0,0 +1,20 @@ +from database.model.dataset.dataset import Dataset +from routers.search_router import SearchRouter + + +class SearchRouterDatasets(SearchRouter[Dataset]): + @property + def es_index(self) -> str: + return "dataset" + + @property + def resource_name_plural(self) -> str: + return "datasets" + + @property + def resource_class(self): + return Dataset + + @property + def indexed_fields(self): + return {"name", "plain", "html", "issn"} diff --git a/src/routers/search_routers/search_router_events.py b/src/routers/search_routers/search_router_events.py new file mode 100644 index 00000000..242407fb --- /dev/null +++ b/src/routers/search_routers/search_router_events.py @@ -0,0 +1,20 @@ +from database.model.event.event import Event +from routers.search_router import SearchRouter + + +class SearchRouterEvents(SearchRouter[Event]): + @property + def es_index(self) -> str: + return "event" + + @property + def resource_name_plural(self) -> str: + return "events" + + @property + def resource_class(self): + return Event + + @property + def indexed_fields(self): + return {"name", "plain", "html"} diff --git a/src/routers/search_routers/search_router_experiments.py b/src/routers/search_routers/search_router_experiments.py new file mode 100644 index 00000000..9c9954a3 --- /dev/null +++ b/src/routers/search_routers/search_router_experiments.py @@ -0,0 +1,20 @@ +from database.model.models_and_experiments.experiment import Experiment +from routers.search_router import SearchRouter + + +class SearchRouterExperiments(SearchRouter[Experiment]): + @property + def es_index(self) -> str: + return "experiment" + + @property + def resource_name_plural(self) -> str: + return "experiments" + + @property + def resource_class(self): + return Experiment + + @property + def indexed_fields(self): + return {"name", "plain", "html"} diff --git a/src/routers/search_routers/search_router_ml_models.py b/src/routers/search_routers/search_router_ml_models.py new file mode 100644 index 00000000..bc380e41 --- /dev/null +++ b/src/routers/search_routers/search_router_ml_models.py @@ -0,0 +1,20 @@ +from database.model.models_and_experiments.ml_model import MLModel +from routers.search_router 
import SearchRouter + + +class SearchRouterMLModels(SearchRouter[MLModel]): + @property + def es_index(self) -> str: + return "ml_model" + + @property + def resource_name_plural(self) -> str: + return "ml_models" + + @property + def resource_class(self): + return MLModel + + @property + def indexed_fields(self): + return {"name", "plain", "html"} diff --git a/src/routers/search_routers/search_router_news.py b/src/routers/search_routers/search_router_news.py new file mode 100644 index 00000000..c1f44bbc --- /dev/null +++ b/src/routers/search_routers/search_router_news.py @@ -0,0 +1,20 @@ +from database.model.news.news import News +from routers.search_router import SearchRouter + + +class SearchRouterNews(SearchRouter[News]): + @property + def es_index(self) -> str: + return "news" + + @property + def resource_name_plural(self) -> str: + return "news" + + @property + def resource_class(self): + return News + + @property + def indexed_fields(self): + return {"name", "plain", "html", "headline", "alternative_headline"} diff --git a/src/routers/search_routers/search_router_organisations.py b/src/routers/search_routers/search_router_organisations.py new file mode 100644 index 00000000..922162f9 --- /dev/null +++ b/src/routers/search_routers/search_router_organisations.py @@ -0,0 +1,20 @@ +from database.model.agent.organisation import Organisation +from routers.search_router import SearchRouter + + +class SearchRouterOrganisations(SearchRouter[Organisation]): + @property + def es_index(self) -> str: + return "organisation" + + @property + def resource_name_plural(self) -> str: + return "organisations" + + @property + def resource_class(self): + return Organisation + + @property + def indexed_fields(self): + return {"name", "legal_name", "plain", "html"} diff --git a/src/routers/search_routers/search_router_projects.py b/src/routers/search_routers/search_router_projects.py new file mode 100644 index 00000000..e8f8fe71 --- /dev/null +++ b/src/routers/search_routers/search_router_projects.py @@ -0,0 +1,20 @@ +from database.model.project.project import Project +from routers.search_router import SearchRouter + + +class SearchRouterProjects(SearchRouter[Project]): + @property + def es_index(self) -> str: + return "project" + + @property + def resource_name_plural(self) -> str: + return "projects" + + @property + def resource_class(self): + return Project + + @property + def indexed_fields(self): + return {"name", "plain", "html"} diff --git a/src/routers/search_routers/search_router_publications.py b/src/routers/search_routers/search_router_publications.py new file mode 100644 index 00000000..f4f71675 --- /dev/null +++ b/src/routers/search_routers/search_router_publications.py @@ -0,0 +1,20 @@ +from database.model.knowledge_asset.publication import Publication +from routers.search_router import SearchRouter + + +class SearchRouterPublications(SearchRouter[Publication]): + @property + def es_index(self) -> str: + return "publication" + + @property + def resource_name_plural(self) -> str: + return "publications" + + @property + def resource_class(self): + return Publication + + @property + def indexed_fields(self): + return {"name", "plain", "html", "issn", "isbn"} diff --git a/src/routers/search_routers/search_router_services.py b/src/routers/search_routers/search_router_services.py new file mode 100644 index 00000000..58dd1748 --- /dev/null +++ b/src/routers/search_routers/search_router_services.py @@ -0,0 +1,20 @@ +from database.model.service.service import Service +from routers.search_router import 
SearchRouter + + +class SearchRouterServices(SearchRouter[Service]): + @property + def es_index(self) -> str: + return "service" + + @property + def resource_name_plural(self) -> str: + return "services" + + @property + def resource_class(self): + return Service + + @property + def indexed_fields(self): + return {"name", "plain", "html", "slogan"} diff --git a/src/setup/__init__.py b/src/setup/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/setup/es_setup/__init__.py b/src/setup/es_setup/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/setup/es_setup/definitions.py b/src/setup/es_setup/definitions.py new file mode 100755 index 00000000..c8afeaa1 --- /dev/null +++ b/src/setup/es_setup/definitions.py @@ -0,0 +1,11 @@ +BASE_MAPPING = { + "mappings": { + "properties": { + "date_modified": {"type": "date"}, + "identifier": {"type": "long"}, + "name": {"type": "text", "fields": {"keyword": {"type": "keyword"}}}, + "plain": {"type": "text", "fields": {"keyword": {"type": "keyword"}}}, + "html": {"type": "text", "fields": {"keyword": {"type": "keyword"}}}, + } + } +} diff --git a/src/setup/es_setup/generate_elasticsearch_indices.py b/src/setup/es_setup/generate_elasticsearch_indices.py new file mode 100755 index 00000000..60d59ab5 --- /dev/null +++ b/src/setup/es_setup/generate_elasticsearch_indices.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +"""Generates the elasticsearch indices + +Launched by the es_logstash_setup container in the docker-compose file. +""" + +import copy +import logging + +from definitions import BASE_MAPPING +from routers.search_routers import router_list +from routers.search_routers.elasticsearch import ElasticsearchSingleton +from setup_logger import setup_logger + + +def generate_mapping(fields): + mapping = copy.deepcopy(BASE_MAPPING) + for field_name in fields: + mapping["mappings"]["properties"][field_name] = { + "type": "text", + "fields": {"keyword": {"type": "keyword"}}, + } + return mapping + + +def main(): + setup_logger() + es_client = ElasticsearchSingleton().client + global_fields = {"name", "plain", "html"} + entities = { + router.es_index: list(router.indexed_fields ^ global_fields) for router in router_list + } + logging.info("Generating indices...") + for es_index, fields in entities.items(): + mapping = generate_mapping(fields) + + # ignore 400 cause by IndexAlreadyExistsException when creating an index + es_client.indices.create(index=es_index, body=mapping, ignore=400) + logging.info("Generating indices completed.") + + +if __name__ == "__main__": + main() diff --git a/src/setup/logstash_setup/__init__.py b/src/setup/logstash_setup/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/setup/logstash_setup/generate_logstash_config_files.py b/src/setup/logstash_setup/generate_logstash_config_files.py new file mode 100755 index 00000000..38624b5b --- /dev/null +++ b/src/setup/logstash_setup/generate_logstash_config_files.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +"""Generates the logstash configuration and pipelines files + +This file generates the logstash configuration file in logstash/config, the +pipelines configuration files in logstash/pipelines/conf and the pipelines +sql sentences in logstash/pipelines/sql. + +Launched by the es_logstash_setup container in the docker-compose file. 
+""" +import logging +import os +from pathlib import Path +from jinja2 import Template + +from routers.search_routers import router_list +from setup.logstash_setup.templates.config import TEMPLATE_CONFIG +from setup.logstash_setup.templates.file_header import FILE_IS_GENERATED_COMMENT +from setup.logstash_setup.templates.init_table import TEMPLATE_INIT_TABLE +from setup.logstash_setup.templates.sql_init import TEMPLATE_SQL_INIT +from setup.logstash_setup.templates.sql_rm import TEMPLATE_SQL_RM +from setup.logstash_setup.templates.sql_sync import TEMPLATE_SQL_SYNC +from setup.logstash_setup.templates.sync_table import TEMPLATE_SYNC_TABLE +from setup_logger import setup_logger + +PATH_BASE = Path("/logstash/config") +PATH_CONFIG = PATH_BASE / "config" +PATH_PIPELINE = PATH_BASE / "pipeline" +PATH_SQL = PATH_BASE / "sql" + +DB_USER = "root" +DB_PASS = os.environ["MYSQL_ROOT_PASSWORD"] +ES_USER = os.environ["ES_USER"] +ES_PASS = os.environ["ES_PASSWORD"] + +GLOBAL_FIELDS = {"name", "plain", "html"} + + +def generate_file(file_path, template, file_data): + with open(file_path, "w") as f: + f.write(Template(FILE_IS_GENERATED_COMMENT).render(file_data)) + f.write(Template(template).render(file_data)) + + +def main(): + setup_logger() + for path in (PATH_CONFIG, PATH_PIPELINE, PATH_SQL): + path.mkdir(parents=True, exist_ok=True) + entities = { + router.es_index: list(router.indexed_fields ^ GLOBAL_FIELDS) for router in router_list + } + render_parameters = { + "file": os.path.basename(__file__), + "path": os.path.dirname(__file__).replace("/app", "src"), + "comment_tag": "#", + "es_user": ES_USER, + "es_pass": ES_PASS, + "db_user": DB_USER, + "db_pass": DB_PASS, + "entities": entities.keys(), + } + logging.info("Generating configuration files...") + config_file = os.path.join(PATH_CONFIG, "logstash.yml") + config_init_file = os.path.join(PATH_PIPELINE, "init_table.conf") + config_sync_file = os.path.join(PATH_PIPELINE, "sync_table.conf") + generate_file(config_file, TEMPLATE_CONFIG, render_parameters) + generate_file(config_init_file, TEMPLATE_INIT_TABLE, render_parameters) + generate_file(config_sync_file, TEMPLATE_SYNC_TABLE, render_parameters) + + render_parameters["comment_tag"] = "--" + logging.info("Generating configuration files completed.") + logging.info("Generating sql files...") + for es_index, extra_fields in entities.items(): + render_parameters["entity_name"] = es_index + render_parameters["extra_fields"] = ( + ",\n " + ",\n ".join(extra_fields) if extra_fields else "" + ) + + sql_init_file = os.path.join(PATH_SQL, f"init_{es_index}.sql") + sql_sync_file = os.path.join(PATH_SQL, f"sync_{es_index}.sql") + sql_rm_file = os.path.join(PATH_SQL, f"rm_{es_index}.sql") + generate_file(sql_init_file, TEMPLATE_SQL_INIT, render_parameters) + generate_file(sql_sync_file, TEMPLATE_SQL_SYNC, render_parameters) + generate_file(sql_rm_file, TEMPLATE_SQL_RM, render_parameters) + logging.info("Generating configuration files completed.") + + +if __name__ == "__main__": + main() diff --git a/src/setup/logstash_setup/templates/__init__.py b/src/setup/logstash_setup/templates/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/setup/logstash_setup/templates/config.py b/src/setup/logstash_setup/templates/config.py new file mode 100755 index 00000000..661d1619 --- /dev/null +++ b/src/setup/logstash_setup/templates/config.py @@ -0,0 +1,6 @@ +TEMPLATE_CONFIG = """http.host: "0.0.0.0" +xpack.monitoring.elasticsearch.hosts: [ "http://elasticsearch:9200" ] +xpack.monitoring.enabled: true 
+xpack.monitoring.elasticsearch.username: {{es_user}} +xpack.monitoring.elasticsearch.password: {{es_pass}} +""" diff --git a/src/setup/logstash_setup/templates/file_header.py b/src/setup/logstash_setup/templates/file_header.py new file mode 100755 index 00000000..de26912b --- /dev/null +++ b/src/setup/logstash_setup/templates/file_header.py @@ -0,0 +1,5 @@ +FILE_IS_GENERATED_COMMENT = """{{comment_tag}} This file has been generated by `{{file}}` +{{comment_tag}} file, placed in `{{path}}` +{{comment_tag}} ------------------------------------------------------------------- + +""" diff --git a/src/setup/logstash_setup/templates/init_table.py b/src/setup/logstash_setup/templates/init_table.py new file mode 100755 index 00000000..09ca42a6 --- /dev/null +++ b/src/setup/logstash_setup/templates/init_table.py @@ -0,0 +1,36 @@ +TEMPLATE_INIT_TABLE = """ +input { +{% for entity in entities %} + jdbc { + jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" + jdbc_driver_class => "com.mysql.jdbc.Driver" + jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" + jdbc_user => "{{db_user}}" + jdbc_password => "{{db_pass}}" + clean_run => true + record_last_run => false + statement_filepath => "/usr/share/logstash/sql/init_{{entity}}.sql" + type => "{{entity}}" + } +{% endfor %} +} +filter { + mutate { + remove_field => ["@version", "@timestamp"] + } +} +output { +{% for entity in entities %} + if [type] == "{{entity}}" { + elasticsearch { + hosts => "elasticsearch:9200" + user => "{{es_user}}" + password => "{{es_pass}}" + ecs_compatibility => disabled + index => "{{entity}}" + document_id => "{{entity}}_%{identifier}" + } + } +{% endfor %} +} +""" diff --git a/src/setup/logstash_setup/templates/sql_init.py b/src/setup/logstash_setup/templates/sql_init.py new file mode 100755 index 00000000..b1c47528 --- /dev/null +++ b/src/setup/logstash_setup/templates/sql_init.py @@ -0,0 +1,11 @@ +TEMPLATE_SQL_INIT = """SELECT + {{entity_name}}.identifier, + {{entity_name}}.name, + text.plain as 'plain', + text.html as 'html', + aiod_entry.date_modified{{extra_fields}} +FROM aiod.{{entity_name}} +INNER JOIN aiod.aiod_entry ON aiod.{{entity_name}}.aiod_entry_identifier=aiod.aiod_entry.identifier +LEFT JOIN aiod.text ON aiod.{{entity_name}}.description_identifier=aiod.text.identifier +WHERE aiod.{{entity_name}}.date_deleted IS NULL +""" diff --git a/src/setup/logstash_setup/templates/sql_rm.py b/src/setup/logstash_setup/templates/sql_rm.py new file mode 100755 index 00000000..335f9da3 --- /dev/null +++ b/src/setup/logstash_setup/templates/sql_rm.py @@ -0,0 +1,5 @@ +TEMPLATE_SQL_RM = """SELECT {{entity_name}}.identifier +FROM aiod.{{entity_name}} +WHERE aiod.{{entity_name}}.date_deleted IS NOT NULL +AND aiod.{{entity_name}}.date_deleted > :sql_last_value +""" diff --git a/src/setup/logstash_setup/templates/sql_sync.py b/src/setup/logstash_setup/templates/sql_sync.py new file mode 100755 index 00000000..c4cb552b --- /dev/null +++ b/src/setup/logstash_setup/templates/sql_sync.py @@ -0,0 +1,11 @@ +TEMPLATE_SQL_SYNC = """SELECT + {{entity_name}}.identifier, + {{entity_name}}.name, + text.plain as 'plain', + text.html as 'html', + aiod_entry.date_modified{{extra_fields}} +FROM aiod.{{entity_name}} +INNER JOIN aiod.aiod_entry ON aiod.{{entity_name}}.aiod_entry_identifier=aiod.aiod_entry.identifier +LEFT JOIN aiod.text ON aiod.{{entity_name}}.description_identifier=aiod.text.identifier +WHERE aiod.{{entity_name}}.date_deleted IS NULL AND aiod.aiod_entry.date_modified > :sql_last_value +""" diff --git 
a/src/setup/logstash_setup/templates/sync_table.py b/src/setup/logstash_setup/templates/sync_table.py new file mode 100755 index 00000000..1ebda66c --- /dev/null +++ b/src/setup/logstash_setup/templates/sync_table.py @@ -0,0 +1,62 @@ +TEMPLATE_SYNC_TABLE = """ +input { +{% for entity in entities %} + jdbc { + jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" + jdbc_driver_class => "com.mysql.jdbc.Driver" + jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" + jdbc_user => "{{db_user}}" + jdbc_password => "{{db_pass}}" + use_column_value => true + tracking_column => "date_modified" + tracking_column_type => "timestamp" + schedule => "*/5 * * * * *" + statement_filepath => "/usr/share/logstash/sql/sync_{{entity}}.sql" + type => "{{entity}}" + } + jdbc { + jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" + jdbc_driver_class => "com.mysql.jdbc.Driver" + jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" + jdbc_user => "{{db_user}}" + jdbc_password => "{{db_pass}}" + use_column_value => true + tracking_column => "date_deleted" + tracking_column_type => "timestamp" + schedule => "*/5 * * * * *" + statement_filepath => "/usr/share/logstash/sql/rm_{{entity}}.sql" + type => "rm_{{entity}}" + } +{% endfor %} +} +filter { + mutate { + remove_field => ["@version", "@timestamp"] + } +} +output { +{% for entity in entities %} + if [type] == "{{entity}}" { + elasticsearch { + hosts => "elasticsearch:9200" + user => "{{es_user}}" + password => "{{es_pass}}" + ecs_compatibility => disabled + index => "{{entity}}" + document_id => "{{entity}}_%{identifier}" + } + } + if [type] == "rm_{{entity}}" { + elasticsearch { + action => "delete" + hosts => "elasticsearch:9200" + user => "{{es_user}}" + password => "{{es_pass}}" + ecs_compatibility => disabled + index => "{{entity}}" + document_id => "{{entity}}_%{identifier}" + } + } +{% endfor %} +} +""" diff --git a/src/setup_logger.py b/src/setup_logger.py new file mode 100644 index 00000000..d22930af --- /dev/null +++ b/src/setup_logger.py @@ -0,0 +1,9 @@ +import logging + + +def setup_logger(): + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) diff --git a/src/tests/resources/elasticsearch/dataset_search.json b/src/tests/resources/elasticsearch/dataset_search.json new file mode 100644 index 00000000..9445ce69 --- /dev/null +++ b/src/tests/resources/elasticsearch/dataset_search.json @@ -0,0 +1,36 @@ +{ + "took" : 5, + "timed_out" : false, + "_shards" : { + "total" : 1, + "successful" : 1, + "skipped" : 0, + "failed" : 0 + }, + "hits" : { + "total" : { + "value" : 1, + "relation" : "eq" + }, + "max_score" : null, + "hits" : [ + { + "_index" : "dataset", + "_id" : "dataset_1", + "_score" : null, + "_source" : { + "identifier" : 1, + "date_modified" : "2023-09-01T00:00:00.000Z", + "name" : "A name.", + "plain" : "A plain text description.", + "issn" : "20493630", + "type" : "dataset", + "html" : "An html description." 
diff --git a/src/tests/resources/elasticsearch/dataset_search.json b/src/tests/resources/elasticsearch/dataset_search.json
new file mode 100644
index 00000000..9445ce69
--- /dev/null
+++ b/src/tests/resources/elasticsearch/dataset_search.json
@@ -0,0 +1,36 @@
+{
+  "took" : 5,
+  "timed_out" : false,
+  "_shards" : {
+    "total" : 1,
+    "successful" : 1,
+    "skipped" : 0,
+    "failed" : 0
+  },
+  "hits" : {
+    "total" : {
+      "value" : 1,
+      "relation" : "eq"
+    },
+    "max_score" : null,
+    "hits" : [
+      {
+        "_index" : "dataset",
+        "_id" : "dataset_1",
+        "_score" : null,
+        "_source" : {
+          "identifier" : 1,
+          "date_modified" : "2023-09-01T00:00:00.000Z",
+          "name" : "A name.",
+          "plain" : "A plain text description.",
+          "issn" : "20493630",
+          "type" : "dataset",
+          "html" : "An html description."
+        },
+        "sort" : [
+          1
+        ]
+      }
+    ]
+  }
+}
diff --git a/src/tests/resources/elasticsearch/event_search.json b/src/tests/resources/elasticsearch/event_search.json
new file mode 100644
index 00000000..302cdad1
--- /dev/null
+++ b/src/tests/resources/elasticsearch/event_search.json
@@ -0,0 +1,35 @@
+{
+  "took" : 1,
+  "timed_out" : false,
+  "_shards" : {
+    "total" : 1,
+    "successful" : 1,
+    "skipped" : 0,
+    "failed" : 0
+  },
+  "hits" : {
+    "total" : {
+      "value" : 1,
+      "relation" : "eq"
+    },
+    "max_score" : null,
+    "hits" : [
+      {
+        "_index" : "event",
+        "_id" : "event_1",
+        "_score" : null,
+        "_source" : {
+          "identifier" : 1,
+          "date_modified" : "2023-09-01T00:00:00.000Z",
+          "name" : "A name.",
+          "plain" : "A plain text description.",
+          "type" : "event",
+          "html" : "An html description."
+        },
+        "sort" : [
+          1
+        ]
+      }
+    ]
+  }
+}
diff --git a/src/tests/resources/elasticsearch/experiment_search.json b/src/tests/resources/elasticsearch/experiment_search.json
new file mode 100644
index 00000000..dfceb200
--- /dev/null
+++ b/src/tests/resources/elasticsearch/experiment_search.json
@@ -0,0 +1,35 @@
+{
+  "took" : 1,
+  "timed_out" : false,
+  "_shards" : {
+    "total" : 1,
+    "successful" : 1,
+    "skipped" : 0,
+    "failed" : 0
+  },
+  "hits" : {
+    "total" : {
+      "value" : 1,
+      "relation" : "eq"
+    },
+    "max_score" : null,
+    "hits" : [
+      {
+        "_index" : "experiment",
+        "_id" : "experiment_1",
+        "_score" : null,
+        "_source" : {
+          "identifier" : 1,
+          "date_modified" : "2023-09-01T00:00:00.000Z",
+          "name" : "A name.",
+          "plain" : "A plain text description.",
+          "type" : "experiment",
+          "html" : "An html description."
+        },
+        "sort" : [
+          1
+        ]
+      }
+    ]
+  }
+}
diff --git a/src/tests/resources/elasticsearch/ml_model_search.json b/src/tests/resources/elasticsearch/ml_model_search.json
new file mode 100644
index 00000000..ce6e8bc0
--- /dev/null
+++ b/src/tests/resources/elasticsearch/ml_model_search.json
@@ -0,0 +1,35 @@
+{
+  "took" : 1,
+  "timed_out" : false,
+  "_shards" : {
+    "total" : 1,
+    "successful" : 1,
+    "skipped" : 0,
+    "failed" : 0
+  },
+  "hits" : {
+    "total" : {
+      "value" : 1,
+      "relation" : "eq"
+    },
+    "max_score" : null,
+    "hits" : [
+      {
+        "_index" : "ml_model",
+        "_id" : "ml_model_1",
+        "_score" : null,
+        "_source" : {
+          "identifier" : 1,
+          "date_modified" : "2023-09-01T00:00:00.000Z",
+          "name" : "A name.",
+          "plain" : "A plain text description.",
+          "type" : "ml_model",
+          "html" : "An html description."
+        },
+        "sort" : [
+          1
+        ]
+      }
+    ]
+  }
+}
diff --git a/src/tests/resources/elasticsearch/news_search.json b/src/tests/resources/elasticsearch/news_search.json
new file mode 100644
index 00000000..e08ee850
--- /dev/null
+++ b/src/tests/resources/elasticsearch/news_search.json
@@ -0,0 +1,37 @@
+{
+  "took" : 1,
+  "timed_out" : false,
+  "_shards" : {
+    "total" : 1,
+    "successful" : 1,
+    "skipped" : 0,
+    "failed" : 0
+  },
+  "hits" : {
+    "total" : {
+      "value" : 1,
+      "relation" : "eq"
+    },
+    "max_score" : null,
+    "hits" : [
+      {
+        "_index" : "news",
+        "_id" : "news_1",
+        "_score" : null,
+        "_source" : {
+          "identifier" : 1,
+          "headline" : "A headline.",
+          "date_modified" : "2023-09-01T00:00:00.000Z",
+          "name" : "A name.",
+          "plain" : "A plain text description.",
+          "type" : "news",
+          "html" : "An html description.",
+          "alternative_headline" : "An alternative headline."
+        },
+        "sort" : [
+          1
+        ]
+      }
+    ]
+  }
+}
diff --git a/src/tests/resources/elasticsearch/organisation_search.json b/src/tests/resources/elasticsearch/organisation_search.json
new file mode 100644
index 00000000..b1288671
--- /dev/null
+++ b/src/tests/resources/elasticsearch/organisation_search.json
@@ -0,0 +1,36 @@
+{
+  "took" : 1,
+  "timed_out" : false,
+  "_shards" : {
+    "total" : 1,
+    "successful" : 1,
+    "skipped" : 0,
+    "failed" : 0
+  },
+  "hits" : {
+    "total" : {
+      "value" : 1,
+      "relation" : "eq"
+    },
+    "max_score" : null,
+    "hits" : [
+      {
+        "_index" : "organisation",
+        "_id" : "organisation_1",
+        "_score" : null,
+        "_source" : {
+          "identifier" : 1,
+          "date_modified" : "2023-09-01T00:00:00.000Z",
+          "name" : "A name.",
+          "plain" : "A plain text description.",
+          "type" : "organisation",
+          "html" : "An html description.",
+          "legal_name" : "A legal name."
+        },
+        "sort" : [
+          1
+        ]
+      }
+    ]
+  }
+}
diff --git a/src/tests/resources/elasticsearch/project_search.json b/src/tests/resources/elasticsearch/project_search.json
new file mode 100644
index 00000000..d83d3c6b
--- /dev/null
+++ b/src/tests/resources/elasticsearch/project_search.json
@@ -0,0 +1,35 @@
+{
+  "took" : 1,
+  "timed_out" : false,
+  "_shards" : {
+    "total" : 1,
+    "successful" : 1,
+    "skipped" : 0,
+    "failed" : 0
+  },
+  "hits" : {
+    "total" : {
+      "value" : 1,
+      "relation" : "eq"
+    },
+    "max_score" : null,
+    "hits" : [
+      {
+        "_index" : "project",
+        "_id" : "project_1",
+        "_score" : null,
+        "_source" : {
+          "identifier" : 1,
+          "date_modified" : "2023-09-01T00:00:00.000Z",
+          "name" : "A name.",
+          "plain" : "A plain text description.",
+          "type" : "project",
+          "html" : "An html description."
+        },
+        "sort" : [
+          1
+        ]
+      }
+    ]
+  }
+}
diff --git a/src/tests/resources/elasticsearch/publication_search.json b/src/tests/resources/elasticsearch/publication_search.json
new file mode 100644
index 00000000..ed56038b
--- /dev/null
+++ b/src/tests/resources/elasticsearch/publication_search.json
@@ -0,0 +1,37 @@
+{
+  "took" : 1,
+  "timed_out" : false,
+  "_shards" : {
+    "total" : 1,
+    "successful" : 1,
+    "skipped" : 0,
+    "failed" : 0
+  },
+  "hits" : {
+    "total" : {
+      "value" : 1,
+      "relation" : "eq"
+    },
+    "max_score" : null,
+    "hits" : [
+      {
+        "_index" : "publication",
+        "_id" : "publication_1",
+        "_score" : null,
+        "_source" : {
+          "identifier" : 1,
+          "date_modified" : "2023-09-01T00:00:00.000Z",
+          "name" : "A name.",
+          "plain" : "A plain text description.",
+          "issn" : "20493630",
+          "type" : "publication",
+          "isbn" : "9783161484100",
+          "html" : "An html description."
+        },
+        "sort" : [
+          1
+        ]
+      }
+    ]
+  }
+}
diff --git a/src/tests/resources/elasticsearch/service_search.json b/src/tests/resources/elasticsearch/service_search.json
new file mode 100644
index 00000000..f2a151af
--- /dev/null
+++ b/src/tests/resources/elasticsearch/service_search.json
@@ -0,0 +1,36 @@
+{
+  "took" : 2,
+  "timed_out" : false,
+  "_shards" : {
+    "total" : 1,
+    "successful" : 1,
+    "skipped" : 0,
+    "failed" : 0
+  },
+  "hits" : {
+    "total" : {
+      "value" : 1,
+      "relation" : "eq"
+    },
+    "max_score" : null,
+    "hits" : [
+      {
+        "_index" : "service",
+        "_id" : "service_1",
+        "_score" : null,
+        "_source" : {
+          "identifier" : 1,
+          "slogan" : "A slogan.",
+          "date_modified" : "2023-09-01T00:00:00.000Z",
+          "name" : "A name.",
+          "plain" : "A plain text description.",
+          "type" : "service",
+          "html" : "An html description."
+        },
+        "sort" : [
+          1
+        ]
+      }
+    ]
+  }
+}
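These JSON files are canned Elasticsearch responses used to mock the search backend in the tests that follow. The query the search routers actually send is not part of this excerpt; purely as an illustration, a response of this shape could be produced by a simple match query against one of the indices. Host, credentials and the query below are placeholders.

# Placeholders throughout: host, credentials and the query are illustrative,
# not the routers' actual request.
from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200", basic_auth=("elastic", "changeme"))
response = es.search(
    index="publication",
    query={"match": {"plain": "description"}},
    size=10,
    from_=0,
    sort="identifier",
)
print(response["hits"]["hits"][0]["_source"]["name"])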
diff --git a/src/tests/routers/search_routers/__init__.py b/src/tests/routers/search_routers/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/tests/routers/search_routers/test_search_routers.py b/src/tests/routers/search_routers/test_search_routers.py
new file mode 100644
index 00000000..404717f6
--- /dev/null
+++ b/src/tests/routers/search_routers/test_search_routers.py
@@ -0,0 +1,145 @@
+import os
+import json
+import pytest
+
+from unittest.mock import Mock
+
+from elasticsearch import Elasticsearch
+from starlette.testclient import TestClient
+
+from routers.search_routers.elasticsearch import ElasticsearchSingleton
+from tests.testutils.paths import path_test_resources
+import routers.search_routers as sr
+
+
+@pytest.mark.parametrize("search_router", sr.router_list)
+def test_search_happy_path(client: TestClient, search_router):
+    """Tests the search router"""
+    mocked_elasticsearch = Elasticsearch("https://example.com:9200")
+    ElasticsearchSingleton().patch(mocked_elasticsearch)
+
+    resources_path = os.path.join(path_test_resources(), "elasticsearch")
+    resource_file = f"{search_router.es_index}_search.json"
+    mocked_file = os.path.join(resources_path, resource_file)
+    with open(mocked_file, "r") as f:
+        mocked_results = json.load(f)
+
+    mocked_elasticsearch.search = Mock(return_value=mocked_results)
+    search_service = f"/search/{search_router.resource_name_plural}/v1"
+    params = {"search_query": "description", "get_all": False}
+    response = client.get(search_service, params=params)
+
+    assert response.status_code == 200, response.json()
+    resource = response.json()["resources"][0]
+
+    assert resource["identifier"] == 1
+    assert resource["name"] == "A name."
+    assert resource["description"]["plain"] == "A plain text description."
+    assert resource["description"]["html"] == "An html description."
+    assert resource["aiod_entry"]["date_modified"] == "2023-09-01T00:00:00+00:00"
+    assert resource["aiod_entry"]["status"] is None
+
+    global_fields = {"name", "plain", "html"}
+    extra_fields = list(search_router.indexed_fields ^ global_fields)
+    for field in extra_fields:
+        assert resource[field]
+
+
+@pytest.mark.parametrize("search_router", sr.router_list)
+def test_search_bad_platform(client: TestClient, search_router):
+    """Tests the search router bad platform error"""
+    mocked_elasticsearch = Elasticsearch("https://example.com:9200")
+    ElasticsearchSingleton().patch(mocked_elasticsearch)
+
+    resources_path = os.path.join(path_test_resources(), "elasticsearch")
+    resource_file = f"{search_router.es_index}_search.json"
+    mocked_file = os.path.join(resources_path, resource_file)
+    with open(mocked_file, "r") as f:
+        mocked_results = json.load(f)
+
+    mocked_elasticsearch.search = Mock(return_value=mocked_results)
+    search_service = f"/search/{search_router.resource_name_plural}/v1"
+    params = {"search_query": "description", "platforms": ["bad_platform"]}
+    response = client.get(search_service, params=params)
+
+    assert response.status_code == 400, response.json()
+    err_msg = "The available platforms are"
+    assert response.json()["detail"][: len(err_msg)] == err_msg
+
+
+@pytest.mark.parametrize("search_router", sr.router_list)
+def test_search_bad_fields(client: TestClient, search_router):
+    """Tests the search router bad fields error"""
+    mocked_elasticsearch = Elasticsearch("https://example.com:9200")
+    ElasticsearchSingleton().patch(mocked_elasticsearch)
+
+    resources_path = os.path.join(path_test_resources(), "elasticsearch")
+    resource_file = f"{search_router.es_index}_search.json"
+    mocked_file = os.path.join(resources_path, resource_file)
+    with open(mocked_file, "r") as f:
+        mocked_results = json.load(f)
+
+    mocked_elasticsearch.search = Mock(return_value=mocked_results)
+    search_service = f"/search/{search_router.resource_name_plural}/v1"
+    params = {"search_query": "description", "search_fields": ["bad_field"]}
+    response = client.get(search_service, params=params)
+
+    assert response.status_code == 400, response.json()
+    err_msg = "The available search fields for this entity are"
+    assert response.json()["detail"][: len(err_msg)] == err_msg
+
+
+@pytest.mark.parametrize("search_router", sr.router_list)
+def test_search_bad_limit(client: TestClient, search_router):
+    """Tests the search router bad limit error"""
+    mocked_elasticsearch = Elasticsearch("https://example.com:9200")
+    ElasticsearchSingleton().patch(mocked_elasticsearch)
+
+    resources_path = os.path.join(path_test_resources(), "elasticsearch")
+    resource_file = f"{search_router.es_index}_search.json"
+    mocked_file = os.path.join(resources_path, resource_file)
+    with open(mocked_file, "r") as f:
+        mocked_results = json.load(f)
+
+    mocked_elasticsearch.search = Mock(return_value=mocked_results)
+    search_service = f"/search/{search_router.resource_name_plural}/v1"
+    params = {"search_query": "description", "limit": 1001}
+    response = client.get(search_service, params=params)
+
+    assert response.status_code == 422, response.json()
+    assert response.json()["detail"] == [
+        {
+            "ctx": {"limit_value": 1000},
+            "loc": ["query", "limit"],
+            "msg": "ensure this value is less than or equal to 1000",
+            "type": "value_error.number.not_le",
+        }
+    ]
+
+
+@pytest.mark.parametrize("search_router", sr.router_list)
+def test_search_bad_offset(client: TestClient, search_router):
+    """Tests the search router bad offset error"""
+    mocked_elasticsearch = Elasticsearch("https://example.com:9200")
+    ElasticsearchSingleton().patch(mocked_elasticsearch)
+
+    resources_path = os.path.join(path_test_resources(), "elasticsearch")
+    resource_file = f"{search_router.es_index}_search.json"
+    mocked_file = os.path.join(resources_path, resource_file)
+    with open(mocked_file, "r") as f:
+        mocked_results = json.load(f)
+
+    mocked_elasticsearch.search = Mock(return_value=mocked_results)
+    search_service = f"/search/{search_router.resource_name_plural}/v1"
+    params = {"search_query": "description", "offset": -1}
+    response = client.get(search_service, params=params)
+
+    assert response.status_code == 422, response.json()
+    assert response.json()["detail"] == [
+        {
+            "ctx": {"limit_value": 0},
+            "loc": ["query", "offset"],
+            "msg": "ensure this value is greater than or equal to 0",
+            "type": "value_error.number.not_ge",
+        }
+    ]
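Outside the test suite, the same endpoints can be called against a running deployment. The sketch below is a hedged example that only reuses the query parameters exercised by the tests; the host and the plural resource name are assumptions, not taken from this diff.

# Hypothetical host and resource name; only the query parameters mirror the tests above.
import requests

response = requests.get(
    "http://localhost/search/datasets/v1",
    params={"search_query": "description", "search_fields": "name", "limit": 10, "offset": 0},
)
print(response.status_code, response.json())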