diff --git a/docker-compose.yaml b/docker-compose.yaml index afc3770e..c7a40e73 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -44,8 +44,7 @@ services: depends_on: app: condition: service_healthy - - + deletion: build: context: deletion @@ -187,35 +186,25 @@ services: interval: 5s timeout: 30s retries: 30 - -# elasticsearch_setup: -# build: -# context: es/setup/ -# dockerfile: Dockerfile -# env_file: .env -# environment: -# - ES_USER=$ES_USER -# - ES_PASSWORD=$ES_PASSWORD -# restart: "no" -# depends_on: -# elasticsearch: -# condition: service_healthy - logstash_setup: - build: - context: logstash/setup/ - dockerfile: Dockerfile + es_logstash_setup: + image: ai4eu_server + container_name: es_logstash_setup env_file: .env environment: - MYSQL_ROOT_PASSWORD=$MYSQL_ROOT_PASSWORD - ES_USER=$ES_USER - ES_PASSWORD=$ES_PASSWORD volumes: - - ./src/setup/logstash:/app + - ./src:/app - ./logstash:/logstash command: > - python generate_logstash_config_files.py + /bin/bash -c "python setup/logstash/generate_logstash_config_files.py && + python setup/elasticsearch/generate_elasticsearch_indices.py" restart: "no" + depends_on: + elasticsearch: + condition: service_healthy logstash: build: @@ -248,13 +237,7 @@ services: target: /usr/share/logstash/sql read_only: true depends_on: - app: - condition: service_healthy - elasticsearch: - condition: service_healthy -# elasticsearch_setup: -# condition: service_completed_successfully - logstash_setup: - condition: service_completed_successfully fill-db-with-examples: condition: service_completed_successfully + es_logstash_setup: + condition: service_completed_successfully diff --git a/es/setup/Dockerfile b/es/setup/Dockerfile deleted file mode 100644 index 1e9c0bef..00000000 --- a/es/setup/Dockerfile +++ /dev/null @@ -1,17 +0,0 @@ -FROM ubuntu:22.04 - -RUN apt-get update && apt-get install -y curl - -COPY dataset.json /dataset.json -COPY event.json /event.json -COPY experiment.json /experiment.json -COPY ml_model.json /ml_model.json -COPY news.json /news.json -COPY organisation.json /organisation.json -COPY project.json /project.json -COPY publication.json /publication.json -COPY service.json /service.json -COPY create_indices.sh /create_indices.sh - -ENTRYPOINT ["/bin/bash", "/create_indices.sh"] - diff --git a/es/setup/create_indices.sh b/es/setup/create_indices.sh deleted file mode 100755 index cb0cb43a..00000000 --- a/es/setup/create_indices.sh +++ /dev/null @@ -1,9 +0,0 @@ -curl -u ${ES_USER}:${ES_PASSWORD} -X PUT elasticsearch:9200/dataset?pretty -H 'Content-Type: application/json' -d @/dataset.json -curl -u ${ES_USER}:${ES_PASSWORD} -X PUT elasticsearch:9200/event?pretty -H 'Content-Type: application/json' -d @/event.json -curl -u ${ES_USER}:${ES_PASSWORD} -X PUT elasticsearch:9200/experiment?pretty -H 'Content-Type: application/json' -d @/experiment.json -curl -u ${ES_USER}:${ES_PASSWORD} -X PUT elasticsearch:9200/ml_model?pretty -H 'Content-Type: application/json' -d @/ml_model.json -curl -u ${ES_USER}:${ES_PASSWORD} -X PUT elasticsearch:9200/news?pretty -H 'Content-Type: application/json' -d @/news.json -curl -u ${ES_USER}:${ES_PASSWORD} -X PUT elasticsearch:9200/organisation?pretty -H 'Content-Type: application/json' -d @/organisation.json -curl -u ${ES_USER}:${ES_PASSWORD} -X PUT elasticsearch:9200/project?pretty -H 'Content-Type: application/json' -d @/project.json -curl -u ${ES_USER}:${ES_PASSWORD} -X PUT elasticsearch:9200/publication?pretty -H 'Content-Type: application/json' -d @/publication.json -curl -u ${ES_USER}:${ES_PASSWORD} -X PUT elasticsearch:9200/service?pretty -H 'Content-Type: application/json' -d @/service.json diff --git a/es/setup/dataset.json b/es/setup/dataset.json deleted file mode 100644 index 395f68c8..00000000 --- a/es/setup/dataset.json +++ /dev/null @@ -1,159 +0,0 @@ -{ - "mappings" : { - "properties" : { - "@timestamp" : { - "type" : "date", - "index" : false - }, - "@version" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "asset_identifier" : { - "type" : "long", - "index" : false - }, - "date_created" : { - "type" : "date", - "index" : false - }, - "date_modified" : { - "type" : "date" - }, - "date_published" : { - "type" : "date", - "index" : false - }, - "description" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "identifier" : { - "type" : "long", - "index" : false - }, - "issn" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "license" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "measurement_technique" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "name" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "platform" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "platform_identifier" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "resource_identifier" : { - "type" : "long", - "index" : false - }, - "same_as" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "status" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "temporal_coverage" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "type" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "version" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - } - } - } -} diff --git a/es/setup/event.json b/es/setup/event.json deleted file mode 100644 index 2deb13ef..00000000 --- a/es/setup/event.json +++ /dev/null @@ -1,152 +0,0 @@ -{ - "mappings" : { - "properties" : { - "@timestamp" : { - "type" : "date", - "index" : false - }, - "@version" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "date_created" : { - "type" : "date", - "index" : false - }, - "date_modified" : { - "type" : "date" - }, - "description" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "end_date" : { - "type" : "date", - "index" : false - }, - "identifier" : { - "type" : "long", - "index" : false - }, - "mode" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "name" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "organiser_identifier" : { - "type" : "long", - "index" : false - }, - "organiser_type" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "platform" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "platform_identifier" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "registration_link" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "resource_identifier" : { - "type" : "long", - "index" : false - }, - "same_as" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "schedule" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "start_date" : { - "type" : "date", - "index" : false - }, - "status" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "type" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - } - } - } -} diff --git a/es/setup/experiment.json b/es/setup/experiment.json deleted file mode 100644 index 4787149b..00000000 --- a/es/setup/experiment.json +++ /dev/null @@ -1,161 +0,0 @@ -{ - "mappings" : { - "properties" : { - "@timestamp" : { - "type" : "date", - "index" : false - }, - "@version" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "asset_identifier" : { - "type" : "long", - "index" : false - }, - "date_created" : { - "type" : "date", - "index" : false - }, - "date_modified" : { - "type" : "date" - }, - "date_published" : { - "type" : "date", - "index" : false - }, - "description" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "execution_settings" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "experimental_workflow" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "identifier" : { - "type" : "long", - "index" : false - }, - "license" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "name" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "platform" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "platform_identifier" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "reproducibility_explanation" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "resource_identifier" : { - "type" : "long", - "index" : false - }, - "same_as" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "status" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "type" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "version" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - } - } - } -} - diff --git a/es/setup/ml_model.json b/es/setup/ml_model.json deleted file mode 100644 index d847b674..00000000 --- a/es/setup/ml_model.json +++ /dev/null @@ -1,141 +0,0 @@ -{ - "mappings" : { - "properties" : { - "@timestamp" : { - "type" : "date", - "index" : false - }, - "@version" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "asset_identifier" : { - "type" : "long", - "index" : false - }, - "date_created" : { - "type" : "date", - "index" : false - }, - "date_modified" : { - "type" : "date" - }, - "date_published" : { - "type" : "date", - "index" : false - }, - "description" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "identifier" : { - "type" : "long", - "index" : false - }, - "license" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "ml_model_type" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "name" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "platform" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "platform_identifier" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "resource_identifier" : { - "type" : "long", - "index" : false - }, - "same_as" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "status" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "type" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "version" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - } - } - } -} - diff --git a/es/setup/news.json b/es/setup/news.json deleted file mode 100644 index 7349acad..00000000 --- a/es/setup/news.json +++ /dev/null @@ -1,120 +0,0 @@ -{ - "mappings" : { - "properties" : { - "@timestamp" : { - "type" : "date", - "index" : false - }, - "@version" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "alternative_headline" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "date_created" : { - "type" : "date", - "index" : false - }, - "date_modified" : { - "type" : "date" - }, - "description" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "headline" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "identifier" : { - "type" : "long", - "index" : false - }, - "name" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "platform" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "platform_identifier" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "resource_identifier" : { - "type" : "long", - "index" : false - }, - "same_as" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "status" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "type" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - } - } - } -} diff --git a/es/setup/organisation.json b/es/setup/organisation.json deleted file mode 100644 index 9d02c7b3..00000000 --- a/es/setup/organisation.json +++ /dev/null @@ -1,136 +0,0 @@ -{ - "mappings" : { - "properties" : { - "@timestamp" : { - "type" : "date", - "index" : false - }, - "@version" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "agent" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "date_created" : { - "type" : "date", - "index" : false - }, - "date_founded" : { - "type" : "date", - "format": "yyyy-MM-dd", - "index" : false - }, - "date_modified" : { - "type" : "date" - }, - "description" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "identifier" : { - "type" : "long", - "index" : false - }, - "legal_name" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "name" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "organisation_type" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "platform" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "platform_identifier" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "resource_identifier" : { - "type" : "long", - "index" : false - }, - "same_as" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "status" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "type" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - } - } - } -} diff --git a/es/setup/project.json b/es/setup/project.json deleted file mode 100644 index 74dfb9cd..00000000 --- a/es/setup/project.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "mappings" : { - "properties" : { - "@timestamp" : { - "type" : "date", - "index" : false - }, - "@version" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "coordinator_identifier" : { - "type" : "long", - "index" : false - }, - "coordinator_name" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "date_created" : { - "type" : "date", - "index" : false - }, - "date_modified" : { - "type" : "date" - }, - "description" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "end_date" : { - "type" : "date", - "index" : false - }, - "identifier" : { - "type" : "long", - "index" : false - }, - "name" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "platform" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "platform_identifier" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "resource_identifier" : { - "type" : "long", - "index" : false - }, - "same_as" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "start_date" : { - "type" : "date", - "index" : false - }, - "status" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "total_cost_euro" : { - "type" : "float", - "index" : false - }, - "type" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - } - } - } -} diff --git a/es/setup/publication.json b/es/setup/publication.json deleted file mode 100644 index 5d07ad11..00000000 --- a/es/setup/publication.json +++ /dev/null @@ -1,168 +0,0 @@ -{ - "mappings" : { - "properties" : { - "@timestamp" : { - "type" : "date", - "index" : false - }, - "@version" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "asset_identifier" : { - "type" : "long", - "index" : false - }, - "date_created" : { - "type" : "date", - "index" : false - }, - "date_modified" : { - "type" : "date" - }, - "date_published" : { - "type" : "date", - "index" : false - }, - "description" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "identifier" : { - "type" : "long", - "index" : false - }, - "isbn" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "issn" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "license" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "name" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "permanent_identifier" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "platform" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "platform_identifier" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "publication_type" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "resource_identifier" : { - "type" : "long", - "index" : false - }, - "same_as" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "status" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "type" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "version" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - } - } - } -} diff --git a/es/setup/service.json b/es/setup/service.json deleted file mode 100644 index 0fc18eaf..00000000 --- a/es/setup/service.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "mappings" : { - "properties" : { - "@timestamp" : { - "type" : "date", - "index" : false - }, - "@version" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "date_created" : { - "type" : "date", - "index" : false - }, - "date_modified" : { - "type" : "date" - }, - "description" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "identifier" : { - "type" : "long", - "index" : false - }, - "name" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "platform" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "platform_identifier" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "resource_identifier" : { - "type" : "long", - "index" : false - }, - "same_as" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "slogan" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "status" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "terms_of_service" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "type" : { - "type" : "text", - "index" : false, - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - } - } - } -} - diff --git a/logstash/config/logstash.yml b/logstash/config/logstash.yml index f6f2926e..8b137891 100644 --- a/logstash/config/logstash.yml +++ b/logstash/config/logstash.yml @@ -1,7 +1 @@ -# This file has been generated by `logstash_config.py` file -# --------------------------------------------------------- -http.host: "0.0.0.0" -xpack.monitoring.elasticsearch.hosts: [ "http://elasticsearch:9200" ] -xpack.monitoring.enabled: true -xpack.monitoring.elasticsearch.username: elastic -xpack.monitoring.elasticsearch.password: changeme + diff --git a/logstash/pipeline/conf/init_table.conf b/logstash/pipeline/conf/init_table.conf deleted file mode 100644 index 32dda9f1..00000000 --- a/logstash/pipeline/conf/init_table.conf +++ /dev/null @@ -1,215 +0,0 @@ -# This file has been generated by `logstash_config.py` file -# --------------------------------------------------------- -input { - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - clean_run => true - record_last_run => false - statement_filepath => "/usr/share/logstash/sql/init_dataset.sql" - type => "dataset" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - clean_run => true - record_last_run => false - statement_filepath => "/usr/share/logstash/sql/init_event.sql" - type => "event" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - clean_run => true - record_last_run => false - statement_filepath => "/usr/share/logstash/sql/init_experiment.sql" - type => "experiment" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - clean_run => true - record_last_run => false - statement_filepath => "/usr/share/logstash/sql/init_ml_model.sql" - type => "ml_model" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - clean_run => true - record_last_run => false - statement_filepath => "/usr/share/logstash/sql/init_news.sql" - type => "news" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - clean_run => true - record_last_run => false - statement_filepath => "/usr/share/logstash/sql/init_organisation.sql" - type => "organisation" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - clean_run => true - record_last_run => false - statement_filepath => "/usr/share/logstash/sql/init_project.sql" - type => "project" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - clean_run => true - record_last_run => false - statement_filepath => "/usr/share/logstash/sql/init_publication.sql" - type => "publication" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - clean_run => true - record_last_run => false - statement_filepath => "/usr/share/logstash/sql/init_service.sql" - type => "service" - } -} -filter { - if ![application_area] { - mutate { - replace => {"application_area" => ""} - } - } - mutate { - remove_field => ["@version", "@timestamp"] - split => {"application_area" => ","} - } - if [type] == "organisation" { - ruby { - code => ' - t = Time.at(event.get("date_founded").to_f) - event.set("date_founded", t.strftime("%Y-%m-%d")) - ' - } - } - -} -output { - if [type] == "dataset" { - elasticsearch { - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "dataset" - document_id => "dataset_%{identifier}" - } - } - if [type] == "event" { - elasticsearch { - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "event" - document_id => "event_%{identifier}" - } - } - if [type] == "experiment" { - elasticsearch { - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "experiment" - document_id => "experiment_%{identifier}" - } - } - if [type] == "ml_model" { - elasticsearch { - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "ml_model" - document_id => "ml_model_%{identifier}" - } - } - if [type] == "news" { - elasticsearch { - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "news" - document_id => "news_%{identifier}" - } - } - if [type] == "organisation" { - elasticsearch { - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "organisation" - document_id => "organisation_%{identifier}" - } - } - if [type] == "project" { - elasticsearch { - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "project" - document_id => "project_%{identifier}" - } - } - if [type] == "publication" { - elasticsearch { - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "publication" - document_id => "publication_%{identifier}" - } - } - if [type] == "service" { - elasticsearch { - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "service" - document_id => "service_%{identifier}" - } - } -} diff --git a/logstash/pipeline/conf/sync_table.conf b/logstash/pipeline/conf/sync_table.conf deleted file mode 100644 index 76c76fcf..00000000 --- a/logstash/pipeline/conf/sync_table.conf +++ /dev/null @@ -1,449 +0,0 @@ -# This file has been generated by `logstash_config.py` file -# --------------------------------------------------------- -input { - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - use_column_value => true - tracking_column => "date_modified" - tracking_column_type => "timestamp" - schedule => "*/5 * * * * *" - statement_filepath => "/usr/share/logstash/sql/sync_dataset.sql" - type => "dataset" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - use_column_value => true - tracking_column => "date_deleted" - tracking_column_type => "timestamp" - schedule => "*/5 * * * * *" - statement_filepath => "/usr/share/logstash/sql/rm_dataset.sql" - type => "rm_dataset" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - use_column_value => true - tracking_column => "date_modified" - tracking_column_type => "timestamp" - schedule => "*/5 * * * * *" - statement_filepath => "/usr/share/logstash/sql/sync_event.sql" - type => "event" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - use_column_value => true - tracking_column => "date_deleted" - tracking_column_type => "timestamp" - schedule => "*/5 * * * * *" - statement_filepath => "/usr/share/logstash/sql/rm_event.sql" - type => "rm_event" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - use_column_value => true - tracking_column => "date_modified" - tracking_column_type => "timestamp" - schedule => "*/5 * * * * *" - statement_filepath => "/usr/share/logstash/sql/sync_experiment.sql" - type => "experiment" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - use_column_value => true - tracking_column => "date_deleted" - tracking_column_type => "timestamp" - schedule => "*/5 * * * * *" - statement_filepath => "/usr/share/logstash/sql/rm_experiment.sql" - type => "rm_experiment" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - use_column_value => true - tracking_column => "date_modified" - tracking_column_type => "timestamp" - schedule => "*/5 * * * * *" - statement_filepath => "/usr/share/logstash/sql/sync_ml_model.sql" - type => "ml_model" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - use_column_value => true - tracking_column => "date_deleted" - tracking_column_type => "timestamp" - schedule => "*/5 * * * * *" - statement_filepath => "/usr/share/logstash/sql/rm_ml_model.sql" - type => "rm_ml_model" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - use_column_value => true - tracking_column => "date_modified" - tracking_column_type => "timestamp" - schedule => "*/5 * * * * *" - statement_filepath => "/usr/share/logstash/sql/sync_news.sql" - type => "news" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - use_column_value => true - tracking_column => "date_deleted" - tracking_column_type => "timestamp" - schedule => "*/5 * * * * *" - statement_filepath => "/usr/share/logstash/sql/rm_news.sql" - type => "rm_news" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - use_column_value => true - tracking_column => "date_modified" - tracking_column_type => "timestamp" - schedule => "*/5 * * * * *" - statement_filepath => "/usr/share/logstash/sql/sync_organisation.sql" - type => "organisation" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - use_column_value => true - tracking_column => "date_deleted" - tracking_column_type => "timestamp" - schedule => "*/5 * * * * *" - statement_filepath => "/usr/share/logstash/sql/rm_organisation.sql" - type => "rm_organisation" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - use_column_value => true - tracking_column => "date_modified" - tracking_column_type => "timestamp" - schedule => "*/5 * * * * *" - statement_filepath => "/usr/share/logstash/sql/sync_project.sql" - type => "project" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - use_column_value => true - tracking_column => "date_deleted" - tracking_column_type => "timestamp" - schedule => "*/5 * * * * *" - statement_filepath => "/usr/share/logstash/sql/rm_project.sql" - type => "rm_project" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - use_column_value => true - tracking_column => "date_modified" - tracking_column_type => "timestamp" - schedule => "*/5 * * * * *" - statement_filepath => "/usr/share/logstash/sql/sync_publication.sql" - type => "publication" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - use_column_value => true - tracking_column => "date_deleted" - tracking_column_type => "timestamp" - schedule => "*/5 * * * * *" - statement_filepath => "/usr/share/logstash/sql/rm_publication.sql" - type => "rm_publication" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - use_column_value => true - tracking_column => "date_modified" - tracking_column_type => "timestamp" - schedule => "*/5 * * * * *" - statement_filepath => "/usr/share/logstash/sql/sync_service.sql" - type => "service" - } - jdbc { - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "root" - jdbc_password => "ok" - use_column_value => true - tracking_column => "date_deleted" - tracking_column_type => "timestamp" - schedule => "*/5 * * * * *" - statement_filepath => "/usr/share/logstash/sql/rm_service.sql" - type => "rm_service" - } -} -filter { - if ![application_area] { - mutate { - replace => {"application_area" => ""} - } - } - mutate { - remove_field => ["@version", "@timestamp"] - split => {"application_area" => ","} - } - if [type] == "organisation" or [type] == "rm_organisation" { - ruby { - code => ' - t = Time.at(event.get("date_founded").to_f) - event.set("date_founded", t.strftime("%Y-%m-%d")) - ' - } - } - -} -output { - if [type] == "dataset" { - elasticsearch { - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "dataset" - document_id => "dataset_%{identifier}" - } - } - if [type] == "rm_dataset" { - elasticsearch { - action => "delete" - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "dataset" - document_id => "dataset_%{identifier}" - } - } - if [type] == "event" { - elasticsearch { - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "event" - document_id => "event_%{identifier}" - } - } - if [type] == "rm_event" { - elasticsearch { - action => "delete" - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "event" - document_id => "event_%{identifier}" - } - } - if [type] == "experiment" { - elasticsearch { - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "experiment" - document_id => "experiment_%{identifier}" - } - } - if [type] == "rm_experiment" { - elasticsearch { - action => "delete" - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "experiment" - document_id => "experiment_%{identifier}" - } - } - if [type] == "ml_model" { - elasticsearch { - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "ml_model" - document_id => "ml_model_%{identifier}" - } - } - if [type] == "rm_ml_model" { - elasticsearch { - action => "delete" - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "ml_model" - document_id => "ml_model_%{identifier}" - } - } - if [type] == "news" { - elasticsearch { - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "news" - document_id => "news_%{identifier}" - } - } - if [type] == "rm_news" { - elasticsearch { - action => "delete" - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "news" - document_id => "news_%{identifier}" - } - } - if [type] == "organisation" { - elasticsearch { - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "organisation" - document_id => "organisation_%{identifier}" - } - } - if [type] == "rm_organisation" { - elasticsearch { - action => "delete" - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "organisation" - document_id => "organisation_%{identifier}" - } - } - if [type] == "project" { - elasticsearch { - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "project" - document_id => "project_%{identifier}" - } - } - if [type] == "rm_project" { - elasticsearch { - action => "delete" - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "project" - document_id => "project_%{identifier}" - } - } - if [type] == "publication" { - elasticsearch { - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "publication" - document_id => "publication_%{identifier}" - } - } - if [type] == "rm_publication" { - elasticsearch { - action => "delete" - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "publication" - document_id => "publication_%{identifier}" - } - } - if [type] == "service" { - elasticsearch { - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "service" - document_id => "service_%{identifier}" - } - } - if [type] == "rm_service" { - elasticsearch { - action => "delete" - hosts => "elasticsearch:9200" - user => "elastic" - password => "changeme" - ecs_compatibility => disabled - index => "service" - document_id => "service_%{identifier}" - } - } -} diff --git a/logstash/pipeline/sql/init_dataset.sql b/logstash/pipeline/sql/init_dataset.sql deleted file mode 100644 index 7b66724b..00000000 --- a/logstash/pipeline/sql/init_dataset.sql +++ /dev/null @@ -1,7 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT aiod_entry.date_modified, dataset.identifier, name, description_identifier, text.plain as 'plain', text.html as 'html', issn -FROM aiod.dataset -INNER JOIN aiod.aiod_entry ON aiod.dataset.aiod_entry_identifier=aiod.aiod_entry.identifier -LEFT JOIN aiod.text ON aiod.dataset.description_identifier=aiod.text.identifier -WHERE aiod.dataset.date_deleted IS NULL diff --git a/logstash/pipeline/sql/init_event.sql b/logstash/pipeline/sql/init_event.sql deleted file mode 100644 index 19882005..00000000 --- a/logstash/pipeline/sql/init_event.sql +++ /dev/null @@ -1,7 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT aiod_entry.date_modified, event.identifier, name, description_identifier, text.plain, text.html -FROM aiod.event -INNER JOIN aiod.aiod_entry ON aiod.event.aiod_entry_identifier=aiod.aiod_entry.identifier -LEFT JOIN aiod.text ON aiod.event.description_identifier=aiod.text.identifier -WHERE aiod.event.date_deleted IS NULL diff --git a/logstash/pipeline/sql/init_experiment.sql b/logstash/pipeline/sql/init_experiment.sql deleted file mode 100644 index 045cfd5b..00000000 --- a/logstash/pipeline/sql/init_experiment.sql +++ /dev/null @@ -1,7 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT aiod_entry.date_modified, experiment.identifier, name, description_identifier, text.plain as 'plain', text.html as 'html' -FROM aiod.experiment -INNER JOIN aiod.aiod_entry ON aiod.experiment.aiod_entry_identifier=aiod.aiod_entry.identifier -LEFT JOIN aiod.text ON aiod.experiment.description_identifier=aiod.text.identifier -WHERE aiod.experiment.date_deleted IS NULL diff --git a/logstash/pipeline/sql/init_ml_model.sql b/logstash/pipeline/sql/init_ml_model.sql deleted file mode 100644 index 8aa9a400..00000000 --- a/logstash/pipeline/sql/init_ml_model.sql +++ /dev/null @@ -1,7 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT aiod_entry.date_modified, ml_model.identifier, name, description_identifier, text.plain as 'plain', text.html as 'html' -FROM aiod.ml_model -INNER JOIN aiod.aiod_entry ON aiod.ml_model.aiod_entry_identifier=aiod.aiod_entry.identifier -LEFT JOIN aiod.text ON aiod.ml_model.description_identifier=aiod.text.identifier -WHERE aiod.ml_model.date_deleted IS NULL diff --git a/logstash/pipeline/sql/init_news.sql b/logstash/pipeline/sql/init_news.sql deleted file mode 100644 index a53cd961..00000000 --- a/logstash/pipeline/sql/init_news.sql +++ /dev/null @@ -1,7 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT aiod_entry.date_modified, news.identifier, name, description_identifier, text.plain as 'plain', text.html as 'html', headline, alternative_headline -FROM aiod.news -INNER JOIN aiod.aiod_entry ON aiod.news.aiod_entry_identifier=aiod.aiod_entry.identifier -LEFT JOIN aiod.text ON aiod.news.description_identifier=aiod.text.identifier -WHERE aiod.news.date_deleted IS NULL diff --git a/logstash/pipeline/sql/init_organisation.sql b/logstash/pipeline/sql/init_organisation.sql deleted file mode 100644 index 4272025b..00000000 --- a/logstash/pipeline/sql/init_organisation.sql +++ /dev/null @@ -1,7 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT aiod_entry.date_modified, organisation.identifier, name, description_identifier, text.plain as 'plain', text.html as 'html', legal_name -FROM aiod.organisation -INNER JOIN aiod.aiod_entry ON aiod.organisation.aiod_entry_identifier=aiod.aiod_entry.identifier -LEFT JOIN aiod.text ON aiod.organisation.description_identifier=aiod.text.identifier -WHERE aiod.organisation.date_deleted IS NULL diff --git a/logstash/pipeline/sql/init_project.sql b/logstash/pipeline/sql/init_project.sql deleted file mode 100644 index 6d58918f..00000000 --- a/logstash/pipeline/sql/init_project.sql +++ /dev/null @@ -1,7 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT aiod_entry.date_modified, project.identifier, name, description_identifier, text.plain as 'plain', text.html as 'html' -FROM aiod.project -INNER JOIN aiod.aiod_entry ON aiod.project.aiod_entry_identifier=aiod.aiod_entry.identifier -LEFT JOIN aiod.text ON aiod.project.description_identifier=aiod.text.identifier -WHERE aiod.project.date_deleted IS NULL diff --git a/logstash/pipeline/sql/init_publication.sql b/logstash/pipeline/sql/init_publication.sql deleted file mode 100644 index 6da544db..00000000 --- a/logstash/pipeline/sql/init_publication.sql +++ /dev/null @@ -1,7 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT aiod_entry.date_modified, publication.identifier, name, description_identifier, text.plain as 'plain', text.html as 'html', issn, isbn -FROM aiod.publication -INNER JOIN aiod.aiod_entry ON aiod.publication.aiod_entry_identifier=aiod.aiod_entry.identifier -LEFT JOIN aiod.text ON aiod.publication.description_identifier=aiod.text.identifier -WHERE aiod.publication.date_deleted IS NULL diff --git a/logstash/pipeline/sql/init_service.sql b/logstash/pipeline/sql/init_service.sql deleted file mode 100644 index c41dfbdf..00000000 --- a/logstash/pipeline/sql/init_service.sql +++ /dev/null @@ -1,7 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT aiod_entry.date_modified, service.identifier, name, description_identifier, text.plain as 'plain', text.html as 'html', slogan -FROM aiod.service -INNER JOIN aiod.aiod_entry ON aiod.service.aiod_entry_identifier=aiod.aiod_entry.identifier -LEFT JOIN aiod.text ON aiod.service.description_identifier=aiod.text.identifier -WHERE aiod.service.date_deleted IS NULL diff --git a/logstash/pipeline/sql/rm_dataset.sql b/logstash/pipeline/sql/rm_dataset.sql deleted file mode 100644 index da1c359f..00000000 --- a/logstash/pipeline/sql/rm_dataset.sql +++ /dev/null @@ -1,5 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT dataset.identifier -FROM aiod.dataset -WHERE aiod.dataset.date_deleted IS NOT NULL AND aiod.dataset.date_deleted > :sql_last_value diff --git a/logstash/pipeline/sql/rm_event.sql b/logstash/pipeline/sql/rm_event.sql deleted file mode 100644 index 2bba4020..00000000 --- a/logstash/pipeline/sql/rm_event.sql +++ /dev/null @@ -1,5 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT event.identifier -FROM aiod.event -WHERE aiod.event.date_deleted IS NOT NULL AND aiod.event.date_deleted > :sql_last_value diff --git a/logstash/pipeline/sql/rm_experiment.sql b/logstash/pipeline/sql/rm_experiment.sql deleted file mode 100644 index 92ae7107..00000000 --- a/logstash/pipeline/sql/rm_experiment.sql +++ /dev/null @@ -1,5 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT experiment.identifier -FROM aiod.experiment -WHERE aiod.experiment.date_deleted IS NOT NULL AND aiod.experiment.date_deleted > :sql_last_value diff --git a/logstash/pipeline/sql/rm_ml_model.sql b/logstash/pipeline/sql/rm_ml_model.sql deleted file mode 100644 index 51d9e2df..00000000 --- a/logstash/pipeline/sql/rm_ml_model.sql +++ /dev/null @@ -1,5 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT ml_model.identifier -FROM aiod.ml_model -WHERE aiod.ml_model.date_deleted IS NOT NULL AND aiod.ml_model.date_deleted > :sql_last_value diff --git a/logstash/pipeline/sql/rm_news.sql b/logstash/pipeline/sql/rm_news.sql deleted file mode 100644 index 0f53c36a..00000000 --- a/logstash/pipeline/sql/rm_news.sql +++ /dev/null @@ -1,5 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT news.identifier -FROM aiod.news -WHERE aiod.news.date_deleted IS NOT NULL AND aiod.news.date_deleted > :sql_last_value diff --git a/logstash/pipeline/sql/rm_organisation.sql b/logstash/pipeline/sql/rm_organisation.sql deleted file mode 100644 index 160df96d..00000000 --- a/logstash/pipeline/sql/rm_organisation.sql +++ /dev/null @@ -1,5 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT organisation.identifier -FROM aiod.organisation -WHERE aiod.organisation.date_deleted IS NOT NULL AND aiod.organisation.date_deleted > :sql_last_value diff --git a/logstash/pipeline/sql/rm_project.sql b/logstash/pipeline/sql/rm_project.sql deleted file mode 100644 index 486988bd..00000000 --- a/logstash/pipeline/sql/rm_project.sql +++ /dev/null @@ -1,5 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT project.identifier -FROM aiod.project -WHERE aiod.project.date_deleted IS NOT NULL AND aiod.project.date_deleted > :sql_last_value diff --git a/logstash/pipeline/sql/rm_publication.sql b/logstash/pipeline/sql/rm_publication.sql deleted file mode 100644 index 980ab79e..00000000 --- a/logstash/pipeline/sql/rm_publication.sql +++ /dev/null @@ -1,5 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT publication.identifier -FROM aiod.publication -WHERE aiod.publication.date_deleted IS NOT NULL AND aiod.publication.date_deleted > :sql_last_value diff --git a/logstash/pipeline/sql/rm_service.sql b/logstash/pipeline/sql/rm_service.sql deleted file mode 100644 index 82d24c1c..00000000 --- a/logstash/pipeline/sql/rm_service.sql +++ /dev/null @@ -1,5 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT service.identifier -FROM aiod.service -WHERE aiod.service.date_deleted IS NOT NULL AND aiod.service.date_deleted > :sql_last_value diff --git a/logstash/pipeline/sql/sync_dataset.sql b/logstash/pipeline/sql/sync_dataset.sql deleted file mode 100644 index f5440b8f..00000000 --- a/logstash/pipeline/sql/sync_dataset.sql +++ /dev/null @@ -1,7 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT aiod_entry.date_modified, dataset.identifier, name, description_identifier, text.plain as 'plain', text.html as 'html', issn -FROM aiod.dataset -INNER JOIN aiod.aiod_entry ON aiod.dataset.aiod_entry_identifier=aiod.aiod_entry.identifier -LEFT JOIN aiod.text ON aiod.dataset.description_identifier=aiod.text.identifier -WHERE aiod.dataset.date_deleted IS NULL AND aiod.aiod_entry.date_modified > :sql_last_value diff --git a/logstash/pipeline/sql/sync_event.sql b/logstash/pipeline/sql/sync_event.sql deleted file mode 100644 index 8dcac2f7..00000000 --- a/logstash/pipeline/sql/sync_event.sql +++ /dev/null @@ -1,7 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT aiod_entry.date_modified, event.identifier, name, description_identifier, text.plain, text.html -FROM aiod.event -INNER JOIN aiod.aiod_entry ON aiod.event.aiod_entry_identifier=aiod.aiod_entry.identifier -LEFT JOIN aiod.text ON aiod.event.description_identifier=aiod.text.identifier -WHERE aiod.event.date_deleted IS NULL AND aiod.aiod_entry.date_modified > :sql_last_value diff --git a/logstash/pipeline/sql/sync_experiment.sql b/logstash/pipeline/sql/sync_experiment.sql deleted file mode 100644 index a71d0b19..00000000 --- a/logstash/pipeline/sql/sync_experiment.sql +++ /dev/null @@ -1,7 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT aiod_entry.date_modified, experiment.identifier, name, description_identifier, text.plain as 'plain', text.html as 'html' -FROM aiod.experiment -INNER JOIN aiod.aiod_entry ON aiod.experiment.aiod_entry_identifier=aiod.aiod_entry.identifier -LEFT JOIN aiod.text ON aiod.experiment.description_identifier=aiod.text.identifier -WHERE aiod.experiment.date_deleted IS NULL AND aiod.aiod_entry.date_modified > :sql_last_value diff --git a/logstash/pipeline/sql/sync_ml_model.sql b/logstash/pipeline/sql/sync_ml_model.sql deleted file mode 100644 index 95c2f524..00000000 --- a/logstash/pipeline/sql/sync_ml_model.sql +++ /dev/null @@ -1,7 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT aiod_entry.date_modified, ml_model.identifier, name, description_identifier, text.plain as 'plain', text.html as 'html' -FROM aiod.ml_model -INNER JOIN aiod.aiod_entry ON aiod.ml_model.aiod_entry_identifier=aiod.aiod_entry.identifier -LEFT JOIN aiod.text ON aiod.ml_model.description_identifier=aiod.text.identifier -WHERE aiod.ml_model.date_deleted IS NULL AND aiod.aiod_entry.date_modified > :sql_last_value diff --git a/logstash/pipeline/sql/sync_news.sql b/logstash/pipeline/sql/sync_news.sql deleted file mode 100644 index 575a9b48..00000000 --- a/logstash/pipeline/sql/sync_news.sql +++ /dev/null @@ -1,7 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT aiod_entry.date_modified, news.identifier, name, description_identifier, text.plain as 'plain', text.html as 'html', headline, alternative_headline -FROM aiod.news -INNER JOIN aiod.aiod_entry ON aiod.news.aiod_entry_identifier=aiod.aiod_entry.identifier -LEFT JOIN aiod.text ON aiod.news.description_identifier=aiod.text.identifier -WHERE aiod.news.date_deleted IS NULL AND aiod.aiod_entry.date_modified > :sql_last_value diff --git a/logstash/pipeline/sql/sync_organisation.sql b/logstash/pipeline/sql/sync_organisation.sql deleted file mode 100644 index 588f0a2b..00000000 --- a/logstash/pipeline/sql/sync_organisation.sql +++ /dev/null @@ -1,7 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT aiod_entry.date_modified, organisation.identifier, name, description_identifier, text.plain as 'plain', text.html as 'html', legal_name -FROM aiod.organisation -INNER JOIN aiod.aiod_entry ON aiod.organisation.aiod_entry_identifier=aiod.aiod_entry.identifier -LEFT JOIN aiod.text ON aiod.organisation.description_identifier=aiod.text.identifier -WHERE aiod.organisation.date_deleted IS NULL AND aiod.aiod_entry.date_modified > :sql_last_value diff --git a/logstash/pipeline/sql/sync_project.sql b/logstash/pipeline/sql/sync_project.sql deleted file mode 100644 index 9a9c0a92..00000000 --- a/logstash/pipeline/sql/sync_project.sql +++ /dev/null @@ -1,7 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT aiod_entry.date_modified, project.identifier, name, description_identifier, text.plain as 'plain', text.html as 'html' -FROM aiod.project -INNER JOIN aiod.aiod_entry ON aiod.project.aiod_entry_identifier=aiod.aiod_entry.identifier -LEFT JOIN aiod.text ON aiod.project.description_identifier=aiod.text.identifier -WHERE aiod.project.date_deleted IS NULL AND aiod.aiod_entry.date_modified > :sql_last_value diff --git a/logstash/pipeline/sql/sync_publication.sql b/logstash/pipeline/sql/sync_publication.sql deleted file mode 100644 index 22abd17d..00000000 --- a/logstash/pipeline/sql/sync_publication.sql +++ /dev/null @@ -1,7 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT aiod_entry.date_modified, publication.identifier, name, description_identifier, text.plain as 'plain', text.html as 'html', issn, isbn -FROM aiod.publication -INNER JOIN aiod.aiod_entry ON aiod.publication.aiod_entry_identifier=aiod.aiod_entry.identifier -LEFT JOIN aiod.text ON aiod.publication.description_identifier=aiod.text.identifier -WHERE aiod.publication.date_deleted IS NULL AND aiod.aiod_entry.date_modified > :sql_last_value diff --git a/logstash/pipeline/sql/sync_service.sql b/logstash/pipeline/sql/sync_service.sql deleted file mode 100644 index 1f8ccea7..00000000 --- a/logstash/pipeline/sql/sync_service.sql +++ /dev/null @@ -1,7 +0,0 @@ --- This file has been generated by `logstash_config.py` file --- --------------------------------------------------------- -SELECT aiod_entry.date_modified, service.identifier, name, description_identifier, text.plain as 'plain', text.html as 'html', slogan -FROM aiod.service -INNER JOIN aiod.aiod_entry ON aiod.service.aiod_entry_identifier=aiod.aiod_entry.identifier -LEFT JOIN aiod.text ON aiod.service.description_identifier=aiod.text.identifier -WHERE aiod.service.date_deleted IS NULL AND aiod.aiod_entry.date_modified > :sql_last_value diff --git a/logstash/setup/Dockerfile b/logstash/setup/Dockerfile deleted file mode 100644 index a1048220..00000000 --- a/logstash/setup/Dockerfile +++ /dev/null @@ -1,5 +0,0 @@ -FROM python:3.11-slim-bullseye - -# RUN apt-get update && apt-get -y install pip && pip install Jinja2 - -WORKDIR /app \ No newline at end of file diff --git a/src/routers/search_router.py b/src/routers/search_router.py index 2f53e912..2b3be77e 100644 --- a/src/routers/search_router.py +++ b/src/routers/search_router.py @@ -151,10 +151,8 @@ def search( from_=from_, size=limit, sort=SORT) total_hits = result["hits"]["total"]["value"] - next_offset = ( - result["hits"]["hits"][-1]["sort"] - if len(result["hits"]["hits"]) > 0 else None - ) + next_offset = (result["hits"]["hits"][-1]["sort"] + if len(result["hits"]["hits"]) > 0 else None) if get_all: # Launch database query diff --git a/src/setup/elasticsearch/generate_elasticsearch_indices.py b/src/setup/elasticsearch/generate_elasticsearch_indices.py new file mode 100755 index 00000000..603a2591 --- /dev/null +++ b/src/setup/elasticsearch/generate_elasticsearch_indices.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +"""Generates the elasticsearch indices + +Launched by the es_logstash_setup container in the docker-compose file. +""" + +import os +import copy +from elasticsearch import Elasticsearch + +from routers.search_routers import router_list + +BASE_MAPPING = { + "mappings" : { + "properties" : { + "date_modified" : { + "type" : "date" + }, + "identifier" : { + "type" : "long" + }, + "name" : { + "type" : "text", + "fields" : { + "keyword" : { + "type" : "keyword" + } + } + }, + "plain" : { + "type" : "text", + "fields" : { + "keyword" : { + "type" : "keyword" + } + } + }, + "html" : { + "type" : "text", + "fields" : { + "keyword" : { + "type" : "keyword" + } + } + } + } + } +} + +def add_field(base_mapping, field): + new_mapping = copy.deepcopy(base_mapping) + new_mapping["mappings"]["properties"][field] = {"type": "text", "fields": {"keyword": {"type": "keyword"}}} + return new_mapping + +def generate_mapping(entity, fields): + mapping = BASE_MAPPING + for field in fields: + mapping = add_field(mapping, field) + return mapping + +def main(): + + # Generate client + es_user = os.environ['ES_USER'] + es_password = os.environ['ES_PASSWORD'] + es_client = Elasticsearch("http://elasticsearch:9200", + basic_auth=(es_user, es_password)) + + # Search for entities and their extra fields + global_fields = set(['name', 'plain', 'html']) + entities = {} + for router in router_list: + extra_fields = list(router.match_fields^global_fields) + entities[router.es_index] = extra_fields + + # Add indices with mappings + for entity, fields in entities.items(): + mapping = generate_mapping(entity, fields) + print(f"{entity}: {mapping}") + es_client.indices.create(index=entity, body=mapping, ignore=400) + +if __name__ == "__main__": + main() diff --git a/src/setup/logstash/generate_logstash_config_files.py b/src/setup/logstash/generate_logstash_config_files.py index 2fc7a57f..f960c6d1 100755 --- a/src/setup/logstash/generate_logstash_config_files.py +++ b/src/setup/logstash/generate_logstash_config_files.py @@ -1,32 +1,28 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import os +"""Generates the logstash configuration and pipelines files -# PATH MACROS -# ============================================================================= +This file generates the logstash configuration file in logstash/config, the +pipelines configuration files in logstash/pipelines/conf and the pipelines +sql sentences in logstash/pipelines/sql. -# Repository base path -#REPO_PATH = os.path.join("..", "..", "..") +Launched by the es_logstash_setup container in the docker-compose file. +""" -FIELDS = { - "dataset": ["aiod_entry.date_modified", "dataset.identifier", "name", "description_identifier", "text.plain as 'plain'", "text.html as 'html'", "issn"], - "event": ["aiod_entry.date_modified", "event.identifier", "name", "description_identifier", "text.plain", "text.html",], - "experiment": ["aiod_entry.date_modified", "experiment.identifier", "name", "description_identifier", "text.plain as 'plain'", "text.html as 'html'"], - "ml_model": ["aiod_entry.date_modified", "ml_model.identifier", "name", "description_identifier", "text.plain as 'plain'", "text.html as 'html'"], - "news": ["aiod_entry.date_modified", "news.identifier", "name", "description_identifier", "text.plain as 'plain'", "text.html as 'html'", "headline", - "alternative_headline"], - "organisation": ["aiod_entry.date_modified", "organisation.identifier", "name", "description_identifier", "text.plain as 'plain'", "text.html as 'html'", "legal_name"], - "project": ["aiod_entry.date_modified", "project.identifier", "name", "description_identifier", "text.plain as 'plain'", "text.html as 'html'"], - "publication": ["aiod_entry.date_modified", "publication.identifier", "name", "description_identifier", "text.plain as 'plain'", "text.html as 'html'", "issn", "isbn"], - "service": ["aiod_entry.date_modified", "service.identifier", "name", "description_identifier", "text.plain as 'plain'", "text.html as 'html'", "slogan"] -} +import os + +from routers.search_routers import router_list # MACROS FOR THE DOCUMENTS GENERATION FUNCTIONS # ============================================================================= -INFO = """{0} This file has been generated by `logstash_config.py` file -{0} --------------------------------------------------------- +BASE_FIELDS = ["{0}.identifier", "{0}.name", "text.plain as 'plain'", + "text.html as 'html'", "aiod_entry.date_modified"] + +INFO = """{0} This file has been generated by `generate_logstash_config.py` +{0} file, placed in `src/setup/logstash` +{0} ------------------------------------------------------------- """ CONF_BASE = """http.host: "0.0.0.0" @@ -77,32 +73,13 @@ }} """ -FILTER_BASE = """filter {{ - if ![application_area] {{ - mutate {{ - replace => {{"application_area" => ""}} - }} - }} - mutate {{ +FILTER = """filter { + mutate { remove_field => ["@version", "@timestamp"] - split => {{"application_area" => ","}} - }}{0} -}} -""" - -DATE_FILTER = """ - if [type] == "organisation" {0}{{ - ruby {{ - code => ' - t = Time.at(event.get("date_founded").to_f) - event.set("date_founded", t.strftime("%Y-%m-%d")) - ' - }} - }} + } +} """ -SYNC_DATE_FILTER_ADDON = """or [type] == "rm_organisation" """ - INIT_OUTPUT_BASE = """ if [type] == "{2}" {{ elasticsearch {{ hosts => "elasticsearch:9200" @@ -177,12 +154,10 @@ def generate_pipeline_conf_files(pipeline_conf_path, db_user, db_pass, if not sync: # init file file_path = os.path.join(pipeline_conf_path, "init_table.conf") input_base = INIT_INPUT_BASE - date_filter = DATE_FILTER.format("") output_base = INIT_OUTPUT_BASE else: # sync file file_path = os.path.join(pipeline_conf_path, "sync_table.conf") input_base = SYNC_INPUT_BASE - date_filter = DATE_FILTER.format(SYNC_DATE_FILTER_ADDON) output_base = SYNC_OUTPUT_BASE # Generate configuration file @@ -198,10 +173,7 @@ def generate_pipeline_conf_files(pipeline_conf_path, db_user, db_pass, f.write("}\n") # Filters - if "organisation" in entities: - f.write(FILTER_BASE.format(date_filter)) - else: - f.write(FILTER_BASE.format("")) + f.write(FILTER) # Output f.write("output {\n") @@ -209,7 +181,7 @@ def generate_pipeline_conf_files(pipeline_conf_path, db_user, db_pass, f.write(output_base.format(es_user, es_pass, entity)) f.write("}\n") -def generate_pipeline_sql_files(pipeline_sql_path, entity, sync=False): +def generate_pipeline_sql_files(pipeline_sql_path, entity, fields, sync=False): # Generate output file path if sync: @@ -229,7 +201,10 @@ def generate_pipeline_sql_files(pipeline_sql_path, entity, sync=False): else: where_clause = INIT_CLAUSE.format(entity) - f.write(SQL_BASE.format(entity, ", ".join(FIELDS[entity]), where_clause)) + # Generate field list + field_list = ", ".join(fields).format(entity) + + f.write(SQL_BASE.format(entity, field_list, where_clause)) def generate_pipeline_sql_rm_files(pipeline_sql_path, entity): @@ -248,9 +223,21 @@ def generate_pipeline_sql_rm_files(pipeline_sql_path, entity): # MAIN FUNCTION # ============================================================================= -def main(base_path, db_user, db_pass, es_user, es_pass, entities, - ai_asset_entities, attributes, type_entities, mode_entities, - status_entities, agent_entities, organisation_entities): +def main(): + + # Get configuration variables + base_path = "/logstash" + db_user = "root" + db_pass = os.environ['MYSQL_ROOT_PASSWORD'] + es_user = os.environ['ES_USER'] + es_pass = os.environ['ES_PASSWORD'] + + # Search for entities and their extra fields + global_fields = set(['name', 'plain', 'html']) + entities = {} + for router in router_list: + extra_fields = list(router.match_fields^global_fields) + entities[router.es_index] = BASE_FIELDS + extra_fields # Make configuration dir conf_path = os.path.join(base_path, "config") @@ -265,81 +252,19 @@ def main(base_path, db_user, db_pass, es_user, es_pass, entities, # Generate logstash configuration file generate_conf_file(conf_path, es_user, es_pass) - # Generate pipeline configuration init file + # Generate pipeline configuration init and sync files generate_pipeline_conf_files(pipeline_conf_path, db_user, db_pass, - es_user, es_pass, entities, sync=False) - - # Generate pipeline configuration sync file + es_user, es_pass, entities.keys(), sync=False) generate_pipeline_conf_files(pipeline_conf_path, db_user, db_pass, - es_user, es_pass, entities, sync=True) + es_user, es_pass, entities.keys(), sync=True) - # Generate SQL init and sync files - for entity in entities: - generate_pipeline_sql_files(pipeline_sql_path, entity, sync=False) - generate_pipeline_sql_files(pipeline_sql_path, entity, sync=True) - - # Generate SQL rm files - for entity in entities: + # Generate SQL init, sync and rm files + for entity, fields in entities.items(): + generate_pipeline_sql_files(pipeline_sql_path, entity, fields, + sync=False) + generate_pipeline_sql_files(pipeline_sql_path, entity, fields, + sync=True) generate_pipeline_sql_rm_files(pipeline_sql_path, entity) if __name__ == "__main__": - - # PATH MACROS - # ------------------------------------------------------------------------- - - # Repository base path -# repo_path = REPO_PATH - - # Configuration base path -# base_path = os.path.join(repo_path, "logstash") - base_path = "/logstash" - - # ------------------------------------------------------------------------- - - # Users and passwords - db_user = "root" - db_pass = os.environ['MYSQL_ROOT_PASSWORD'] - es_user = os.environ['ES_USER'] - es_pass = os.environ['ES_PASSWORD'] -# with open(os.path.join(repo_path, ".env"), "r") as f: -# for line in f: -# if "MYSQL_ROOT_PASSWORD" in line: -# db_pass = line.split("=")[1][:-1] -# if "ES_USER" in line: -# es_user = line.split("=")[1][:-1] -# if "ES_PASSWORD" in line: -# es_pass = line.split("=")[1][:-1] - - # Entities and attributes - entities = ["dataset", "event", "experiment", "ml_model", "news", - "organisation", "project", "publication", "service"] - ai_asset_entities = ["dataset", "experiment", "ml_model", "publication"] - attributes = { - "dataset": ["issn", "measurement_technique", "temporal_coverage"], - "event": ["start_date", "end_date", "schedule", "registration_link", - "organiser_identifier"], - "experiment": ["experimental_workflow", "execution_settings", - "reproducibility_explanation"], - "news": ["headline", "alternative_headline"], - "organisation": ["date_founded", "legal_name"], - "project": ["start_date", "end_date", "total_cost_euro", - "coordinator_identifier"], - "publication": ["permanent_identifier", "isbn", "issn", - "knowledge_asset_id AS `knowledge_asset_identifier`"], - "service": ["slogan", "terms_of_service"] - } - type_entities = ["ml_model", "organisation", "publication"] - mode_entities = ["event"] - status_entities = ["event"] - agent_entities = { - "event": ("organiser_identifier", "organiser_type"), - "organisation": ("agent_id", "agent") - } - organisation_entities = { - "project": ("coordinator_identifier", "coordinator_name") - } - - # Main function - main(base_path, db_user, db_pass, es_user, es_pass, entities, - ai_asset_entities, attributes, type_entities, mode_entities, - status_entities, agent_entities, organisation_entities) + main() diff --git a/src/setup/logstash/generate_logstash_config_files_bkp.py b/src/setup/logstash/generate_logstash_config_files_bkp.py deleted file mode 100755 index d80fb1ea..00000000 --- a/src/setup/logstash/generate_logstash_config_files_bkp.py +++ /dev/null @@ -1,454 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import os - -# PATH MACROS -# ============================================================================= - -# Repository base path -#REPO_PATH = os.path.join("..", "..", "..") - -# MACROS FOR THE DOCUMENTS GENERATION FUNCTIONS -# ============================================================================= - -INFO = """{0} This file has been generated by `logstash_config.py` file -{0} --------------------------------------------------------- -""" - -CONF_BASE = """http.host: "0.0.0.0" -xpack.monitoring.elasticsearch.hosts: [ "http://elasticsearch:9200" ] -xpack.monitoring.enabled: true -xpack.monitoring.elasticsearch.username: {0} -xpack.monitoring.elasticsearch.password: {1} -""" - -INIT_INPUT_BASE = """ jdbc {{ - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "{0}" - jdbc_password => "{1}" - clean_run => true - record_last_run => false - statement_filepath => "/usr/share/logstash/sql/init_{2}.sql" - type => "{2}" - }} -""" - -SYNC_INPUT_BASE = """ jdbc {{ - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "{0}" - jdbc_password => "{1}" - use_column_value => true - tracking_column => "date_modified" - tracking_column_type => "timestamp" - schedule => "*/5 * * * * *" - statement_filepath => "/usr/share/logstash/sql/sync_{2}.sql" - type => "{2}" - }} - jdbc {{ - jdbc_driver_library => "/usr/share/logstash/mysql-connector-j.jar" - jdbc_driver_class => "com.mysql.jdbc.Driver" - jdbc_connection_string => "jdbc:mysql://sqlserver:3306/aiod" - jdbc_user => "{0}" - jdbc_password => "{1}" - use_column_value => true - tracking_column => "date_deleted" - tracking_column_type => "timestamp" - schedule => "*/5 * * * * *" - statement_filepath => "/usr/share/logstash/sql/rm_{2}.sql" - type => "rm_{2}" - }} -""" - -FILTER_BASE = """filter {{ - if ![application_area] {{ - mutate {{ - replace => {{"application_area" => ""}} - }} - }} - mutate {{ - # remove_field => ["@version", "@timestamp"] - split => {{"application_area" => ","}} - }}{0} -}} -""" - -DATE_FILTER = """ - if [type] == "organisation" {0}{{ - ruby {{ - code => ' - t = Time.at(event.get("date_founded").to_f) - event.set("date_founded", t.strftime("%Y-%m-%d")) - ' - }} - }} -""" - -SYNC_DATE_FILTER_ADDON = """or [type] == "rm_organisation" """ - -INIT_OUTPUT_BASE = """ if [type] == "{2}" {{ - elasticsearch {{ - hosts => "elasticsearch:9200" - user => "{0}" - password => "{1}" - ecs_compatibility => disabled - index => "{2}" - document_id => "{2}_%{{identifier}}" - }} - }} -""" - -#TODO: TEST DELETE WITHOUT protocol => "transport" -SYNC_OUTPUT_BASE = """ if [type] == "{2}" {{ - elasticsearch {{ - hosts => "elasticsearch:9200" - user => "{0}" - password => "{1}" - ecs_compatibility => disabled - index => "{2}" - document_id => "{2}_%{{identifier}}" - }} - }} - if [type] == "rm_{2}" {{ - elasticsearch {{ - action => "delete" - hosts => "elasticsearch:9200" - user => "{0}" - password => "{1}" - ecs_compatibility => disabled - index => "{2}" - document_id => "{2}_%{{identifier}}" - }} - }} -""" - -SQL_BASE = """SELECT - -- Concept - {0}.identifier, - {0}.platform, - {0}.platform_identifier, - -- Concept.aiod_entry - status.name AS `status`, - aiod_entry.date_modified, - aiod_entry.date_created, - -- Resource - {0}.ai_resource_id AS `resource_identifier`, - {0}.name, - {0}.description, - {0}.same_as{1}{2}{3}{4}{5}{6}{7}, - -- Application Area - GROUP_CONCAT(application_area.name) AS `application_area` -FROM aiod.{0} -INNER JOIN aiod.aiod_entry ON aiod.{0}.aiod_entry_identifier=aiod.aiod_entry.identifier -INNER JOIN aiod.status ON aiod.aiod_entry.status_identifier=aiod.status.identifier{8} -LEFT JOIN aiod.{0}_application_area_link ON aiod.{0}_application_area_link.from_identifier=aiod.{0}.identifier -LEFT JOIN aiod.application_area ON aiod.{0}_application_area_link.linked_identifier=aiod.application_area.identifier{9} -GROUP BY aiod.{0}.identifier -ORDER BY aiod.{0}.identifier -""" - -SQL_RM_BASE = """SELECT {0}.identifier -FROM aiod.{0} -WHERE aiod.{0}.date_deleted IS NOT NULL AND aiod.{0}.date_deleted > :sql_last_value -""" - -AI_ASSET_BASE = """, - -- AIAsset - {0}.ai_asset_id AS `asset_identifier`, - {0}.date_published, - {0}.version, - license.name AS `license`""" - -ATTRIBUTES_BASE = """, - -- Attributes - """ - -TYPE_BASE = """, - -- Type - {0}_type.name AS `{0}_type`""" - -MODE_BASE = """, - -- Mode - {0}_mode.name AS `mode`""" - -STATUS_BASE = """, - -- Status - {0}_status.name AS `{0}_status`""" - -AGENT_BASE = """, - -- Agent - agent.type AS `{0}`""" - -ORGANISATION_BASE = """, - -- Organisation - organisation.name AS `{0}`""" - -LEFT_LICENSE = """ -LEFT JOIN aiod.license ON aiod.{0}.license_identifier=aiod.license.identifier""" - -LEFT_TYPE = """ -LEFT JOIN aiod.{0}_type ON aiod.{0}.type_identifier=aiod.{0}_type.identifier""" - -LEFT_MODE = """ -LEFT JOIN aiod.{0}_mode ON aiod.{0}.mode_identifier=aiod.{0}_mode.identifier""" - -LEFT_STATUS = """ -LEFT JOIN aiod.{0}_status ON aiod.{0}.status_identifier=aiod.{0}_status.identifier""" - -LEFT_AGENT = """ -LEFT JOIN aiod.agent ON aiod.{0}.{1}=aiod.agent.identifier""" - -LEFT_ORGANISATION = """ -LEFT JOIN aiod.organisation ON aiod.{0}.{1}=aiod.organisation.identifier""" - -INIT_CLAUSE = """ -WHERE aiod.{0}.date_deleted IS NULL""" - -SYNC_CLAUSE = """ -WHERE aiod.{0}.date_deleted IS NULL AND aiod.aiod_entry.date_modified > :sql_last_value""" - -# DOCUMENTS GENERATION FUNCTIONS -# ============================================================================= - -def generate_conf_file(conf_path, es_user, es_pass): - - file_path = os.path.join(conf_path, "logstash.yml") - - # Generate configuration file - with open(file_path, 'w') as f: - - # Info - f.write(INFO.format('#')) - - # Configuration - f.write(CONF_BASE.format(es_user, es_pass)) - -def generate_pipeline_conf_files(pipeline_conf_path, db_user, db_pass, - es_user, es_pass, entities, sync=False): - - if not sync: # init file - file_path = os.path.join(pipeline_conf_path, "init_table.conf") - input_base = INIT_INPUT_BASE - date_filter = DATE_FILTER.format("") - output_base = INIT_OUTPUT_BASE - else: # sync file - file_path = os.path.join(pipeline_conf_path, "sync_table.conf") - input_base = SYNC_INPUT_BASE - date_filter = DATE_FILTER.format(SYNC_DATE_FILTER_ADDON) - output_base = SYNC_OUTPUT_BASE - - # Generate configuration file - with open(file_path, 'w') as f: - - # Info - f.write(INFO.format('#')) - - # Input - f.write("input {\n") - for entity in entities: - f.write(input_base.format(db_user, db_pass, entity)) - f.write("}\n") - - # Filters - if "organisation" in entities: - f.write(FILTER_BASE.format(date_filter)) - else: - f.write(FILTER_BASE.format("")) - - # Output - f.write("output {\n") - for entity in entities: - f.write(output_base.format(es_user, es_pass, entity)) - f.write("}\n") - -def generate_pipeline_sql_files(pipeline_sql_path, entity, sync=False): - - # Generate output file path - if sync: - file_path = os.path.join(pipeline_sql_path, f"sync_{entity}.sql") - else: - file_path = os.path.join(pipeline_sql_path, f"init_{entity}.sql") - - # Write the output file - with open(file_path, 'w') as f: - - # Info - f.write(INFO.format('--')) - - # Left joins - left_joins = "" - - # For ai_asset entities - ai_asset_attributes = "" - if entity in ai_asset_entities: - ai_asset_attributes = AI_ASSET_BASE.format(entity) - left_joins += LEFT_LICENSE.format(entity) - - # Attributes - entity_attributes = "" - if entity in attributes.keys(): - entity_attributes = (ATTRIBUTES_BASE - + f"{entity}.{attributes[entity][0]}") - for attribute in attributes[entity][1:]: - entity_attributes += f",\n {entity}.{attribute}" - - # For entities with a type relation - type_attribute = "" - if entity in type_entities: - type_attribute = TYPE_BASE.format(entity) - left_joins += LEFT_TYPE.format(entity) - - # For entities with a mode relation - mode_attribute = "" - if entity in mode_entities: - mode_attribute = MODE_BASE.format(entity) - left_joins += LEFT_MODE.format(entity) - - # For entities with a status relation - status_attribute = "" - if entity in status_entities: - status_attribute = STATUS_BASE.format(entity) - left_joins += LEFT_STATUS.format(entity) - - # For entities with an agent relation - agent_attribute = "" - if entity in agent_entities.keys(): - agent_attribute = AGENT_BASE.format(agent_entities[entity][1]) - left_joins += LEFT_AGENT.format(entity, agent_entities[entity][0]) - - # For entities with an organisation relation - organisation_attribute = "" - if entity in organisation_entities.keys(): - organisation_attribute = ORGANISATION_BASE.format( - organisation_entities[entity][1]) - left_joins += LEFT_ORGANISATION.format(entity, - organisation_entities[entity][0]) - - # Where clause - if sync: - where_clause = SYNC_CLAUSE.format(entity) - else: - where_clause = INIT_CLAUSE.format(entity) - - f.write(SQL_BASE.format(entity, ai_asset_attributes, - entity_attributes, type_attribute, - mode_attribute, status_attribute, - agent_attribute, organisation_attribute, - left_joins, where_clause)) - -def generate_pipeline_sql_rm_files(pipeline_sql_path, entity): - - # Generate output file path - file_path = os.path.join(pipeline_sql_path, f"rm_{entity}.sql") - - # Write the output file - with open(file_path, 'w') as f: - - # Info - f.write(INFO.format('--')) - - # SQL query - f.write(SQL_RM_BASE.format(entity)) - -# MAIN FUNCTION -# ============================================================================= - -def main(base_path, db_user, db_pass, es_user, es_pass, entities, - ai_asset_entities, attributes, type_entities, mode_entities, - status_entities, agent_entities, organisation_entities): - - # Make configuration dir - conf_path = os.path.join(base_path, "config") - os.makedirs(conf_path, exist_ok=True) - - # Make pipeline configuration dirs - pipeline_conf_path = os.path.join(base_path, "pipeline", "conf") - os.makedirs(pipeline_conf_path, exist_ok=True) - pipeline_sql_path = os.path.join(base_path, "pipeline", "sql") - os.makedirs(pipeline_sql_path, exist_ok=True) - - # Generate logstash configuration file - generate_conf_file(conf_path, es_user, es_pass) - - # Generate pipeline configuration init file - generate_pipeline_conf_files(pipeline_conf_path, db_user, db_pass, - es_user, es_pass, entities, sync=False) - - # Generate pipeline configuration sync file - generate_pipeline_conf_files(pipeline_conf_path, db_user, db_pass, - es_user, es_pass, entities, sync=True) - - # Generate SQL init and sync files - for entity in entities: - generate_pipeline_sql_files(pipeline_sql_path, entity, sync=False) - generate_pipeline_sql_files(pipeline_sql_path, entity, sync=True) - - # Generate SQL rm files - for entity in entities: - generate_pipeline_sql_rm_files(pipeline_sql_path, entity) - -if __name__ == "__main__": - - # PATH MACROS - # ------------------------------------------------------------------------- - - # Repository base path -# repo_path = REPO_PATH - - # Configuration base path -# base_path = os.path.join(repo_path, "logstash") - base_path = "/logstash" - - # ------------------------------------------------------------------------- - - # Users and passwords - db_user = "root" - db_pass = os.environ['MYSQL_ROOT_PASSWORD'] - es_user = os.environ['ES_USER'] - es_pass = os.environ['ES_PASSWORD'] -# with open(os.path.join(repo_path, ".env"), "r") as f: -# for line in f: -# if "MYSQL_ROOT_PASSWORD" in line: -# db_pass = line.split("=")[1][:-1] -# if "ES_USER" in line: -# es_user = line.split("=")[1][:-1] -# if "ES_PASSWORD" in line: -# es_pass = line.split("=")[1][:-1] - - # Entities and attributes - entities = ["dataset", "event", "experiment", "ml_model", "news", - "organisation", "project", "publication", "service"] - ai_asset_entities = ["dataset", "experiment", "ml_model", "publication"] - attributes = { - "dataset": ["issn", "measurement_technique", "temporal_coverage"], - "event": ["start_date", "end_date", "schedule", "registration_link", - "organiser_identifier"], - "experiment": ["experimental_workflow", "execution_settings", - "reproducibility_explanation"], - "news": ["headline", "alternative_headline"], - "organisation": ["date_founded", "legal_name"], - "project": ["start_date", "end_date", "total_cost_euro", - "coordinator_identifier"], - "publication": ["permanent_identifier", "isbn", "issn", - "knowledge_asset_id AS `knowledge_asset_identifier`"], - "service": ["slogan", "terms_of_service"] - } - type_entities = ["ml_model", "organisation", "publication"] - mode_entities = ["event"] - status_entities = ["event"] - agent_entities = { - "event": ("organiser_identifier", "organiser_type"), - "organisation": ("agent_id", "agent") - } - organisation_entities = { - "project": ("coordinator_identifier", "coordinator_name") - } - - # Main function - main(base_path, db_user, db_pass, es_user, es_pass, entities, - ai_asset_entities, attributes, type_entities, mode_entities, - status_entities, agent_entities, organisation_entities)