Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Crawler Integration #358

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions .github/actions/build-connector-image/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
name: "Build EDC Connector Image"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please squash your commits that went on top so we can merge it with keeping the history

# Metadata and inputs of the composite action. The action logs in to a
# container registry, then builds and pushes the image defined by
# authority-portal-backend/catalog-crawler/Dockerfile.
description: "Builds the Catalog Crawler Docker image and pushes it to a Docker registry"
inputs:
  # Registry host; also used as the first segment of the image reference.
  registry-url:
    required: true
    description: "Docker Registry"
  registry-user:
    required: true
    description: "Docker Registry Login Username"
  registry-password:
    required: true
    description: "Docker Registry Login Password"
  # Image reference is {registry-url}/{image-base-name}/{image-name}.
  image-base-name:
    required: true
    description: "Docker Image Base Name (Company)"
  image-name:
    required: true
    description: "Docker Image Name (Artifact Name)"
  # Passed to the Dockerfile as build arg CONNECTOR_NAME, which selects the
  # launcher directory the app.jar is copied from.
  connector-name:
    required: true
    description: "EDC Connector Name in authority-portal-backend/catalog-crawler/{connector-name}-launcher"
  # Written to the org.opencontainers.image.title label.
  title:
    required: true
    description: "Docker Image Title"
  # Written to the org.opencontainers.image.description label.
  description:
    required: true
    description: "Docker Image Description"
runs:
  using: "composite"
  steps:
    - name: "Docker: Log in to the Container registry"
      uses: docker/login-action@v2
      with:
        registry: ${{ inputs.registry-url }}
        username: ${{ inputs.registry-user }}
        password: ${{ inputs.registry-password }}
    # Exports LAST_COMMIT_INFO (multi-line) and BUILD_DATE to the job
    # environment so the build step below can pass them as build args.
    - name: "Docker: Store last commit info and build date"
      id: last-commit-information
      shell: bash
      run: |
        # Multi-line values are written to $GITHUB_ENV in heredoc form. A fixed
        # delimiter such as "EOF" would end the value early if a commit message
        # contained that line, so a random delimiter is generated per run.
        delimiter="EOF_$(dd if=/dev/urandom bs=15 count=1 status=none | base64)"
        # A failing "git log" stays non-fatal (empty value), as before.
        last_commit_info="$(git log -1 || true)"
        {
          echo "LAST_COMMIT_INFO<<${delimiter}"
          echo "${last_commit_info}"
          echo "${delimiter}"
          echo "BUILD_DATE=$(date --utc +%FT%TZ)"
        } >> "$GITHUB_ENV"
    - name: "Docker: Extract metadata (tags, labels)"
      id: meta
      uses: docker/metadata-action@v4
      with:
        images: ${{ inputs.registry-url }}/${{ inputs.image-base-name }}/${{ inputs.image-name }}
        labels: |
          org.opencontainers.image.title=${{ inputs.title }}
          org.opencontainers.image.description=${{ inputs.description }}
        tags: |
          type=schedule
          type=semver,pattern={{version}}
          type=semver,pattern={{major}}.{{minor}}
          type=semver,pattern={{major}}
          type=ref,event=branch
          type=ref,event=pr
          type=sha
          type=raw,value=latest,enable={{is_default_branch}}
          type=raw,value=release,enable=${{ startsWith(github.ref, 'refs/tags/') }}
    - name: "Docker: Build and Push"
      uses: docker/build-push-action@v5
      with:
        # The Dockerfile path is fixed; only the launcher copied into the image
        # varies, via the CONNECTOR_NAME build arg.
        file: authority-portal-backend/catalog-crawler/Dockerfile
        context: .
        push: true
        tags: ${{ steps.meta.outputs.tags }}
        labels: ${{ steps.meta.outputs.labels }}
        # The commit info spans several lines, so that entry is wrapped in
        # double quotes to keep it a single build arg.
        build-args: |
          CONNECTOR_NAME=${{ inputs.connector-name }}
          "EDC_LAST_COMMIT_INFO_ARG=${{ env.LAST_COMMIT_INFO }}"
          EDC_BUILD_DATE_ARG=${{ env.BUILD_DATE }}
14 changes: 12 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ jobs:
type=sha
type=raw,value=latest,enable={{is_default_branch}}
type=raw,value=release,enable=${{ startsWith(github.ref, 'refs/tags/') }}
- name: "Docker: Build and Push Image"
- name: "Docker: Build and Push Image (authority-portal-backend)"
uses: docker/build-push-action@v4
with:
file: authority-portal-backend/authority-portal-quarkus/src/main/docker/Dockerfile.jvm
Expand All @@ -101,7 +101,17 @@ jobs:
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
network: host

# Builds and pushes the Catalog Crawler image via the repository-local
# composite action in .github/actions/build-connector-image.
- name: "Docker: Build and Push Image (authority-portal-crawler)"
uses: ./.github/actions/build-connector-image
with:
# Registry login uses the workflow actor and the job's GITHUB_TOKEN.
registry-url: ${{ env.REGISTRY }}
registry-user: ${{ github.actor }}
registry-password: ${{ secrets.GITHUB_TOKEN }}
# Image is published as {REGISTRY}/{IMAGE_NAME_BASE}/authority-portal-crawler.
image-base-name: ${{ env.IMAGE_NAME_BASE }}
image-name: "authority-portal-crawler"
# Forwarded to the Dockerfile as build arg CONNECTOR_NAME.
connector-name: "catalog-crawler-ce"
# title/description become the OCI image title/description labels.
title: "Catalog Crawler (Community Edition, DAPS)"
description: "sovity CE Catalog crawler for the sovity CE Authority Portal. Requires DAPS dataspace credentials to join an existing dataspace."

frontend:
name: Frontend
Expand Down
11 changes: 9 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ please see [changelog_updates.md](docs/dev/changelog_updates.md).

#### Major

- The Catalog Crawler has been moved to the AP repository.
- To prevent versioning conflicts with the image from EDC CE up to version 10.4.1, the image is now named differently. See [compatible versions](#compatible-versions) below.

#### Minor

#### Patch
Expand All @@ -24,11 +27,15 @@ please see [changelog_updates.md](docs/dev/changelog_updates.md).

### Deployment Migration Notes

- Please change the image used for the Catalog Crawler. The old image is no longer updated and will not work with future versions of the Portal.
- Previously: `ghcr.io/sovity/catalog-crawler-ce`
- Now: `ghcr.io/sovity/authority-portal-crawler`

#### Compatible Versions

- Authority Portal Backend Docker Image: `ghcr.io/sovity/authority-portal-backend:{{ version }}`
- Authority Portal Frontend Docker Image: `ghcr.io/sovity/authority-portal-frontend:{{ version }}`
- Catalog Crawler CE: `ghcr.io/sovity/catalog-crawler-ce:{{ CE VERSION }}`
- Catalog Crawler CE: `ghcr.io/sovity/authority-portal-crawler:{{ version }}`
- Sovity EDC CE: {{ CE Release Link }}

## [v4.1.2] - 2024-09-26
Expand Down Expand Up @@ -255,7 +262,7 @@ MDS 2.2 intermediate release

- All brokers can be undeployed including their databases.
- Keycloak
- Keycloak IAM must be updated to version `24.0.4`. Follow the [Keycloak upgrade guide](https://www.keycloak.org/docs/24.0.0/upgrading/) for more information.
- Keycloak IAM must be updated to version `24.0.4`. Follow the [Keycloak upgrade guide](https://www.keycloak.org/docs) for more information.
- Portal Backend

- Following environment variables have been added and **must be configured** for each environment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ import de.sovity.authorityportal.api.model.CentralComponentCreateRequest
import de.sovity.authorityportal.api.model.CentralComponentDto
import de.sovity.authorityportal.api.model.ComponentStatusOverview
import de.sovity.authorityportal.api.model.ConfigureProvidedConnectorWithCertificateRequest
import de.sovity.authorityportal.api.model.ConfigureProvidedConnectorWithJwksRequest
import de.sovity.authorityportal.api.model.ConnectorDetailsDto
import de.sovity.authorityportal.api.model.ConnectorOverviewResult
import de.sovity.authorityportal.api.model.CreateCaasRequest
import de.sovity.authorityportal.api.model.CreateConnectorRequest
import de.sovity.authorityportal.api.model.ConfigureProvidedConnectorWithJwksRequest
import de.sovity.authorityportal.api.model.CreateConnectorResponse
import de.sovity.authorityportal.api.model.DeploymentEnvironmentDto
import de.sovity.authorityportal.api.model.IdResponse
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package de.sovity.authorityportal.web.thirdparty.daps
import de.sovity.authorityportal.web.environment.DeploymentEnvironmentConfiguration.DeploymentEnvironment.DapsConfig
import de.sovity.authorityportal.web.thirdparty.daps.ext.CustomKeycloakResource
import de.sovity.authorityportal.web.thirdparty.daps.ext.instantiateResource
import io.quarkus.logging.Log
import org.keycloak.admin.client.KeycloakBuilder
import org.keycloak.representations.idm.ClientRepresentation
import org.keycloak.representations.idm.ProtocolMapperRepresentation
Expand Down Expand Up @@ -43,7 +44,9 @@ class DapsClient(dapsConfig: DapsConfig): AutoCloseable {
}

/**
 * Creates a client with the given [clientId] in the configured realm.
 *
 * The response of the create call is closed after use, and its HTTP status
 * decides which message is logged, so a rejected request (for example a
 * duplicate client ID) is no longer reported as a success. A failure is only
 * logged, not thrown, so callers see the same control flow as before.
 */
fun createClient(clientId: String) {
    Log.info("Creating client $clientId in realm $realmName")
    keycloak.realm(realmName).clients().create(buildClientRepresentation(clientId)).use { response ->
        if (response.status in 200..299) {
            Log.info("Client $clientId created in realm $realmName")
        } else {
            Log.error("Creating client $clientId in realm $realmName failed with HTTP status ${response.status}")
        }
    }
}

fun deleteClient(clientId: String) {
Expand All @@ -66,6 +69,7 @@ class DapsClient(dapsConfig: DapsConfig): AutoCloseable {
}

fun addJwksUrl(clientId: String, jwksUrl: String) {
Log.info("Getting client $clientId in realm $realmName")
val client = getClientById(clientId) ?: error("Client not found")

client.attributes["jwks.url"] = jwksUrl
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ quarkus.arc.exclude-types=io.swagger.v3.jaxrs2.**
%test.quarkus.log.console.json=false
%test.quarkus.otel.traces.exporter=none

quarkus.otel.sdk.disabled=true

# Rest Client
quarkus.rest-client.http2=true

Expand Down
4 changes: 4 additions & 0 deletions authority-portal-backend/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ subprojects {
password = project.findProperty("gpr.key") as String? ?: System.getenv("GPR_KEY")
}
}
maven {
url = uri("https://pkgs.dev.azure.com/sovity/41799556-91c8-4df6-8ddb-4471d6f15953/_packaging/core-edc/maven/v1")
name = "AzureRepo"
}
}

configurations.all {
Expand Down
34 changes: 34 additions & 0 deletions authority-portal-backend/catalog-crawler/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Runtime image for the Catalog Crawler: a Java 17 JRE on Alpine that runs a
# pre-built app.jar through /app/entrypoint.sh.
# The build context is expected to be the repository root (all COPY sources
# are given relative to it).
FROM eclipse-temurin:17-jre-alpine

# Install curl for healthcheck, bash for entrypoint
RUN apk add --no-cache curl bash
# Use bash for all following shell-form instructions (RUN, HEALTHCHECK CMD).
SHELL ["/bin/bash", "-c"]

# Use a non-root user
# (-D: no password, -H: no home directory, -s: login shell)
RUN adduser -D -H -s /sbin/nologin edc
USER edc:edc

# Which app.jar to include
# (resolved below as {CONNECTOR_NAME}-launcher/build/libs/app.jar)
ARG CONNECTOR_NAME="catalog-crawler-ce"

# For last-commit-info extension
# The defaults are explanatory messages shown when the args are not provided.
ARG EDC_LAST_COMMIT_INFO_ARG="The docker container was built outside of github actions and you didn't provide the build arg EDC_LAST_COMMIT_INFO_ARG, so there's no last commit info."
ARG EDC_BUILD_DATE_ARG="The docker container was built outside of github actions and you didn't provide the build arg EDC_BUILD_DATE_ARG, so there's no build date."

WORKDIR /app
# The jar must already have been built before "docker build" runs.
COPY ./authority-portal-backend/catalog-crawler/${CONNECTOR_NAME}-launcher/build/libs/app.jar /app
COPY ./authority-portal-backend/catalog-crawler/logging.properties /app
COPY ./authority-portal-backend/catalog-crawler/logging.dev.properties /app

# NOTE(review): presumably a placeholder config file referenced by the
# entrypoint script - confirm against docker-entrypoint.sh.
RUN touch /app/empty-properties-file.properties

# Make the build args available as environment variables at runtime.
ENV EDC_LAST_COMMIT_INFO=$EDC_LAST_COMMIT_INFO_ARG
ENV EDC_BUILD_DATE=$EDC_BUILD_DATE_ARG
# Empty by default. NOTE(review): presumably read by the entrypoint script to
# pass extra JVM options - confirm against docker-entrypoint.sh.
ENV JVM_ARGS=""

COPY ./authority-portal-backend/catalog-crawler/docker-entrypoint.sh /app/entrypoint.sh
ENTRYPOINT ["/app/entrypoint.sh"]
CMD ["start"]

# health status is determined by the availability of the /health endpoint
# Two URLs are tried in order (/api/check/health, then
# /backend/api/check/health); the check passes if either request succeeds.
# The API key is read from the EDC_API_AUTH_KEY environment variable.
HEALTHCHECK --interval=5s --timeout=5s --retries=10 CMD curl -H "x-api-key: $EDC_API_AUTH_KEY" --fail http://localhost:11001/api/check/health || curl -H "x-api-key: $EDC_API_AUTH_KEY" --fail http://localhost:11001/backend/api/check/health
41 changes: 41 additions & 0 deletions authority-portal-backend/catalog-crawler/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
<!-- PROJECT LOGO -->
<br />
<div align="center">
<a href="https://github.com/sovity/authority-portal">
<img src="https://raw.githubusercontent.com/sovity/edc-ui/main/src/assets/images/sovity_logo.svg" alt="Logo" width="300">
</a>

<h3 align="center">EDC-Connector Extension:<br />Catalog Crawler</h3>

<p align="center">
<a href="https://github.com/sovity/authority-portal/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yaml">Report Bug</a>
·
<a href="https://github.com/sovity/edc-ce/issues/new?template=feature_request.md">Request Feature</a>
</p>
</div>

## About this Extension

The Catalog Crawler is an additional deployment unit needed to determine the online status of registered connectors and populate the Data Catalog:

- It is a modified EDC connector with the task to crawl the other connectors' public data offers.
- It periodically checks the Authority Portal's connector list for its environment.
- It crawls the given connectors in regular intervals.
- It writes the data offers and connector statuses into the Authority Portal DB.
- Each environment configured in the Authority Portal requires its own Catalog Crawler with credentials for that environment's DAPS.

## Why does this component exist?

The Authority Portal uses a non-EDC stack and thus it cannot read the catalogs of participating connectors directly.

## Deployment

Please see the [Productive Deployment Guide](../../docs/deployment-guide/goals/production/README.md) for more information.

## License

Apache License 2.0 - see [LICENSE](../../LICENSE)

## Contact

sovity GmbH - contact@sovity.de
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Build script of a runnable Catalog Crawler launcher: it bundles the launcher
// base plus the extensions declared below into one shadow jar named "app.jar"
// (the file name the catalog-crawler Dockerfile copies into the image).
plugins {
    `java-library`
    id("application")
    alias(libs.plugins.shadow)
}

dependencies {
    // Shared launcher base of the catalog crawler.
    implementation(project(":catalog-crawler:catalog-crawler-launcher-base"))

    api(libs.edc.monitorJdkLogger)
    api(libs.edc.apiObservability)

    implementation(libs.edc.oauth2Core)
    implementation(libs.edc.vaultFilesystem)
}

application {
    mainClass.set("de.sovity.edc.ext.catalog.crawler.Main")
}

// configureEach registers the configuration lazily instead of realizing every
// ShadowJar task during the configuration phase.
tasks.withType<com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar>().configureEach {
    // Merge META-INF/services entries of all bundled jars so that service
    // registrations from different dependencies do not overwrite each other.
    mergeServiceFiles()
    archiveFileName.set("app.jar")
}

group = libs.versions.sovityEdcGroup.get()
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Build script of the launcher base: a java-library that only aggregates
// dependencies and re-exports them via `api`.
plugins {
`java-library`
}

dependencies {
// A minimal EDC that can request catalogs
api(libs.edc.controlPlaneCore)
api(libs.edc.dataPlaneSelectorCore)
api(libs.edc.configurationFilesystem)
api(libs.edc.controlPlaneAggregateServices)
api(libs.edc.http)
api(libs.edc.dsp)
api(libs.edc.jsonLd)

// Data Catalog Crawler
api(project(":catalog-crawler:catalog-crawler"))
}

// Group ID is taken from the version catalog entry "sovityEdcGroup".
group = libs.versions.sovityEdcGroup.get()
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Build script of the catalog crawler extension module.
plugins {
    `java-library`
}

dependencies {
    // Lombok is only needed at compile time.
    annotationProcessor(libs.lombok)
    compileOnly(libs.lombok)

    implementation(libs.edc.controlPlaneSpi)
    implementation(libs.edc.managementApiConfiguration)

    implementation(libs.quartz.quartz)
    implementation(libs.commons.lang3)
    implementation(libs.quarkus.jooq)

    // Exposed transitively to modules depending on this one.
    api(libs.sovity.edc.catalogParser)
    api(libs.sovity.edc.jsonAndJsonLdUtils)
    api(libs.sovity.edc.wrapperCommonMappers)
    api(libs.sovity.edc.ext.postgresFlywayCore)
    api(libs.sovity.edc.config)
    api(project(":authority-portal-db"))

    testAnnotationProcessor(libs.lombok)
    testCompileOnly(libs.lombok)
    testImplementation(libs.sovity.edc.ext.testUtils)
    testImplementation(libs.assertj.core)
    testImplementation(libs.mockito.core)
    testImplementation(libs.restAssured.restAssured)
    testImplementation(libs.testcontainers.testcontainers)
    testImplementation(libs.flyway.core)
    testImplementation(libs.testcontainers.junitJupiter)
    testImplementation(libs.testcontainers.postgresql)
    testImplementation(libs.junit.api)
    testImplementation(libs.jsonAssert)
    testRuntimeOnly(libs.junit.engine)
}

// tasks.named configures the task lazily; getByName would realize it eagerly
// during the configuration phase.
tasks.named<Test>("test") {
    useJUnitPlatform()
    // Run all test classes in a single forked JVM.
    maxParallelForks = 1
}
Loading
Loading