-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add 'catalog-crawler/' from commit '73d00c04c1f697740a940f8c66846f2c2…
- Loading branch information
Showing
91 changed files
with
5,537 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
<!-- PROJECT LOGO --> | ||
<br /> | ||
<div align="center"> | ||
<a href="https://github.com/sovity/edc-ce"> | ||
<img src="https://raw.githubusercontent.com/sovity/edc-ui/main/src/assets/images/sovity_logo.svg" alt="Logo" width="300"> | ||
</a> | ||
|
||
<h3 align="center">EDC-Connector Extension:<br />Catalog Crawler</h3> | ||
|
||
<p align="center"> | ||
<a href="https://github.com/sovity/edc-ce/issues/new?template=bug_report.md">Report Bug</a> | ||
· | ||
<a href="https://github.com/sovity/edc-ce/issues/new?template=feature_request.md">Request Feature</a> | ||
</p> | ||
</div> | ||
|
||
## About this Extension | ||
|
||
The catalog crawler is a deployment unit that depends on an existing Authority Portal's database: | ||
|
||
- It is a modified EDC connector with the task of crawling other connectors' public data offers. | ||
- It periodically checks the Authority Portal's connector list for its environment. | ||
- It crawls the given connectors in regular intervals. | ||
- It writes the data offers and connector statuses back into the Authority Portal DB. | ||
- Each Environment configured in the Authority Portal requires its own Catalog Crawler with credentials for that environment's DAPS. | ||
|
||
## Why does this component exist? | ||
|
||
The Authority Portal uses a non-EDC stack, and the EDC stack cannot handle multiple sources of authority at once. | ||
|
||
With the `DB -> UI` part of the broker having been moved to the Authority Portal, only the `Catalog -> DB` part remains as the Catalog Crawler, | ||
as it requires Connector-to-Connector IAM within the given Dataspace. | ||
|
||
## Deployment | ||
|
||
Please see the [Catalog Crawler Productive Deployment Guide](../../docs/deployment-guide/goals/catalog-crawler-production/README.md) for more information. | ||
|
||
## License | ||
|
||
Apache License 2.0 - see [LICENSE](../../LICENSE) | ||
|
||
## Contact | ||
|
||
sovity GmbH - [email protected] |
19 changes: 19 additions & 0 deletions
19
catalog-crawler/extensions/catalog-crawler/catalog-crawler-launcher-base/build.gradle.kts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
// Build script of the catalog-crawler-launcher-base module. | ||
// It declares no sources-specific configuration; it only aggregates a minimal set of EDC | ||
// modules plus the catalog crawler extension as `api` dependencies, so that anything | ||
// depending on this module receives them transitively. | ||
plugins { | ||
`java-library` | ||
} | ||
|
||
dependencies { | ||
// A minimal EDC that can request catalogs | ||
api(libs.edc.controlPlaneCore) | ||
api(libs.edc.dataPlaneSelectorCore) | ||
api(libs.edc.configurationFilesystem) | ||
api(libs.edc.controlPlaneAggregateServices) | ||
api(libs.edc.http) | ||
api(libs.edc.dsp) | ||
api(libs.edc.jsonLd) | ||
|
||
// Data Catalog Crawler | ||
api(project(":extensions:catalog-crawler:catalog-crawler")) | ||
} | ||
|
||
// The Maven group id is read from the `sovityEdcGroup` entry of the version catalog. | ||
group = libs.versions.sovityEdcGroup.get() |
48 changes: 48 additions & 0 deletions
48
catalog-crawler/extensions/catalog-crawler/catalog-crawler/build.gradle.kts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
// Build script of the catalog-crawler extension module: dependencies, test setup and | ||
// Maven publication of the Java component. | ||
plugins { | ||
`java-library` | ||
} | ||
|
||
dependencies { | ||
// Lombok is only needed at compile time (annotation processing), not at runtime. | ||
annotationProcessor(libs.lombok) | ||
compileOnly(libs.lombok) | ||
|
||
// EDC modules used internally by the extension (not exposed to consumers). | ||
implementation(libs.edc.controlPlaneSpi) | ||
implementation(libs.edc.managementApiConfiguration) | ||
|
||
// Third-party libraries and the project-internal versions utility. | ||
implementation(libs.quartz.quartz) | ||
implementation(libs.apache.commonsLang) | ||
implementation(project(":utils:versions")) | ||
|
||
// Project modules exposed transitively to consumers of this module. | ||
api(project(":utils:catalog-parser")) | ||
api(project(":utils:json-and-jsonld-utils")) | ||
api(project(":extensions:wrapper:wrapper-common-mappers")) | ||
api(project(":extensions:catalog-crawler:catalog-crawler-db")) | ||
api(project(":extensions:postgres-flyway-core")) | ||
|
||
// Test-only dependencies. | ||
testAnnotationProcessor(libs.lombok) | ||
testCompileOnly(libs.lombok) | ||
testImplementation(project(":utils:test-utils")) | ||
testImplementation(libs.assertj.core) | ||
testImplementation(libs.mockito.core) | ||
testImplementation(libs.restAssured.restAssured) | ||
testImplementation(libs.testcontainers.testcontainers) | ||
testImplementation(libs.flyway.core) | ||
testImplementation(libs.testcontainers.junitJupiter) | ||
testImplementation(libs.testcontainers.postgresql) | ||
testImplementation(libs.junit.api) | ||
testImplementation(libs.jsonAssert) | ||
testRuntimeOnly(libs.junit.engine) | ||
} | ||
|
||
// Tests run on the JUnit Platform, explicitly limited to one forked test JVM. | ||
// NOTE(review): presumably pinned because the tests share a Testcontainers PostgreSQL - confirm. | ||
tasks.getByName<Test>("test") { | ||
useJUnitPlatform() | ||
maxParallelForks = 1 | ||
} | ||
|
||
// Publishes the `java` component as a Maven publication named after the project. | ||
// NOTE(review): the `publishing` extension is not applied in the plugins block above; | ||
// presumably the publishing plugin is applied by the root build - confirm. | ||
publishing { | ||
publications { | ||
create<MavenPublication>(project.name) { | ||
from(components["java"]) | ||
} | ||
} | ||
} |
149 changes: 149 additions & 0 deletions
149
...ler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/CrawlerExtension.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
/* | ||
* Copyright (c) 2023 sovity GmbH | ||
* | ||
* This program and the accompanying materials are made available under the | ||
* terms of the Apache License, Version 2.0 which is available at | ||
* https://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* Contributors: | ||
* sovity GmbH - initial API and implementation | ||
* | ||
*/ | ||
|
||
package de.sovity.edc.ext.catalog.crawler; | ||
|
||
import de.sovity.edc.ext.wrapper.api.common.mappers.PlaceholderEndpointService; | ||
import org.eclipse.edc.connector.api.management.configuration.transform.ManagementApiTypeTransformerRegistry; | ||
import org.eclipse.edc.connector.spi.catalog.CatalogService; | ||
import org.eclipse.edc.jsonld.spi.JsonLd; | ||
import org.eclipse.edc.runtime.metamodel.annotation.Inject; | ||
import org.eclipse.edc.runtime.metamodel.annotation.Provides; | ||
import org.eclipse.edc.runtime.metamodel.annotation.Setting; | ||
import org.eclipse.edc.spi.system.ServiceExtension; | ||
import org.eclipse.edc.spi.system.ServiceExtensionContext; | ||
import org.eclipse.edc.spi.types.TypeManager; | ||
|
||
import static de.sovity.edc.ext.catalog.crawler.orchestration.config.EdcConfigPropertyUtils.toEdcProp; | ||
|
||
@Provides({CrawlerExtensionContext.class}) | ||
public class CrawlerExtension implements ServiceExtension { | ||
|
||
public static final String EXTENSION_NAME = "Authority Portal Data Catalog Crawler"; | ||
|
||
@Setting(required = true) | ||
public static final String EXTENSION_ENABLED = toEdcProp("CRAWLER_EXTENSION_ENABLED"); | ||
|
||
@Setting(required = true) | ||
public static final String ENVIRONMENT_ID = toEdcProp("CRAWLER_ENVIRONMENT_ID"); | ||
|
||
@Setting(required = true) | ||
public static final String JDBC_URL = toEdcProp("CRAWLER_DB_JDBC_URL"); | ||
|
||
@Setting(required = true) | ||
public static final String JDBC_USER = toEdcProp("CRAWLER_DB_JDBC_USER"); | ||
|
||
@Setting(required = true) | ||
public static final String JDBC_PASSWORD = toEdcProp("CRAWLER_DB_JDBC_PASSWORD"); | ||
|
||
@Setting | ||
public static final String DB_CONNECTION_POOL_SIZE = toEdcProp("CRAWLER_DB_CONNECTION_POOL_SIZE"); | ||
|
||
@Setting | ||
public static final String DB_CONNECTION_TIMEOUT_IN_MS = toEdcProp("CRAWLER_DB_CONNECTION_TIMEOUT_IN_MS"); | ||
|
||
@Setting | ||
public static final String DB_MIGRATE = toEdcProp("CRAWLER_DB_MIGRATE"); | ||
|
||
@Setting | ||
public static final String DB_CLEAN = toEdcProp("CRAWLER_DB_CLEAN"); | ||
|
||
@Setting | ||
public static final String DB_CLEAN_ENABLED = toEdcProp("CRAWLER_DB_CLEAN_ENABLED"); | ||
|
||
@Setting | ||
public static final String DB_ADDITIONAL_FLYWAY_MIGRATION_LOCATIONS = toEdcProp("CRAWLER_DB_ADDITIONAL_FLYWAY_LOCATIONS"); | ||
|
||
@Setting | ||
public static final String NUM_THREADS = toEdcProp("CRAWLER_NUM_THREADS"); | ||
|
||
@Setting | ||
public static final String MAX_DATA_OFFERS_PER_CONNECTOR = toEdcProp("CRAWLER_MAX_DATA_OFFERS_PER_CONNECTOR"); | ||
|
||
@Setting | ||
public static final String MAX_CONTRACT_OFFERS_PER_DATA_OFFER = toEdcProp("CRAWLER_MAX_CONTRACT_OFFERS_PER_DATA_OFFER"); | ||
|
||
@Setting | ||
public static final String CRON_ONLINE_CONNECTOR_REFRESH = toEdcProp("CRAWLER_CRON_ONLINE_CONNECTOR_REFRESH"); | ||
|
||
@Setting | ||
public static final String CRON_OFFLINE_CONNECTOR_REFRESH = toEdcProp("CRAWLER_CRON_OFFLINE_CONNECTOR_REFRESH"); | ||
|
||
@Setting | ||
public static final String CRON_DEAD_CONNECTOR_REFRESH = toEdcProp("CRAWLER_CRON_DEAD_CONNECTOR_REFRESH"); | ||
|
||
@Setting | ||
public static final String SCHEDULED_KILL_OFFLINE_CONNECTORS = toEdcProp("CRAWLER_SCHEDULED_KILL_OFFLINE_CONNECTORS"); | ||
@Setting | ||
public static final String KILL_OFFLINE_CONNECTORS_AFTER = toEdcProp("CRAWLER_KILL_OFFLINE_CONNECTORS_AFTER"); | ||
|
||
@Inject | ||
private TypeManager typeManager; | ||
|
||
@Inject | ||
private ManagementApiTypeTransformerRegistry typeTransformerRegistry; | ||
|
||
@Inject | ||
private JsonLd jsonLd; | ||
|
||
@Inject | ||
private CatalogService catalogService; | ||
|
||
/** | ||
* Manual Dependency Injection Result | ||
*/ | ||
private CrawlerExtensionContext services; | ||
|
||
@Override | ||
public String name() { | ||
return EXTENSION_NAME; | ||
} | ||
|
||
@Override | ||
public void initialize(ServiceExtensionContext context) { | ||
if (!Boolean.TRUE.equals(context.getConfig().getBoolean(EXTENSION_ENABLED, false))) { | ||
context.getMonitor().info("Crawler extension is disabled."); | ||
return; | ||
} | ||
|
||
services = CrawlerExtensionContextBuilder.buildContext( | ||
context.getConfig(), | ||
context.getMonitor(), | ||
typeManager, | ||
typeTransformerRegistry, | ||
jsonLd, | ||
catalogService, | ||
new PlaceholderEndpointService("http://0.0.0.0/") | ||
); | ||
|
||
// Provide access for the tests | ||
context.registerService(CrawlerExtensionContext.class, services); | ||
} | ||
|
||
@Override | ||
public void start() { | ||
if (services == null) { | ||
return; | ||
} | ||
services.crawlerInitializer().onStartup(); | ||
} | ||
|
||
@Override | ||
public void shutdown() { | ||
if (services == null) { | ||
return; | ||
} | ||
services.dataSource().close(); | ||
} | ||
} |
42 changes: 42 additions & 0 deletions
42
...alog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/CrawlerExtensionContext.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
/* | ||
* Copyright (c) 2023 sovity GmbH | ||
* | ||
* This program and the accompanying materials are made available under the | ||
* terms of the Apache License, Version 2.0 which is available at | ||
* https://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* Contributors: | ||
* sovity GmbH - initial API and implementation | ||
* | ||
*/ | ||
|
||
package de.sovity.edc.ext.catalog.crawler; | ||
|
||
import com.zaxxer.hikari.HikariDataSource; | ||
import de.sovity.edc.ext.catalog.crawler.crawling.ConnectorCrawler; | ||
import de.sovity.edc.ext.catalog.crawler.crawling.fetching.FetchedCatalogBuilder; | ||
import de.sovity.edc.ext.catalog.crawler.dao.config.DslContextFactory; | ||
import de.sovity.edc.ext.catalog.crawler.dao.data_offers.DataOfferRecordUpdater; | ||
import de.sovity.edc.ext.wrapper.api.common.mappers.PolicyMapper; | ||
|
||
|
||
/** | ||
* Manual Dependency Injection result | ||
* | ||
* @param crawlerInitializer Startup Logic | ||
*/ | ||
public record CrawlerExtensionContext( | ||
CrawlerInitializer crawlerInitializer, | ||
// Required for stopping connections on closing | ||
HikariDataSource dataSource, | ||
DslContextFactory dslContextFactory, | ||
|
||
// Required for Integration Tests | ||
ConnectorCrawler connectorCrawler, | ||
PolicyMapper policyMapper, | ||
FetchedCatalogBuilder catalogPatchBuilder, | ||
DataOfferRecordUpdater dataOfferRecordUpdater | ||
) { | ||
} |
Oops, something went wrong.