Skip to content

Commit

Permalink
doc: added comments
Browse files Browse the repository at this point in the history
  • Loading branch information
idris52 committed Aug 25, 2023
1 parent 70cf165 commit c9684a4
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@

public class DatasetEntityIngestor extends DataSpaceCatalogIngestorBase {
Logger log = LoggerFactory.getLogger(this.getClass().getName());

Check warning

Code scanning / CodeQL

Field masks field in super class Warning

This field shadows another field called
log
in a superclass.
RestEmitter emitter = RestEmitter.createWithDefaults();


final String entityType = "dataset";
/**
 * Builds the editableDatasetProperties aspect of a dataset entity - see the DataHub documentation for dataset entity aspects.
 */
private DatasetProperties _datasetProperties(Asset asset) {
var createdAt=new com.linkedin.common.TimeStamp();
Expand All @@ -63,12 +63,20 @@ public SchemaMetadata _schemaMetadata(Asset asset) { //todo: This should not be
return new SchemaMetadata().setFields(fields);
}

/**
 * Builds a DataHub-style dataset URN for the given EDC asset. The name segment of the
 * URN concatenates the asset's name and id, `test` is used as the platform (via
 * {@code _platformUrn}), and the {@link FabricType} is the environment type (Dev, Prod, etc.).
 *
 * @param asset the EDC asset to derive the URN from
 * @return the dataset URN for the asset
 * @throws URISyntaxException if the platform URN cannot be constructed
 */
public Urn _urn(Asset asset) throws URISyntaxException {
    var platform = _platformUrn(entityType);
    var datasetName = asset.getName() + asset.getId();
    return new DatasetUrn(platform, datasetName, FabricType.DEV);
}

/**
 * Emits the whole dataset, with all aspects defined within, to the data space catalog. To ingest/emit only a single aspect, see the specs.
 * In this method we first create a dataset with a single aspect - the datasetProperties aspect. Then we create the other aspects, such as
 * the schemaMetadata and editableProperties aspects, and ingest them in parallel.
 * This could be done sequentially, but it demonstrates that, once an entity already exists, aspects can be pushed in parallel as well.
 * Since the calls are asynchronous, the DataHub API at the receiving end will respond asynchronously.
 */
public Urn emitMetadataChangeProposal(Asset asset)

Check notice

Code scanning / CodeQL

Missing Override annotation Note

This method overrides
DataSpaceCatalogIngestorBase.emitMetadataChangeProposal
; it is advisable to add an Override annotation.
throws URISyntaxException, IOException, ExecutionException, InterruptedException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
import com.linkedin.common.urn.DataPlatformUrn;
import com.linkedin.common.urn.Urn;
import com.linkedin.data.template.RecordTemplate;
import datahub.client.rest.RestEmitter;
import datahub.event.MetadataChangeProposalWrapper;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Collections;
import java.util.concurrent.ExecutionException;
import org.eclipse.edc.spi.types.domain.asset.Asset;
import org.slf4j.Logger;
Expand All @@ -14,6 +16,16 @@

abstract public class DataSpaceCatalogIngestorBase {
Logger log = LoggerFactory.getLogger(this.getClass().getName());
/**
 * To create an emitter that pushes to a remote DataHub instance (through an IP or URL), see the RestEmitter class.
 * It has examples for creating an emitter with external URLs; one such example is shown, commented out, below.
 */
protected RestEmitter emitter = RestEmitter.createWithDefaults();
//protected RestEmitter emitter = RestEmitter.create(b -> b.server("http://localhost:8080")); // todo: replace the `localhost:8080` with the IP address or hostname of the DataHub GMS.
/**
 * Builds a metadata change proposal for any entity. {@code aspect} is the aspect payload (e.g. the datasetProperties
 * or Ownership aspect of a dataset), {@code entityType} is a DataHub entity type such as `dataset`,
 * and {@code urn} is a DataHub-style URN.
 */
public MetadataChangeProposalWrapper _metadataChangeProposalWrapper(RecordTemplate aspect, String entityType, Urn urn) {
return MetadataChangeProposalWrapper.builder()
.entityType(entityType)
Expand All @@ -22,9 +34,17 @@ public MetadataChangeProposalWrapper _metadataChangeProposalWrapper(RecordTempla
.aspect(aspect)
.build();
}
/**
 * Builds a data-platform URN using the hard-coded platform name `test`.
 * At the moment EDC connectors are experimental: the actual data platforms (BigQuery, Snowflake, Hudi, etc.)
 * are unknown and not provided, so `test` is used. This could later be made configurable via `conf` files.
 *
 * @param entityType used as the entity-type segment of the tuple URN.
 *                   NOTE(review): {@code createFromTuple} is called with {@code entityType} (e.g. `dataset`)
 *                   rather than `dataPlatform` - confirm this yields a valid data-platform URN.
 * @return the data-platform URN
 * @throws URISyntaxException if the constructed URN cannot be parsed
 */
public DataPlatformUrn _platformUrn(String entityType) throws URISyntaxException {
return DataPlatformUrn.createFromUrn(DataPlatformUrn.createFromTuple(entityType, "test"));
}

/**
 * Implemented by subclasses to build entity-specific change proposals and emit them to DataHub (the data space catalog).
 *
 * @param asset the EDC asset whose metadata is to be emitted
 * @return the URN of the emitted entity
 * @throws URISyntaxException if a URN cannot be constructed for the asset
 * @throws IOException if the emitter fails with an I/O error - see implementations
 * @throws ExecutionException if an asynchronous emit call fails
 * @throws InterruptedException if the thread is interrupted while waiting on an emit call
 */
public abstract Urn emitMetadataChangeProposal(Asset asset) throws URISyntaxException, IOException, ExecutionException,
InterruptedException;

Expand Down

0 comments on commit c9684a4

Please sign in to comment.