Skip to content

Commit

Permalink
Add support for multi column validity dates (#3129)
Browse files Browse the repository at this point in the history
Add support for multi-column validity dates in the SQL and legacy query engines
* Replace findValidityDateColumn() with findValidityDate()

Co-authored-by: Torben Meyer <[email protected]>
Co-authored-by: awildturtok <[email protected]>
  • Loading branch information
3 people authored Aug 15, 2023
1 parent c822739 commit 5b35560
Show file tree
Hide file tree
Showing 42 changed files with 813 additions and 288 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import com.bakdata.conquery.models.datasets.concepts.Concept;
import com.bakdata.conquery.models.datasets.concepts.ConceptElement;
import com.bakdata.conquery.models.datasets.concepts.Connector;
import com.bakdata.conquery.models.datasets.concepts.ValidityDate;
import com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeNode;
import com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept;
import com.bakdata.conquery.models.identifiable.ids.specific.ManagedExecutionId;
Expand Down Expand Up @@ -148,6 +149,13 @@ public void resolve(QueryResolveContext context) {

final Map<SecondaryIdDescription, Integer> secondaryIdPositions = calculateSecondaryIdPositions(currentPosition);

final Set<ValidityDate> validityDates = tables.stream()
.map(CQConcept::getTables)
.flatMap(Collection::stream)
.map(CQTable::findValidityDate)
.filter(Objects::nonNull)
.collect(Collectors.toSet());

// We need to know if a column is a concept column so we can prioritize it if it is also a SecondaryId
final Set<Column> conceptColumns = tables.stream()
.map(CQConcept::getTables)
Expand All @@ -157,7 +165,7 @@ public void resolve(QueryResolveContext context) {
.filter(Objects::nonNull)
.collect(Collectors.toSet());

positions = calculateColumnPositions(currentPosition, tables, secondaryIdPositions, conceptColumns);
positions = calculateColumnPositions(currentPosition, tables, secondaryIdPositions, conceptColumns, validityDates);

resultInfos = createResultInfos(secondaryIdPositions, conceptColumns);
}
Expand All @@ -179,22 +187,21 @@ private Map<SecondaryIdDescription, Integer> calculateSecondaryIdPositions(Atomi
return secondaryIdPositions;
}

private static Map<Column, Integer> calculateColumnPositions(AtomicInteger currentPosition, List<CQConcept> tables, Map<SecondaryIdDescription, Integer> secondaryIdPositions, Set<Column> conceptColumns) {
private static Map<Column, Integer> calculateColumnPositions(AtomicInteger currentPosition, List<CQConcept> tables, Map<SecondaryIdDescription, Integer> secondaryIdPositions, Set<Column> conceptColumns, Set<ValidityDate> validityDates) {
final Map<Column, Integer> positions = new HashMap<>();


for (CQConcept concept : tables) {
for (CQTable table : concept.getTables()) {

final Column validityDateColumn = table.findValidityDateColumn();

if (validityDateColumn != null) {
positions.putIfAbsent(validityDateColumn, 0);
}

// Set column positions, set SecondaryId positions to precomputed ones.
for (Column column : table.getConnector().getTable().getColumns()) {

// ValidityDates are handled separately in column=0
if (validityDates.stream().anyMatch(vd -> vd.containsColumn(column))) {
continue;
}

if (positions.containsKey(column)) {
continue;
}
Expand Down Expand Up @@ -341,4 +348,4 @@ public void visit(Consumer<Visitable> visitor) {
public RequiredEntities collectRequiredEntities(QueryExecutionContext context) {
return query.collectRequiredEntities(context);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
import com.bakdata.conquery.apiv1.query.concept.specific.CQConcept;
import com.bakdata.conquery.io.jackson.serializer.NsIdRef;
import com.bakdata.conquery.io.jackson.serializer.NsIdRefCollection;
import com.bakdata.conquery.models.datasets.Column;
import com.bakdata.conquery.models.datasets.concepts.Connector;
import com.bakdata.conquery.models.datasets.concepts.ValidityDate;
import com.bakdata.conquery.models.datasets.concepts.select.Select;
import com.bakdata.conquery.models.query.QueryResolveContext;
import com.fasterxml.jackson.annotation.JsonBackReference;
Expand Down Expand Up @@ -75,18 +75,17 @@ public void resolve(QueryResolveContext context) {
}

/**
 * Resolves the validity date to use for this table.
 *
 * @return the value of the explicitly selected {@code dateColumn} if one is set,
 *         otherwise the first validity date of the connector,
 *         or {@code null} if the connector has no validity dates.
 */
@CheckForNull
public Column findValidityDateColumn() {
public ValidityDate findValidityDate() {

// An explicitly selected dateColumn takes precedence.
// Without one, the default is used, which is always the connector's first validity date.
if (dateColumn != null) {
return dateColumn.getValue().getColumn();
return dateColumn.getValue();
}

if (!connector.getValidityDates().isEmpty()) {
return connector.getValidityDates().get(0).getColumn();
return connector.getValidityDates().get(0);
}

// The connector has no validity dates at all.
return null;
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import com.bakdata.conquery.models.datasets.concepts.Concept;
import com.bakdata.conquery.models.datasets.concepts.ConceptElement;
import com.bakdata.conquery.models.datasets.concepts.Connector;
import com.bakdata.conquery.models.datasets.concepts.ValidityDate;
import com.bakdata.conquery.models.datasets.concepts.select.Select;
import com.bakdata.conquery.models.identifiable.ids.NamespacedIdentifiable;
import com.bakdata.conquery.models.query.DateAggregationMode;
Expand Down Expand Up @@ -226,7 +227,7 @@ public QPNode createQueryPlan(QueryPlanContext context, ConceptQueryPlan plan) {

final QPNode
conceptSpecificNode =
getConcept().createConceptQuery(context, filters, aggregators, eventDateUnionAggregators, selectValidityDateColumn(table));
getConcept().createConceptQuery(context, filters, aggregators, eventDateUnionAggregators, selectValidityDate(table));

// Link up the ExistsAggregators to the node
existsAggregators.forEach(agg -> agg.setReference(conceptSpecificNode));
Expand Down Expand Up @@ -272,14 +273,14 @@ private static List<Aggregator<?>> createAggregators(ConceptQueryPlan plan, List
.collect(Collectors.toList());
}

private Column selectValidityDateColumn(CQTable table) {
private ValidityDate selectValidityDate(CQTable table) {
if (table.getDateColumn() != null) {
return table.getDateColumn().getValue().getColumn();
return table.getDateColumn().getValue();
}

//else use this first defined validity date column
if (!table.getConnector().getValidityDates().isEmpty()) {
return table.getConnector().getValidityDates().get(0).getColumn();
return table.getConnector().getValidityDates().get(0);
}

return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import com.bakdata.conquery.models.auth.permissions.ConceptPermission;
import com.bakdata.conquery.models.auth.permissions.ConqueryPermission;
import com.bakdata.conquery.models.common.CDateSet;
import com.bakdata.conquery.models.datasets.Column;
import com.bakdata.conquery.models.datasets.Dataset;
import com.bakdata.conquery.models.datasets.concepts.select.Select;
import com.bakdata.conquery.models.exceptions.ConfigurationException;
Expand Down Expand Up @@ -89,12 +88,12 @@ public int countElements() {
/**
 * Allows concepts to create their own altered FiltersNode if necessary.
 *
 * Builds a {@code Leaf} when neither filters nor aggregators are given, otherwise a FiltersNode.
 * The resulting node is wrapped in a {@code ValidityDateNode} only when a validity date is supplied.
 */
public QPNode createConceptQuery(QueryPlanContext context, List<FilterNode<?>> filters, List<Aggregator<?>> aggregators, List<Aggregator<CDateSet>> eventDateAggregators, Column validityDateColumn) {
public QPNode createConceptQuery(QueryPlanContext context, List<FilterNode<?>> filters, List<Aggregator<?>> aggregators, List<Aggregator<CDateSet>> eventDateAggregators, ValidityDate validityDate) {
final QPNode child = filters.isEmpty() && aggregators.isEmpty() ? new Leaf() : FiltersNode.create(filters, aggregators, eventDateAggregators);


// Only if a validity date exists, wrap the child node in a ValidityDateNode.
return validityDateColumn != null ? new ValidityDateNode(validityDateColumn, child) : child;
return validityDate != null ? new ValidityDateNode(validityDate, child) : child;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
package com.bakdata.conquery.models.datasets.concepts;

import javax.validation.constraints.NotNull;
import javax.annotation.CheckForNull;
import javax.annotation.Nullable;

import com.bakdata.conquery.io.jackson.serializer.NsIdRef;
import com.bakdata.conquery.models.common.daterange.CDateRange;
import com.bakdata.conquery.models.datasets.Column;
import com.bakdata.conquery.models.datasets.Dataset;
import com.bakdata.conquery.models.events.Bucket;
import com.bakdata.conquery.models.events.MajorTypeId;
import com.bakdata.conquery.models.identifiable.Labeled;
import com.bakdata.conquery.models.identifiable.ids.NamespacedIdentifiable;
import com.bakdata.conquery.models.identifiable.ids.specific.ValidityDateId;
Expand All @@ -15,6 +19,7 @@
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.ToString;
import lombok.extern.slf4j.Slf4j;

@Getter
Expand All @@ -24,44 +29,114 @@
public class ValidityDate extends Labeled<ValidityDateId> implements NamespacedIdentifiable<ValidityDateId> {

@NsIdRef
@NotNull
@Nullable
private Column column;
@NsIdRef
@Nullable
private Column startColumn;
@NsIdRef
@Nullable
private Column endColumn;
@JsonBackReference
@ToString.Exclude
@EqualsAndHashCode.Exclude
private Connector connector;

/**
 * Creates a single-column validity date.
 *
 * @param column the column to store as this validity date's {@code column}
 * @return a new ValidityDate with only {@code column} set
 */
public static ValidityDate create(Column column) {
    final ValidityDate singleColumn = new ValidityDate();

    singleColumn.setColumn(column);

    return singleColumn;
}

/**
 * Creates a two-column validity date.
 *
 * @param startColumn the column to store as {@code startColumn}
 * @param endColumn   the column to store as {@code endColumn}
 * @return a new ValidityDate with only {@code startColumn} and {@code endColumn} set
 */
public static ValidityDate create(Column startColumn, Column endColumn) {
    final ValidityDate validityDate = new ValidityDate();
    // Populate the dedicated start/end fields. Writing both values through setColumn(...)
    // would leave startColumn/endColumn unset and store the end column as the single column.
    validityDate.setStartColumn(startColumn);
    validityDate.setEndColumn(endColumn);
    return validityDate;
}

/**
 * Builds the id of this validity date from the id of its connector and its own name.
 */
@Override
public ValidityDateId createId() {
return new ValidityDateId(connector.getId(), getName());
}

@JsonIgnore
@ValidationMethod(message = "Column is not of Date or DateRange.")
public boolean isValidValidityDates() {
if (getColumn().getType().isDateCompatible()) {
return true;
/**
 * Reads the validity date of a single event from the bucket.
 *
 * If {@code column} is set, the value of that column is returned as a date range,
 * or {@code null} when the bucket has no value for the event.
 * Otherwise the range is assembled from {@code startColumn} and {@code endColumn}:
 * a missing bound is replaced by {@code Integer.MIN_VALUE} / {@code Integer.MAX_VALUE}
 * (presumably treated as open-ended by CDateRange - TODO confirm),
 * and {@code null} is returned when both bounds are missing.
 *
 * @param event  index of the event inside the bucket
 * @param bucket the bucket to read the values from
 * @return the validity date range of the event, or {@code null} if no value is present
 */
@CheckForNull
public CDateRange getValidityDate(int event, Bucket bucket) {
// I spent a lot of time trying to create two classes implementing single/multi-column validity dates separately.
// JsonCreator was not happy, and I could not figure out why. This is probably the most performant implementation that's not two classes.

// Single-column mode: the column itself holds the date or date range.
if (getColumn() != null) {
if (bucket.has(event, getColumn())) {
return bucket.getAsDateRange(event, getColumn());
}

return null;
}

// Two-column mode: the range is built from separate start and end columns.
final Column startColumn = getStartColumn();
final Column endColumn = getEndColumn();

final boolean hasStart = bucket.has(event, startColumn);
final boolean hasEnd = bucket.has(event, endColumn);

if (!hasStart && !hasEnd) {
return null;
}

log.error("ValidityDate-Column[{}] is not of type DATE or DATERANGE", getColumn().getId());
return false;
// A missing bound falls back to the extreme int value on its side.
final int start = hasStart ? bucket.getDate(event, startColumn) : Integer.MIN_VALUE;
final int end = hasEnd ? bucket.getDate(event, endColumn) : Integer.MAX_VALUE;

return CDateRange.of(start, end);
}

/**
 * Tests whether the given column is used by this validity date,
 * i.e. equals its {@code column}, {@code startColumn} or {@code endColumn}.
 *
 * @param column the column to look for
 * @return {@code true} if any of the three columns equals {@code column}
 */
public boolean containsColumn(Column column) {
    if (column.equals(getColumn())) {
        return true;
    }

    if (column.equals(getStartColumn())) {
        return true;
    }

    return column.equals(getEndColumn());
}

/**
 * Validation: every column that is set ({@code column}, {@code startColumn}, {@code endColumn})
 * has to belong to the table of the connector. Columns that are {@code null} are not checked.
 */
@JsonIgnore
@ValidationMethod(message = "ValidityDate is not for Connectors' Table.")
public boolean isForConnectorsTable() {

if (getColumn().getTable().equals(connector.getTable())) {
return true;
// Two-column mode: either bound pointing at a foreign table is a violation.
final boolean anyColumnNotForConnector =
(startColumn != null && !startColumn.getTable().equals(connector.getTable()))
|| (endColumn != null && !endColumn.getTable().equals(connector.getTable()));

// Single-column mode: the column pointing at a foreign table is a violation.
final boolean columnNotForConnector = column != null && !column.getTable().equals(connector.getTable());

return !anyColumnNotForConnector && !columnNotForConnector;
}

/**
 * Validation: either {@code column} is set and both {@code startColumn} and {@code endColumn} are unset,
 * or {@code column} is unset and both {@code startColumn} and {@code endColumn} are set.
 */
@JsonIgnore
@ValidationMethod(message = "Single column date range (set via column) and two column date range (set via startColumn and endColumn) are exclusive.")
public boolean isExclusiveValidityDates() {
    final boolean hasStart = startColumn != null;
    final boolean hasEnd = endColumn != null;

    // Without a single column both bounds are required, with one neither bound is allowed.
    return column == null
           ? hasStart && hasEnd
           : !hasStart && !hasEnd;
}

log.error("ValidityDate[{}](Column = `{}`) does not belong to Connector[{}]#Table[{}]", getId(), getColumn().getId(), getId(), connector.getTable().getId());
/**
 * Validation: when both {@code startColumn} and {@code endColumn} are set, each has to be of type DATE.
 * Passes trivially when either of them is unset.
 */
@JsonIgnore
@ValidationMethod(message = "Both columns of a two-column validity date have to be of type DATE.")
public boolean isValidTwoColumnValidityDates() {
    final boolean bothColumnsSet = startColumn != null && endColumn != null;

    return !bothColumnsSet
           || (startColumn.getType() == MajorTypeId.DATE && endColumn.getType() == MajorTypeId.DATE);
}

return false;
/**
 * Validation: when {@code column} is set, its type has to be date compatible.
 * Passes trivially when {@code column} is unset.
 */
@JsonIgnore
@ValidationMethod(message = "Column is not of type DATE or DATE_RANGE.")
public boolean isValidValidityDatesSingleColumn() {
    return column == null || column.getType().isDateCompatible();
}

/**
 * Returns the dataset of the connector this validity date is attached to.
 */
@JsonIgnore
@Override
public Dataset getDataset() {
return connector.getDataset();
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import com.bakdata.conquery.models.datasets.Dataset;
import com.bakdata.conquery.models.datasets.Import;
import com.bakdata.conquery.models.datasets.Table;
import com.bakdata.conquery.models.datasets.concepts.ValidityDate;
import com.bakdata.conquery.models.events.stores.root.BooleanStore;
import com.bakdata.conquery.models.events.stores.root.ColumnStore;
import com.bakdata.conquery.models.events.stores.root.DateRangeStore;
Expand Down Expand Up @@ -164,8 +165,14 @@ public CDateRange getDateRange(int event, Column column) {
return ((DateRangeStore) getStore(column)).getDateRange(event);
}

public boolean eventIsContainedIn(int event, Column column, CDateSet dateRanges) {
return dateRanges.intersects(getAsDateRange(event, column));
/**
 * Tests whether the validity date of the event intersects the given date ranges.
 *
 * @param event        index of the event inside this bucket
 * @param validityDate the validity date used to read the event's date range
 * @param dateRanges   the date ranges to test against
 * @return {@code false} if the event has no validity date, otherwise whether it intersects {@code dateRanges}
 */
public boolean eventIsContainedIn(int event, ValidityDate validityDate, CDateSet dateRanges) {
    final CDateRange eventRange = validityDate.getValidityDate(event, this);

    // An event without a validity date is never contained.
    return eventRange != null && dateRanges.intersects(eventRange);
}

public CDateRange getAsDateRange(int event, Column column) {
Expand All @@ -181,10 +188,10 @@ public Object createScriptValue(int event, @NotNull Column column) {
}

public Map<String, Object> calculateMap(int event) {
Map<String, Object> out = new HashMap<>(stores.length);
final Map<String, Object> out = new HashMap<>(stores.length);

for (int i = 0; i < stores.length; i++) {
ColumnStore store = stores[i];
final ColumnStore store = stores[i];
if (!store.has(event)) {
continue;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import com.bakdata.conquery.models.common.CDateSet;
import com.bakdata.conquery.models.common.daterange.CDateRange;
import com.bakdata.conquery.models.datasets.Column;
import com.bakdata.conquery.models.datasets.concepts.ValidityDate;
import com.bakdata.conquery.models.events.stores.root.ColumnStore;
import lombok.Getter;

Expand All @@ -25,7 +26,7 @@ public EmptyBucket() {


/**
 * Always reports {@code false}, regardless of the arguments.
 */
@Override
public boolean eventIsContainedIn(int event, Column column, CDateSet dateRanges) {
public boolean eventIsContainedIn(int event, ValidityDate column, CDateSet dateRanges) {
return false;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
import com.bakdata.conquery.io.storage.ModificationShieldedWorkerStorage;
import com.bakdata.conquery.models.common.CDate;
import com.bakdata.conquery.models.common.CDateSet;
import com.bakdata.conquery.models.datasets.Column;
import com.bakdata.conquery.models.datasets.SecondaryIdDescription;
import com.bakdata.conquery.models.datasets.Table;
import com.bakdata.conquery.models.datasets.concepts.Connector;
import com.bakdata.conquery.models.datasets.concepts.ValidityDate;
import com.bakdata.conquery.models.events.Bucket;
import com.bakdata.conquery.models.events.BucketManager;
import com.bakdata.conquery.models.identifiable.ids.specific.ManagedExecutionId;
Expand All @@ -36,7 +36,7 @@ public class QueryExecutionContext {
private final BucketManager bucketManager;


private Column validityDateColumn;
private ValidityDate validityDateColumn;
@NonNull
private CDateSet dateRestriction = CDateSet.createFull();
private Connector connector;
Expand Down
Loading

0 comments on commit 5b35560

Please sign in to comment.