Skip to content

Commit

Permalink
1447 - Improves the tag search engine once we click on the tag on th… (
Browse files Browse the repository at this point in the history
  • Loading branch information
Vladysl authored Oct 12, 2023
1 parent d94453d commit 20c0b84
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,6 @@ public Mono<Long> countByState(final FacetStateDto state, final OwnerPojo owner)
final var select = DSL.select(countDistinct(DATA_ENTITY.ID))
.from(DATA_ENTITY)
.join(SEARCH_ENTRYPOINT).on(SEARCH_ENTRYPOINT.DATA_ENTITY_ID.eq(DATA_ENTITY.ID))
.leftJoin(TAG_TO_DATA_ENTITY).on(TAG_TO_DATA_ENTITY.DATA_ENTITY_ID.eq(DATA_ENTITY.ID))
.leftJoin(DATA_SOURCE).on(DATA_SOURCE.ID.eq(DATA_ENTITY.DATA_SOURCE_ID))
.leftJoin(NAMESPACE).on(NAMESPACE.ID.eq(DATA_ENTITY.NAMESPACE_ID))
.or(NAMESPACE.ID.eq(DATA_SOURCE.NAMESPACE_ID))
Expand Down Expand Up @@ -677,8 +676,8 @@ public Mono<List<DataEntityDimensionsDto>> findByState(final FacetStateDto state
.on(DATA_SOURCE.ID.eq(jooqQueryHelper.getField(deCte, DATA_ENTITY.DATA_SOURCE_ID)))
.leftJoin(NAMESPACE).on(NAMESPACE.ID.eq(jooqQueryHelper.getField(deCte, DATA_ENTITY.NAMESPACE_ID)))
.or(NAMESPACE.ID.eq(DATA_SOURCE.NAMESPACE_ID))
.leftJoin(TAG_TO_DATA_ENTITY)
.on(TAG_TO_DATA_ENTITY.DATA_ENTITY_ID.eq(jooqQueryHelper.getField(deCte, DATA_ENTITY.ID)))
.leftJoin(DATA_ENTITY)
.on(DATA_ENTITY.ID.eq(jooqQueryHelper.getField(deCte, DATA_ENTITY.ID)))
.leftJoin(OWNERSHIP).on(OWNERSHIP.DATA_ENTITY_ID.eq(jooqQueryHelper.getField(deCte, DATA_ENTITY.ID)))
.leftJoin(OWNER).on(OWNER.ID.eq(OWNERSHIP.OWNER_ID))
.leftJoin(TITLE).on(TITLE.ID.eq(OWNERSHIP.TITLE_ID))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
import org.jooq.Condition;
import org.jooq.Field;
import org.jooq.Record;
import org.jooq.Record1;
import org.jooq.Record3;
import org.jooq.SelectOrderByStep;
import org.jooq.Table;
import org.jooq.impl.DSL;
import org.opendatadiscovery.oddplatform.dto.DataEntityClassDto;
Expand All @@ -37,6 +39,10 @@
import static org.jooq.impl.DSL.count;
import static org.jooq.impl.DSL.countDistinct;
import static org.jooq.impl.DSL.field;
import static org.jooq.impl.DSL.select;
import static org.opendatadiscovery.oddplatform.model.Tables.DATASET_FIELD;
import static org.opendatadiscovery.oddplatform.model.Tables.DATASET_STRUCTURE;
import static org.opendatadiscovery.oddplatform.model.Tables.DATASET_VERSION;
import static org.opendatadiscovery.oddplatform.model.Tables.DATA_ENTITY;
import static org.opendatadiscovery.oddplatform.model.Tables.DATA_SOURCE;
import static org.opendatadiscovery.oddplatform.model.Tables.GROUP_ENTITY_RELATIONS;
Expand All @@ -46,6 +52,7 @@
import static org.opendatadiscovery.oddplatform.model.Tables.SEARCH_ENTRYPOINT;
import static org.opendatadiscovery.oddplatform.model.Tables.SEARCH_FACETS;
import static org.opendatadiscovery.oddplatform.model.Tables.TAG;
import static org.opendatadiscovery.oddplatform.model.Tables.TAG_TO_DATASET_FIELD;
import static org.opendatadiscovery.oddplatform.model.Tables.TAG_TO_DATA_ENTITY;
import static org.opendatadiscovery.oddplatform.model.Tables.TAG_TO_TERM;
import static org.opendatadiscovery.oddplatform.model.Tables.TERM;
Expand Down Expand Up @@ -193,9 +200,9 @@ public Mono<Map<SearchFilterId, Long>> getEntityClassFacetForDataEntity(final Fa
}
final Set<Long> tagIds = state.getFacetEntitiesIds(FacetType.TAGS);
if (!CollectionUtils.isEmpty(tagIds)) {
select = select.join(TAG_TO_DATA_ENTITY)
.on(TAG_TO_DATA_ENTITY.DATA_ENTITY_ID.eq(DATA_ENTITY.ID));
conditions.add(TAG_TO_DATA_ENTITY.TAG_ID.in(tagIds));
final var dataEntities = getRelatedEntitiesAndFieldsToTag(tagIds);

conditions.add(DATA_ENTITY.ID.in(dataEntities));
}

final Set<Long> groupIds = state.getFacetEntitiesIds(FacetType.GROUPS);
Expand Down Expand Up @@ -585,4 +592,27 @@ private boolean deletedEntitiesAreRequested(final Map<FacetType, List<SearchFilt
return facetStateMap.getOrDefault(FacetType.STATUSES, List.of()).stream()
.anyMatch(f -> f.getEntityId() == DataEntityStatusDto.DELETED.getId());
}

/**
 * Builds a query for the ids of data entities related to any of the given tags.
 *
 * <p>The result is a UNION of two selections:
 * <ul>
 *   <li>entities referenced by {@code TAG_TO_DATA_ENTITY} rows carrying one of the tags;</li>
 *   <li>entities whose {@code ODDRN} equals the {@code DATASET_ODDRN} of a dataset version that has
 *       a field tagged with one of the tags, where only versions holding the highest
 *       {@code DATASET_VERSION.ID} per {@code DATASET_ODDRN} are considered.</li>
 * </ul>
 *
 * @param tagIds ids of the tags to look up
 * @return an unexecuted select of {@code DATA_ENTITY.ID}, usable as the right-hand side of an
 *     {@code IN} predicate
 */
private SelectOrderByStep<Record1<Long>> getRelatedEntitiesAndFieldsToTag(final Set<Long> tagIds) {
    // Highest version id recorded for every dataset oddrn.
    final var latestVersionIds = select(DSL.max(DATASET_VERSION.ID))
        .from(DATASET_VERSION)
        .groupBy(DATASET_VERSION.DATASET_ODDRN);

    // Of those versions, the ones whose structure contains a field tagged with one of the tags.
    final var versionsWithTaggedFields = select(DATASET_STRUCTURE.DATASET_VERSION_ID)
        .from(DATASET_STRUCTURE, DATASET_FIELD, TAG_TO_DATASET_FIELD)
        .where(DATASET_STRUCTURE.DATASET_VERSION_ID.in(latestVersionIds))
        .and(DATASET_FIELD.ID.eq(DATASET_STRUCTURE.DATASET_FIELD_ID))
        .and(TAG_TO_DATASET_FIELD.DATASET_FIELD_ID.eq(DATASET_FIELD.ID))
        .and(TAG_TO_DATASET_FIELD.TAG_ID.in(tagIds));

    // Entities reached through a tagged dataset field (matched to the version by oddrn).
    final var entitiesTaggedViaFields = select(DATA_ENTITY.ID)
        .from(DATASET_VERSION, DATA_ENTITY)
        .where(DATASET_VERSION.ID.in(versionsWithTaggedFields))
        .and(DATA_ENTITY.ODDRN.eq(DATASET_VERSION.DATASET_ODDRN));

    // Entities that carry one of the tags themselves.
    final var entitiesTaggedDirectly = select(DATA_ENTITY.ID)
        .from(TAG_TO_DATA_ENTITY, DATA_ENTITY)
        .where(TAG_TO_DATA_ENTITY.TAG_ID.in(tagIds))
        .and(TAG_TO_DATA_ENTITY.DATA_ENTITY_ID.eq(DATA_ENTITY.ID));

    return entitiesTaggedDirectly.union(entitiesTaggedViaFields);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -148,15 +148,13 @@ public Mono<Page<TagDto>> listMostPopular(final String query, final List<Long> i
paginate(homogeneousQuery, List.of(new OrderByField(TAG.ID, SortOrder.ASC)), (page - 1) * size, size);

final Table<? extends Record> tagCte = select.asTable("tag_cte");
final Table<Record> unionUsages = getDataEntityWithDatasetFields(tagCte, select);

final var cteSelect = DSL.with(tagCte.getName())
.as(select)
.select(tagCte.fields())
.select(DSL.coalesce(DSL.boolOr(TAG_TO_DATA_ENTITY.EXTERNAL), false).as(EXTERNAL_FIELD))
.select(DSL.count(TAG_TO_DATA_ENTITY.TAG_ID).as(COUNT_FIELD))
.from(tagCte.getName())
.leftJoin(TAG_TO_DATA_ENTITY).on(TAG_TO_DATA_ENTITY.TAG_ID.eq(tagCte.field(TAG.ID)))
.groupBy(tagCte.fields())
final var cteSelect = DSL.select(unionUsages.fields(tagCte.fields()))
.select(DSL.boolOr(unionUsages.field(EXTERNAL_FIELD, Boolean.class)).as(EXTERNAL_FIELD))
.select(DSL.sum(unionUsages.field(COUNT_FIELD, Integer.class)).as(COUNT_FIELD))
.from(unionUsages)
.groupBy(unionUsages.fields(tagCte.fields()))
.orderBy(field(COUNT_FIELD).desc());

return jooqReactiveOperations.flux(cteSelect)
Expand Down Expand Up @@ -372,6 +370,27 @@ public Flux<TagToDatasetFieldPojo> createDatasetFieldRelations(final Collection<
return jooqReactiveOperations.flux(query).map(r -> r.into(TagToDatasetFieldPojo.class));
}

/**
 * Builds the {@code union_usages} derived table, which stacks per-tag usage rows from two sources
 * with {@code UNION ALL}.
 *
 * <p>Both branches read from the tag CTE by name, left-join a tag relation table on the tag id,
 * group by the CTE's fields and expose two extra columns, {@code EXTERNAL_FIELD} and
 * {@code COUNT_FIELD}:
 * <ul>
 *   <li>the first branch joins {@code TAG_TO_DATA_ENTITY}; it also carries the {@code WITH} clause
 *       that declares the CTE;</li>
 *   <li>the second branch joins {@code TAG_TO_DATASET_FIELD}, treating a relation as external when
 *       its origin differs from {@code TagOrigin.INTERNAL}.</li>
 * </ul>
 *
 * @param tagCte table view of the tag selection; supplies the CTE name and the grouped columns
 * @param select the select that is registered as the body of the CTE
 * @return the union of both branches aliased as {@code union_usages}
 */
private static Table<Record> getDataEntityWithDatasetFields(final Table<? extends Record> tagCte,
                                                            final Select<? extends Record> select) {
    final String cteName = tagCte.getName();
    final var tagColumns = tagCte.fields();

    // Usages coming from tags attached to data entities; declares the CTE for the whole statement.
    final var entityUsages = DSL.with(cteName)
        .as(select)
        .select(tagColumns)
        .select(DSL.coalesce(DSL.boolOr(TAG_TO_DATA_ENTITY.EXTERNAL), false).as(EXTERNAL_FIELD))
        .select(DSL.count(TAG_TO_DATA_ENTITY.TAG_ID).as(COUNT_FIELD))
        .from(cteName)
        .leftJoin(TAG_TO_DATA_ENTITY).on(TAG_TO_DATA_ENTITY.TAG_ID.eq(tagCte.field(TAG.ID)))
        .groupBy(tagColumns);

    // Usages coming from tags attached to dataset fields.
    final var fieldIsExternal =
        DSL.coalesce(DSL.boolOr(TAG_TO_DATASET_FIELD.ORIGIN.ne(TagOrigin.INTERNAL.name())), false);
    final var datasetFieldUsages = DSL.select(tagColumns)
        .select(fieldIsExternal.as(EXTERNAL_FIELD))
        .select(DSL.count(TAG_TO_DATASET_FIELD.TAG_ID).as(COUNT_FIELD))
        .from(cteName)
        .leftJoin(TAG_TO_DATASET_FIELD).on(TAG_TO_DATASET_FIELD.TAG_ID.eq(tagCte.field(TAG.ID)))
        .groupBy(tagColumns);

    return entityUsages
        .unionAll(datasetFieldUsages)
        .asTable("union_usages");
}

private TagDto mapTag(final Record jooqRecord) {
return new TagDto(
jooqRecord.into(TagPojo.class),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
import org.opendatadiscovery.oddplatform.dto.SearchFilterDto;

import static org.jooq.impl.DSL.field;
import static org.jooq.impl.DSL.select;
import static org.opendatadiscovery.oddplatform.model.Tables.DATASET_FIELD;
import static org.opendatadiscovery.oddplatform.model.Tables.DATASET_STRUCTURE;
import static org.opendatadiscovery.oddplatform.model.Tables.DATASET_VERSION;
import static org.opendatadiscovery.oddplatform.model.Tables.DATA_ENTITY;
import static org.opendatadiscovery.oddplatform.model.Tables.DATA_SOURCE;
import static org.opendatadiscovery.oddplatform.model.Tables.GROUP_ENTITY_RELATIONS;
Expand All @@ -20,6 +23,7 @@
import static org.opendatadiscovery.oddplatform.model.Tables.NAMESPACE;
import static org.opendatadiscovery.oddplatform.model.Tables.OWNER;
import static org.opendatadiscovery.oddplatform.model.Tables.TAG;
import static org.opendatadiscovery.oddplatform.model.Tables.TAG_TO_DATASET_FIELD;
import static org.opendatadiscovery.oddplatform.model.Tables.TAG_TO_DATA_ENTITY;
import static org.opendatadiscovery.oddplatform.model.Tables.TERM;
import static org.opendatadiscovery.oddplatform.model.Tables.TITLE;
Expand Down Expand Up @@ -63,7 +67,30 @@ public class FTSConstants {
FacetType.NAMESPACES, filters -> NAMESPACE.ID.in(extractFilterId(filters)),
FacetType.TYPES, filters -> DATA_ENTITY.TYPE_ID.in(extractFilterId(filters)),
FacetType.OWNERS, filters -> OWNER.ID.in(extractFilterId(filters)),
FacetType.TAGS, filters -> TAG_TO_DATA_ENTITY.TAG_ID.in(extractFilterId(filters)),
FacetType.TAGS, filters -> {
final var dataEntities = select(DATA_ENTITY.ID)
.from(TAG_TO_DATA_ENTITY, DATA_ENTITY)
.where(TAG_TO_DATA_ENTITY.TAG_ID.in(extractFilterId(filters)))
.and(TAG_TO_DATA_ENTITY.DATA_ENTITY_ID.eq(DATA_ENTITY.ID))
.union(select(DATA_ENTITY.ID)
.from(DATASET_VERSION, DATA_ENTITY)
.where(DATASET_VERSION.ID.in(
select(DATASET_STRUCTURE.DATASET_VERSION_ID)
.from(DATASET_STRUCTURE, DATASET_FIELD, TAG_TO_DATASET_FIELD)
.where(DATASET_STRUCTURE.DATASET_VERSION_ID.in(
select(DSL.max(DATASET_VERSION.ID))
.from(DATASET_VERSION)
.groupBy(DATASET_VERSION.DATASET_ODDRN)))
.and(DATASET_FIELD.ID.eq(DATASET_STRUCTURE.DATASET_FIELD_ID))
.and(TAG_TO_DATASET_FIELD.DATASET_FIELD_ID.eq(DATASET_FIELD.ID))
.and(TAG_TO_DATASET_FIELD.TAG_ID.in(extractFilterId(filters)))
)
.and(DATA_ENTITY.ODDRN.eq(DATASET_VERSION.DATASET_ODDRN))
)
);

return DATA_ENTITY.ID.in(dataEntities);
},
FacetType.GROUPS, filters -> {
final var groupOddrns = DSL.select(DATA_ENTITY.ODDRN)
.from(DATA_ENTITY)
Expand Down

0 comments on commit 20c0b84

Please sign in to comment.