From 20c0b844a5747f6e81c9a10e3855dce721a4999c Mon Sep 17 00:00:00 2001 From: Vladysl <45620393+Vladysl@users.noreply.github.com> Date: Thu, 12 Oct 2023 14:57:40 +0300 Subject: [PATCH] =?UTF-8?q?1447=20-=20=20Improves=20the=20tag=20search=20e?= =?UTF-8?q?ngine=20once=20we=20click=20on=20the=20tag=20on=20th=E2=80=A6?= =?UTF-8?q?=20(#1449)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ReactiveDataEntityRepositoryImpl.java | 5 ++- .../ReactiveSearchFacetRepositoryImpl.java | 36 +++++++++++++++++-- .../reactive/ReactiveTagRepositoryImpl.java | 35 +++++++++++++----- .../repository/util/FTSConstants.java | 29 ++++++++++++++- 4 files changed, 90 insertions(+), 15 deletions(-) diff --git a/odd-platform-api/src/main/java/org/opendatadiscovery/oddplatform/repository/reactive/ReactiveDataEntityRepositoryImpl.java b/odd-platform-api/src/main/java/org/opendatadiscovery/oddplatform/repository/reactive/ReactiveDataEntityRepositoryImpl.java index 300443f66..42f29fd87 100644 --- a/odd-platform-api/src/main/java/org/opendatadiscovery/oddplatform/repository/reactive/ReactiveDataEntityRepositoryImpl.java +++ b/odd-platform-api/src/main/java/org/opendatadiscovery/oddplatform/repository/reactive/ReactiveDataEntityRepositoryImpl.java @@ -443,7 +443,6 @@ public Mono countByState(final FacetStateDto state, final OwnerPojo owner) final var select = DSL.select(countDistinct(DATA_ENTITY.ID)) .from(DATA_ENTITY) .join(SEARCH_ENTRYPOINT).on(SEARCH_ENTRYPOINT.DATA_ENTITY_ID.eq(DATA_ENTITY.ID)) - .leftJoin(TAG_TO_DATA_ENTITY).on(TAG_TO_DATA_ENTITY.DATA_ENTITY_ID.eq(DATA_ENTITY.ID)) .leftJoin(DATA_SOURCE).on(DATA_SOURCE.ID.eq(DATA_ENTITY.DATA_SOURCE_ID)) .leftJoin(NAMESPACE).on(NAMESPACE.ID.eq(DATA_ENTITY.NAMESPACE_ID)) .or(NAMESPACE.ID.eq(DATA_SOURCE.NAMESPACE_ID)) @@ -677,8 +676,8 @@ public Mono> findByState(final FacetStateDto state .on(DATA_SOURCE.ID.eq(jooqQueryHelper.getField(deCte, DATA_ENTITY.DATA_SOURCE_ID))) .leftJoin(NAMESPACE).on(NAMESPACE.ID.eq(jooqQueryHelper.getField(deCte, DATA_ENTITY.NAMESPACE_ID))) .or(NAMESPACE.ID.eq(DATA_SOURCE.NAMESPACE_ID)) - .leftJoin(TAG_TO_DATA_ENTITY) - .on(TAG_TO_DATA_ENTITY.DATA_ENTITY_ID.eq(jooqQueryHelper.getField(deCte, DATA_ENTITY.ID))) + .leftJoin(DATA_ENTITY) + .on(DATA_ENTITY.ID.eq(jooqQueryHelper.getField(deCte, DATA_ENTITY.ID))) .leftJoin(OWNERSHIP).on(OWNERSHIP.DATA_ENTITY_ID.eq(jooqQueryHelper.getField(deCte, DATA_ENTITY.ID))) .leftJoin(OWNER).on(OWNER.ID.eq(OWNERSHIP.OWNER_ID)) .leftJoin(TITLE).on(TITLE.ID.eq(OWNERSHIP.TITLE_ID)) diff --git a/odd-platform-api/src/main/java/org/opendatadiscovery/oddplatform/repository/reactive/ReactiveSearchFacetRepositoryImpl.java b/odd-platform-api/src/main/java/org/opendatadiscovery/oddplatform/repository/reactive/ReactiveSearchFacetRepositoryImpl.java index 1079f00d9..91ae3a2da 100644 --- a/odd-platform-api/src/main/java/org/opendatadiscovery/oddplatform/repository/reactive/ReactiveSearchFacetRepositoryImpl.java +++ b/odd-platform-api/src/main/java/org/opendatadiscovery/oddplatform/repository/reactive/ReactiveSearchFacetRepositoryImpl.java @@ -14,7 +14,9 @@ import org.jooq.Condition; import org.jooq.Field; import org.jooq.Record; +import org.jooq.Record1; import org.jooq.Record3; +import org.jooq.SelectOrderByStep; import org.jooq.Table; import org.jooq.impl.DSL; import org.opendatadiscovery.oddplatform.dto.DataEntityClassDto; @@ -37,6 +39,10 @@ import static org.jooq.impl.DSL.count; import static org.jooq.impl.DSL.countDistinct; import static org.jooq.impl.DSL.field; +import static org.jooq.impl.DSL.select; +import static org.opendatadiscovery.oddplatform.model.Tables.DATASET_FIELD; +import static org.opendatadiscovery.oddplatform.model.Tables.DATASET_STRUCTURE; +import static org.opendatadiscovery.oddplatform.model.Tables.DATASET_VERSION; import static org.opendatadiscovery.oddplatform.model.Tables.DATA_ENTITY; import static org.opendatadiscovery.oddplatform.model.Tables.DATA_SOURCE; import static org.opendatadiscovery.oddplatform.model.Tables.GROUP_ENTITY_RELATIONS; @@ -46,6 +52,7 @@ import static org.opendatadiscovery.oddplatform.model.Tables.SEARCH_ENTRYPOINT; import static org.opendatadiscovery.oddplatform.model.Tables.SEARCH_FACETS; import static org.opendatadiscovery.oddplatform.model.Tables.TAG; +import static org.opendatadiscovery.oddplatform.model.Tables.TAG_TO_DATASET_FIELD; import static org.opendatadiscovery.oddplatform.model.Tables.TAG_TO_DATA_ENTITY; import static org.opendatadiscovery.oddplatform.model.Tables.TAG_TO_TERM; import static org.opendatadiscovery.oddplatform.model.Tables.TERM; @@ -193,9 +200,9 @@ public Mono> getEntityClassFacetForDataEntity(final Fa } final Set tagIds = state.getFacetEntitiesIds(FacetType.TAGS); if (!CollectionUtils.isEmpty(tagIds)) { - select = select.join(TAG_TO_DATA_ENTITY) - .on(TAG_TO_DATA_ENTITY.DATA_ENTITY_ID.eq(DATA_ENTITY.ID)); - conditions.add(TAG_TO_DATA_ENTITY.TAG_ID.in(tagIds)); + final var dataEntities = getRelatedEntitiesAndFieldsToTag(tagIds); + + conditions.add(DATA_ENTITY.ID.in(dataEntities)); } final Set groupIds = state.getFacetEntitiesIds(FacetType.GROUPS); @@ -585,4 +592,27 @@ private boolean deletedEntitiesAreRequested(final Map f.getEntityId() == DataEntityStatusDto.DELETED.getId()); } + + private SelectOrderByStep> getRelatedEntitiesAndFieldsToTag(final Set tagIds) { + return select(DATA_ENTITY.ID) + .from(TAG_TO_DATA_ENTITY, DATA_ENTITY) + .where(TAG_TO_DATA_ENTITY.TAG_ID.in(tagIds)) + .and(TAG_TO_DATA_ENTITY.DATA_ENTITY_ID.eq(DATA_ENTITY.ID)) + .union(select(DATA_ENTITY.ID) + .from(DATASET_VERSION, DATA_ENTITY) + .where(DATASET_VERSION.ID.in( + select(DATASET_STRUCTURE.DATASET_VERSION_ID) + .from(DATASET_STRUCTURE, DATASET_FIELD, TAG_TO_DATASET_FIELD) + .where(DATASET_STRUCTURE.DATASET_VERSION_ID.in( + select(DSL.max(DATASET_VERSION.ID)) + .from(DATASET_VERSION) + .groupBy(DATASET_VERSION.DATASET_ODDRN))) + .and(DATASET_FIELD.ID.eq(DATASET_STRUCTURE.DATASET_FIELD_ID)) + .and(TAG_TO_DATASET_FIELD.DATASET_FIELD_ID.eq(DATASET_FIELD.ID)) + .and(TAG_TO_DATASET_FIELD.TAG_ID.in(tagIds)) + ) + .and(DATA_ENTITY.ODDRN.eq(DATASET_VERSION.DATASET_ODDRN)) + ) + ); + } } diff --git a/odd-platform-api/src/main/java/org/opendatadiscovery/oddplatform/repository/reactive/ReactiveTagRepositoryImpl.java b/odd-platform-api/src/main/java/org/opendatadiscovery/oddplatform/repository/reactive/ReactiveTagRepositoryImpl.java index 0a7bc6726..66b0b658b 100644 --- a/odd-platform-api/src/main/java/org/opendatadiscovery/oddplatform/repository/reactive/ReactiveTagRepositoryImpl.java +++ b/odd-platform-api/src/main/java/org/opendatadiscovery/oddplatform/repository/reactive/ReactiveTagRepositoryImpl.java @@ -148,15 +148,13 @@ public Mono> listMostPopular(final String query, final List i paginate(homogeneousQuery, List.of(new OrderByField(TAG.ID, SortOrder.ASC)), (page - 1) * size, size); final Table tagCte = select.asTable("tag_cte"); + final Table unionUsages = getDataEntityWithDatasetFields(tagCte, select); - final var cteSelect = DSL.with(tagCte.getName()) - .as(select) - .select(tagCte.fields()) - .select(DSL.coalesce(DSL.boolOr(TAG_TO_DATA_ENTITY.EXTERNAL), false).as(EXTERNAL_FIELD)) - .select(DSL.count(TAG_TO_DATA_ENTITY.TAG_ID).as(COUNT_FIELD)) - .from(tagCte.getName()) - .leftJoin(TAG_TO_DATA_ENTITY).on(TAG_TO_DATA_ENTITY.TAG_ID.eq(tagCte.field(TAG.ID))) - .groupBy(tagCte.fields()) + final var cteSelect = DSL.select(unionUsages.fields(tagCte.fields())) + .select(DSL.boolOr(unionUsages.field(EXTERNAL_FIELD, Boolean.class)).as(EXTERNAL_FIELD)) + .select(DSL.sum(unionUsages.field(COUNT_FIELD, Integer.class)).as(COUNT_FIELD)) + .from(unionUsages) + .groupBy(unionUsages.fields(tagCte.fields())) .orderBy(field(COUNT_FIELD).desc()); return jooqReactiveOperations.flux(cteSelect) @@ -372,6 +370,27 @@ public Flux createDatasetFieldRelations(final Collection< return jooqReactiveOperations.flux(query).map(r -> r.into(TagToDatasetFieldPojo.class)); } + private static Table getDataEntityWithDatasetFields(final Table tagCte, + final Select select) { + return DSL.with(tagCte.getName()) + .as(select) + .select(tagCte.fields()) + .select(DSL.coalesce(DSL.boolOr(TAG_TO_DATA_ENTITY.EXTERNAL), false).as(EXTERNAL_FIELD)) + .select(DSL.count(TAG_TO_DATA_ENTITY.TAG_ID).as(COUNT_FIELD)) + .from(tagCte.getName()) + .leftJoin(TAG_TO_DATA_ENTITY).on(TAG_TO_DATA_ENTITY.TAG_ID.eq(tagCte.field(TAG.ID))) + .groupBy(tagCte.fields()) + .unionAll( + DSL.select(tagCte.fields()) + .select(DSL.coalesce(DSL.boolOr(TAG_TO_DATASET_FIELD.ORIGIN.ne(TagOrigin.INTERNAL.name())), + false).as(EXTERNAL_FIELD)) + .select(DSL.count(TAG_TO_DATASET_FIELD.TAG_ID).as(COUNT_FIELD)) + .from(tagCte.getName()) + .leftJoin(TAG_TO_DATASET_FIELD).on(TAG_TO_DATASET_FIELD.TAG_ID.eq(tagCte.field(TAG.ID))) + .groupBy(tagCte.fields()) + ).asTable("union_usages"); + } + private TagDto mapTag(final Record jooqRecord) { return new TagDto( jooqRecord.into(TagPojo.class), diff --git a/odd-platform-api/src/main/java/org/opendatadiscovery/oddplatform/repository/util/FTSConstants.java b/odd-platform-api/src/main/java/org/opendatadiscovery/oddplatform/repository/util/FTSConstants.java index 1f6d284e8..ce3a7917b 100644 --- a/odd-platform-api/src/main/java/org/opendatadiscovery/oddplatform/repository/util/FTSConstants.java +++ b/odd-platform-api/src/main/java/org/opendatadiscovery/oddplatform/repository/util/FTSConstants.java @@ -11,7 +11,10 @@ import org.opendatadiscovery.oddplatform.dto.SearchFilterDto; import static org.jooq.impl.DSL.field; +import static org.jooq.impl.DSL.select; import static org.opendatadiscovery.oddplatform.model.Tables.DATASET_FIELD; +import static org.opendatadiscovery.oddplatform.model.Tables.DATASET_STRUCTURE; +import static org.opendatadiscovery.oddplatform.model.Tables.DATASET_VERSION; import static org.opendatadiscovery.oddplatform.model.Tables.DATA_ENTITY; import static org.opendatadiscovery.oddplatform.model.Tables.DATA_SOURCE; import static org.opendatadiscovery.oddplatform.model.Tables.GROUP_ENTITY_RELATIONS; @@ -20,6 +23,7 @@ import static org.opendatadiscovery.oddplatform.model.Tables.NAMESPACE; import static org.opendatadiscovery.oddplatform.model.Tables.OWNER; import static org.opendatadiscovery.oddplatform.model.Tables.TAG; +import static org.opendatadiscovery.oddplatform.model.Tables.TAG_TO_DATASET_FIELD; import static org.opendatadiscovery.oddplatform.model.Tables.TAG_TO_DATA_ENTITY; import static org.opendatadiscovery.oddplatform.model.Tables.TERM; import static org.opendatadiscovery.oddplatform.model.Tables.TITLE; @@ -63,7 +67,30 @@ public class FTSConstants { FacetType.NAMESPACES, filters -> NAMESPACE.ID.in(extractFilterId(filters)), FacetType.TYPES, filters -> DATA_ENTITY.TYPE_ID.in(extractFilterId(filters)), FacetType.OWNERS, filters -> OWNER.ID.in(extractFilterId(filters)), - FacetType.TAGS, filters -> TAG_TO_DATA_ENTITY.TAG_ID.in(extractFilterId(filters)), + FacetType.TAGS, filters -> { + final var dataEntities = select(DATA_ENTITY.ID) + .from(TAG_TO_DATA_ENTITY, DATA_ENTITY) + .where(TAG_TO_DATA_ENTITY.TAG_ID.in(extractFilterId(filters))) + .and(TAG_TO_DATA_ENTITY.DATA_ENTITY_ID.eq(DATA_ENTITY.ID)) + .union(select(DATA_ENTITY.ID) + .from(DATASET_VERSION, DATA_ENTITY) + .where(DATASET_VERSION.ID.in( + select(DATASET_STRUCTURE.DATASET_VERSION_ID) + .from(DATASET_STRUCTURE, DATASET_FIELD, TAG_TO_DATASET_FIELD) + .where(DATASET_STRUCTURE.DATASET_VERSION_ID.in( + select(DSL.max(DATASET_VERSION.ID)) + .from(DATASET_VERSION) + .groupBy(DATASET_VERSION.DATASET_ODDRN))) + .and(DATASET_FIELD.ID.eq(DATASET_STRUCTURE.DATASET_FIELD_ID)) + .and(TAG_TO_DATASET_FIELD.DATASET_FIELD_ID.eq(DATASET_FIELD.ID)) + .and(TAG_TO_DATASET_FIELD.TAG_ID.in(extractFilterId(filters))) + ) + .and(DATA_ENTITY.ODDRN.eq(DATASET_VERSION.DATASET_ODDRN)) + ) + ); + + return DATA_ENTITY.ID.in(dataEntities); + }, FacetType.GROUPS, filters -> { final var groupOddrns = DSL.select(DATA_ENTITY.ODDRN) .from(DATA_ENTITY)