Skip to content

Commit

Permalink
feat(browse-classification): prepare and populate database for classi…
Browse files Browse the repository at this point in the history
…fication browse (#514)

prepare database scripts for new table creation
create pre-processor for instances to populate the table with classifications (it should then be used to convert database records to elastic documents)
replace folio-service-tools with folio-spring-testing, which causes a lot of import changes.

Closes: MSEARCH-667
Signed-off-by: psmagin <[email protected]>
  • Loading branch information
psmagin authored Feb 12, 2024
1 parent af3ac0a commit 13c66d9
Show file tree
Hide file tree
Showing 180 changed files with 1,187 additions and 212 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
* Update LccnProcessor to populate lccn field with only "LCCN" ([MSEARCH-630](https://issues.folio.org/browse/MSEARCH-630))
* Make maximum offset for additional elasticsearch request on browse configurable ([MSEARCH-641](https://issues.folio.org/browse/MSEARCH-641))
* Make system user usage optional ([MSEARCH-631](https://issues.folio.org/browse/MSEARCH-631))
* Prepare and populate database for classification browse ([MSEARCH-667](https://issues.folio.org/browse/MSEARCH-667))


### Bug fixes
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ and [Cross-cluster replication](https://docs.aws.amazon.com/opensearch-service/l
| SEARCH_BY_ALL_FIELDS_ENABLED | false | Specifies if globally search by all field values must be enabled or not (tenant can override this setting) |
| BROWSE_CN_INTERMEDIATE_VALUES_ENABLED | true | Specifies if globally intermediate values (nested instance items) must be populated or not (tenant can override this setting) |
| BROWSE_CN_INTERMEDIATE_REMOVE_DUPLICATES | true | Specifies if globally intermediate duplicate values (fullCallNumber) should be removed or not (Active only with BROWSE_CN_INTERMEDIATE_VALUES_ENABLED) |
| BROWSE_CLASSIFICATIONS_ENABLED | false | Specifies if globally instance classification indexing will be performed |
| SCROLL_QUERY_SIZE | 1000 | The number of records to be loaded by each scroll query. 10_000 is a max value |
| STREAM_ID_RETRY_INTERVAL_MS | 1000 | Specifies time to wait before reattempting query. |
| STREAM_ID_RETRY_ATTEMPTS | 3 | Specifies how many queries attempt to perform after the first one failed. |
Expand Down
34 changes: 2 additions & 32 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@
<streamex.version>0.8.2</streamex.version>

<!-- Test dependencies versions -->
<testcontainers.version>1.19.4</testcontainers.version>
<wiremock.version>2.27.2</wiremock.version>
<awaitility.version>4.2.0</awaitility.version>

Expand Down Expand Up @@ -256,34 +255,6 @@
</exclusions>
</dependency>

<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>testcontainers</artifactId>
<version>${testcontainers.version}</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>junit-jupiter</artifactId>
<version>${testcontainers.version}</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>kafka</artifactId>
<version>${testcontainers.version}</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>postgresql</artifactId>
<version>${testcontainers.version}</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.awaitility</groupId>
<artifactId>awaitility</artifactId>
Expand All @@ -300,9 +271,8 @@

<dependency>
<groupId>org.folio</groupId>
<artifactId>folio-service-tools-spring-test</artifactId>
<version>${folio-service-tools.version}</version>
<scope>test</scope>
<artifactId>folio-spring-testing</artifactId>
<version>${folio-spring-support.version}</version>
</dependency>
</dependencies>

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package org.folio.search.repository.classification;

import java.util.Objects;
import lombok.Builder;

/**
 * A single instance-classification record: a composite {@link Id} plus the {@code shared} flag.
 *
 * <p>Equality and hash code are computed from {@link #id()} only. Two entities with the same id are
 * therefore treated as equal even when their {@code shared} flags differ.
 *
 * @param id     composite identifier of the record, never {@code null}
 * @param shared value of the {@code shared} flag for this record
 */
public record InstanceClassificationEntity(
  Id id,
  boolean shared
) {

  /**
   * Validates the record components.
   *
   * @throws NullPointerException if {@code id} is {@code null}
   */
  public InstanceClassificationEntity {
    Objects.requireNonNull(id, "id must not be null");
  }

  /**
   * Shortcut for {@code id().type()}.
   *
   * @return classification type, may be {@code null}
   */
  public String type() {
    return id().type();
  }

  /**
   * Shortcut for {@code id().number()}.
   *
   * @return classification number, never {@code null}
   */
  public String number() {
    return id().number();
  }

  /**
   * Shortcut for {@code id().instanceId()}.
   *
   * @return instance identifier, never {@code null}
   */
  public String instanceId() {
    return id().instanceId();
  }

  /**
   * Shortcut for {@code id().tenantId()}.
   *
   * @return tenant identifier, never {@code null}
   */
  public String tenantId() {
    return id().tenantId();
  }

  /**
   * Compares entities by {@link #id()} only; the {@code shared} flag does not take part in equality.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    // Records are implicitly final, so an instanceof pattern is equivalent to an exact class check.
    return o instanceof InstanceClassificationEntity that && Objects.equals(id, that.id);
  }

  /**
   * Hash code consistent with {@link #equals(Object)}: derived from {@link #id()} only.
   */
  @Override
  public int hashCode() {
    return Objects.hash(id);
  }

  /**
   * Composite identifier of an instance classification.
   *
   * @param type       classification type; the only component that may be {@code null}
   * @param number     classification number, never {@code null}
   * @param instanceId instance identifier, never {@code null}
   * @param tenantId   tenant identifier, never {@code null}
   */
  @Builder
  public record Id(String type,
                   String number,
                   String instanceId,
                   String tenantId) {

    /**
     * Validates the mandatory components; {@code type} is intentionally not checked.
     *
     * @throws NullPointerException if {@code number}, {@code instanceId} or {@code tenantId} is {@code null}
     */
    public Id {
      Objects.requireNonNull(number, "number must not be null");
      Objects.requireNonNull(instanceId, "instanceId must not be null");
      Objects.requireNonNull(tenantId, "tenantId must not be null");
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package org.folio.search.repository.classification;

import java.util.List;
import org.folio.search.model.index.InstanceSubResource;

/**
 * Aggregated view of instance classifications: one classification (type + number) together with
 * the list of instance sub-resources associated with it.
 *
 * @param type      classification type
 * @param number    classification number
 * @param instances instance sub-resources grouped under this classification
 */
public record InstanceClassificationEntityAgg(
String type,
String number,
List<InstanceSubResource> instances
) {

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
package org.folio.search.repository.classification;

import static org.folio.search.utils.JdbcUtils.getGroupedParamPlaceholder;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.sql.PreparedStatement;
import java.util.Collections;
import java.util.List;
import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.collections4.CollectionUtils;
import org.folio.search.model.index.InstanceSubResource;
import org.folio.search.utils.JdbcUtils;
import org.folio.spring.FolioExecutionContext;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.RowMapper;
import org.springframework.stereotype.Repository;

@Log4j2
@Repository
@RequiredArgsConstructor
public class InstanceClassificationJdbcRepository implements InstanceClassificationRepository {

private static final String INSTANCE_CLASSIFICATION_TABLE_NAME = "instance_classification";
private static final String CLASSIFICATION_TYPE_COLUMN = "classification_type_id";
private static final String CLASSIFICATION_NUMBER_COLUMN = "classification_number";
private static final String TENANT_ID_COLUMN = "tenant_id";
private static final String INSTANCE_ID_COLUMN = "instance_id";
private static final String SHARED_COLUMN = "shared";
private static final String CLASSIFICATION_TYPE_DEFAULT = "<null>";

private static final String SELECT_ALL_SQL = "SELECT * FROM %s;";
private static final String SELECT_ALL_BY_INSTANCE_ID_AGG = """
SELECT
classification_number,
classification_type_id,
json_agg(json_build_object(
'instanceId', instance_id,
'shared', shared,
'tenantId', tenant_id
)) AS instances
FROM %s
WHERE (classification_number, classification_type_id) IN (%s)
GROUP BY classification_number, classification_type_id;
""";
private static final String INSERT_SQL = """
INSERT INTO %s (classification_type_id, classification_number, tenant_id, instance_id, shared)
VALUES (?, ?, ?, ?, ?)
ON CONFLICT (classification_type_id, classification_number, tenant_id, instance_id)
DO UPDATE SET shared = EXCLUDED.shared;
""";
private static final String DELETE_SQL = """
DELETE FROM %s
WHERE classification_type_id = ? AND classification_number = ? AND tenant_id = ? AND instance_id = ?;
""";
private static final int BATCH_SIZE = 100;
private static final TypeReference<List<InstanceSubResource>> VALUE_TYPE_REF = new TypeReference<>() { };

private final FolioExecutionContext context;
private final JdbcTemplate jdbcTemplate;
private final ObjectMapper objectMapper;

public void saveAll(List<InstanceClassificationEntity> classifications) {
log.debug("saveAll::instance classifications [entities: {}]", classifications);

if (classifications == null || classifications.isEmpty()) {
return;
}

var uniqueEntities = classifications.stream().distinct().toList();

jdbcTemplate.batchUpdate(
INSERT_SQL.formatted(getTableName()),
uniqueEntities,
BATCH_SIZE,
(PreparedStatement ps, InstanceClassificationEntity item) -> {
var id = item.id();
ps.setString(1, classificationTypeToDatabaseValue(id));
ps.setString(2, id.number());
ps.setString(3, id.tenantId());
ps.setString(4, id.instanceId());
ps.setBoolean(5, item.shared());
});
}

@Override
public void deleteAll(List<InstanceClassificationEntity> classifications) {
log.debug("deleteAll::instance classifications [entities: {}]", classifications);

if (classifications == null || classifications.isEmpty()) {
return;
}

jdbcTemplate.batchUpdate(
DELETE_SQL.formatted(getTableName()),
classifications,
BATCH_SIZE,
(PreparedStatement ps, InstanceClassificationEntity item) -> {
var id = item.id();
ps.setString(1, classificationTypeToDatabaseValue(id));
ps.setString(2, id.number());
ps.setString(3, id.tenantId());
ps.setString(4, id.instanceId());
});

}

@Override
public List<InstanceClassificationEntity> findAll() {
log.debug("findAll::instance classifications");
return jdbcTemplate.query(SELECT_ALL_SQL.formatted(getTableName()), instanceClassificationRowMapper());
}

@Override
public List<InstanceClassificationEntityAgg> fetchAggregatedByClassifications(
List<InstanceClassificationEntity> classifications) {
log.debug("fetchAggregatedByClassifications::instance classifications [entities: {}]", classifications);
if (CollectionUtils.isEmpty(classifications)) {
return Collections.emptyList();
}
return jdbcTemplate.query(
SELECT_ALL_BY_INSTANCE_ID_AGG.formatted(getTableName(), getGroupedParamPlaceholder(classifications.size(), 2)),
instanceClassificationAggRowMapper(), getArgsForAggregatedByClassifications(classifications));
}

@NotNull
private Object[] getArgsForAggregatedByClassifications(List<InstanceClassificationEntity> classifications) {
var args = new Object[classifications.size() * 2];
int index = 0;
for (var classification : classifications) {
args[index++] = classification.number();
args[index++] = classification.type();
}
return args;
}

@NotNull
private RowMapper<InstanceClassificationEntity> instanceClassificationRowMapper() {
return (rs, rowNum) -> {
var builder = InstanceClassificationEntity.Id.builder();
var typeVal = rs.getString(CLASSIFICATION_TYPE_COLUMN);
builder.type(databaseValueToClassificationType(typeVal));
builder.number(rs.getString(CLASSIFICATION_NUMBER_COLUMN));
builder.instanceId(rs.getString(INSTANCE_ID_COLUMN));
builder.tenantId(rs.getString(TENANT_ID_COLUMN));
var shared = rs.getBoolean(SHARED_COLUMN);
return new InstanceClassificationEntity(builder.build(), shared);
};
}

@NotNull
private RowMapper<InstanceClassificationEntityAgg> instanceClassificationAggRowMapper() {
return (rs, rowNum) -> {
var typeVal = rs.getString(CLASSIFICATION_TYPE_COLUMN);
var type = databaseValueToClassificationType(typeVal);
var number = rs.getString(CLASSIFICATION_NUMBER_COLUMN);
var instancesJson = rs.getString("instances");
List<InstanceSubResource> instanceSubResources;
try {
instanceSubResources = objectMapper.readValue(instancesJson, VALUE_TYPE_REF);
} catch (JsonProcessingException e) {
throw new IllegalArgumentException(e);
}
return new InstanceClassificationEntityAgg(type, number, instanceSubResources);
};
}

private String getTableName() {
return JdbcUtils.getFullTableName(context, INSTANCE_CLASSIFICATION_TABLE_NAME);
}

private String classificationTypeToDatabaseValue(InstanceClassificationEntity.Id id) {
return id.type() == null ? CLASSIFICATION_TYPE_DEFAULT : id.type();
}

@Nullable
private static String databaseValueToClassificationType(String typeVal) {
return CLASSIFICATION_TYPE_DEFAULT.equals(typeVal) ? null : typeVal;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package org.folio.search.repository.classification;

import java.util.List;

/**
 * Storage abstraction for {@link InstanceClassificationEntity} records.
 */
public interface InstanceClassificationRepository {

/**
 * Persists the given classifications.
 *
 * @param classifications entities to store
 */
void saveAll(List<InstanceClassificationEntity> classifications);

/**
 * Removes the given classifications from the storage.
 *
 * @param classifications entities to remove
 */
void deleteAll(List<InstanceClassificationEntity> classifications);

/**
 * Returns every stored classification.
 *
 * @return all stored entities
 */
List<InstanceClassificationEntity> findAll();

/**
 * Returns stored classifications matching the given ones, aggregated per classification
 * into {@link InstanceClassificationEntityAgg} items.
 *
 * @param classifications classifications to look up
 * @return aggregated results
 */
List<InstanceClassificationEntityAgg> fetchAggregatedByClassifications(
List<InstanceClassificationEntity> classifications);
}
Loading

0 comments on commit 13c66d9

Please sign in to comment.