Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pattern acceleration #87

Merged
merged 20 commits into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
88df094
merge squash pattern-acceleration-refactor
elliVM Sep 12, 2024
d9730ae
run jooq generate with new settings and spotless
elliVM Sep 12, 2024
dd42e06
rename BloomFilterTempTable tokenSet to searchTermTokenSet
elliVM Sep 12, 2024
b2aaf03
make ConditionWalker and ValidElement classes final
elliVM Sep 12, 2024
c1ae260
rename method isIndexStatement to isBloomSearchCondition
elliVM Sep 12, 2024
914ddff
add indexstatement with bad connection tests for ElementConditionTest…
elliVM Sep 12, 2024
485b346
apply spotless
elliVM Sep 12, 2024
b3435ad
add javadoc to tests that use Condition.toString for equality testing
elliVM Sep 12, 2024
2916cd6
refactoring iteration: multiple new classes and interfaces to simplif…
elliVM Sep 16, 2024
be3c266
apply spotless
elliVM Sep 16, 2024
1c31037
renaming of names and methods
elliVM Sep 18, 2024
0c3ffcc
refactor ElementCondition to directly use IndexStatementCondition cla…
elliVM Sep 18, 2024
0e00db0
Renaming and some comments
elliVM Sep 18, 2024
510ee97
Remove decorators and use class methods, fix testing, clean equality …
elliVM Sep 20, 2024
0585e92
apply spotless
elliVM Sep 20, 2024
e988a86
create testing patterns to Strings first add comments for clarity
elliVM Sep 20, 2024
4cdfd50
disable search term tokenization
elliVM Sep 24, 2024
b0ce419
clean up code and separate condition interfaces, use decorators for c…
elliVM Sep 24, 2024
21d018f
enable tokenization of search term, filter category table filter toke…
elliVM Sep 24, 2024
ae8359d
TokenizedValue finds minor tokens, add tests that correct tokens are …
elliVM Sep 24, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 12 additions & 33 deletions database/bloomdb.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* This program handles user requests that require archive access.
* Copyright (C) 2022 Suomen Kanuuna Oy
* Copyright (C) 2024 Suomen Kanuuna Oy
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
Expand Down Expand Up @@ -44,36 +44,15 @@
* a licensee so wish it.
*/

DROP TABLE IF EXISTS `filter_expected_100000_fpp_001`;
DROP TABLE IF EXISTS `filter_expected_1000000_fpp_003`;
DROP TABLE IF EXISTS `filter_expected_2500000_fpp_005`;
DROP TABLE IF EXISTS `filtertype`;

CREATE TABLE `filter_expected_100000_fpp_001` (
`id` INT NOT NULL AUTO_INCREMENT,
`partition_id` BIGINT(20) unsigned NOT NULL UNIQUE,
`filter` LONGBLOB,
CONSTRAINT `fk_smallfilter_partition`
FOREIGN KEY (`partition_id`) REFERENCES `journaldb`.`logfile`(`id`) ON DELETE CASCADE,
CONSTRAINT `pk_small`
PRIMARY KEY (`id`)
)ENGINE=InnoDB ROW_FORMAT=COMPRESSED;

CREATE TABLE `filter_expected_1000000_fpp_003` (
`id` INT NOT NULL AUTO_INCREMENT,
`partition_id` BIGINT(20) unsigned NOT NULL UNIQUE ,
`filter` LONGBLOB,
CONSTRAINT `fk_mediumfilter_partition`
FOREIGN KEY (`partition_id`) REFERENCES `journaldb`.`logfile`(`id`) ON DELETE CASCADE,
CONSTRAINT `pk_medium`
PRIMARY KEY (`id`)
)ENGINE=InnoDB ROW_FORMAT=COMPRESSED;

CREATE TABLE `filter_expected_2500000_fpp_005` (
`id` INT NOT NULL AUTO_INCREMENT,
`partition_id` BIGINT(20) unsigned NOT NULL UNIQUE ,
`filter` LONGBLOB,
CONSTRAINT `fk_largefilter_partition`
FOREIGN KEY (`partition_id`) REFERENCES `journaldb`.`logfile`(`id`) ON DELETE CASCADE,
CONSTRAINT `pk_large`
PRIMARY KEY (`id`)
)ENGINE=InnoDB ROW_FORMAT=COMPRESSED;
-- One row per filter type. The UNIQUE KEY below rejects a second row with the
-- same (expectedElements, targetFpp, pattern) combination.
CREATE TABLE `filtertype`
(
`id` bigint(20) UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
`expectedElements` bigint(20) UNSIGNED NOT NULL,
-- DOUBLE(2, 2): two digits in total, both after the decimal point, so stored
-- values are limited to the range 0.00-0.99.
`targetFpp` DOUBLE(2, 2) UNSIGNED NOT NULL,
`pattern` VARCHAR(2048) NOT NULL,
-- NOTE(review): a 2048-character utf8mb4 column inside a unique key is larger
-- than a regular InnoDB index key allows; presumably this relies on the target
-- server supporting long unique keys - confirm against the deployed database.
UNIQUE KEY (`expectedElements`, `targetFpp`, `pattern`)
) ENGINE = InnoDB
DEFAULT CHARSET = utf8mb4
COLLATE = utf8mb4_unicode_ci;
8 changes: 7 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,12 @@
<version>${jclouds.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.h2database</groupId>
<artifactId>h2</artifactId>
<version>2.2.224</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
Expand Down Expand Up @@ -304,7 +310,7 @@
</jdbc>
<generator>
<database>
<includes>streamdb.log_group|streamdb.host|streamdb.stream|journaldb.host|journaldb.bucket|journaldb.logfile|bloomdb.filter_expected_100000_fpp_001|bloomdb.filter_expected_1000000_fpp_003|bloomdb.filter_expected_2500000_fpp_005</includes>
<includes>streamdb.log_group|streamdb.host|streamdb.stream|journaldb.host|journaldb.bucket|journaldb.logfile|bloomdb.filtertype</includes>
</database>
<target>
<packageName>com.teragrep.pth_06.jooq.generated</packageName>
Expand Down
3 changes: 3 additions & 0 deletions src/main/java/com/teragrep/pth_06/config/ArchiveConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ public final class ArchiveConfig {

// bloom
public final boolean bloomEnabled;
public final boolean withoutFilters;
public final String bloomDbName;

public final boolean isStub;
Expand All @@ -79,6 +80,7 @@ public ArchiveConfig(Map<String, String> opts) {
dbPassword = getOrThrow(opts, "DBpassword");
dbUrl = getOrThrow(opts, "DBurl");
// Each flag is true only when its option equals "true" (case-insensitive);
// a missing option falls back to "false".
bloomEnabled = opts.getOrDefault("bloom.enabled", "false").equalsIgnoreCase("true");
// withoutFilters needs its own option key: reading "bloom.enabled" here made the
// flag always mirror bloomEnabled, so it could never be set independently.
// NOTE(review): key name chosen to match the field name - confirm against the
// documented option list.
withoutFilters = opts.getOrDefault("bloom.withoutFilters", "false").equalsIgnoreCase("true");
bloomDbName = opts.getOrDefault("DBbloomdbname", "bloomdb");

dbJournalDbName = opts.getOrDefault("DBjournaldbname", "journaldb");
Expand Down Expand Up @@ -108,6 +110,7 @@ public ArchiveConfig() {
dbStreamDbName = "";

bloomEnabled = false;
withoutFilters = false;
bloomDbName = "";

hideDatabaseExceptions = false;
Expand Down
37 changes: 29 additions & 8 deletions src/main/java/com/teragrep/pth_06/config/ConditionConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,19 +53,36 @@ public final class ConditionConfig {
private final boolean streamQuery;
private final boolean bloomEnabled;
private final boolean withoutFilters;
private final long bloomTermId;

public ConditionConfig(DSLContext ctx, boolean streamQuery) {
this.ctx = ctx;
this.streamQuery = streamQuery;
this.bloomEnabled = false;
this.withoutFilters = false;
this(ctx, streamQuery, false, false, 0L);
}

/**
 * Convenience constructor that delegates to the five-argument constructor with
 * {@code withoutFilters} set to {@code false} and {@code bloomTermId} set to {@code 0L}.
 *
 * @param ctx          context passed through unchanged
 * @param streamQuery  stream query flag passed through unchanged
 * @param bloomEnabled bloom flag passed through unchanged
 */
public ConditionConfig(DSLContext ctx, boolean streamQuery, boolean bloomEnabled) {
this(ctx, streamQuery, bloomEnabled, false, 0L);
}

public ConditionConfig(DSLContext ctx, boolean streamQuery, boolean bloomEnabled, boolean withoutFilters) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this removing the feature that allows searching files without a filter?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The option was not used in the GitHub repo. Added the option with an implementation and tests.

this(ctx, streamQuery, bloomEnabled, withoutFilters, 0L);
}

/**
 * Convenience constructor that delegates to the five-argument constructor with
 * {@code withoutFilters} set to {@code false}.
 *
 * @param ctx          context passed through unchanged
 * @param streamQuery  stream query flag passed through unchanged
 * @param bloomEnabled bloom flag passed through unchanged
 * @param bloomTermId  bloom term id passed through unchanged
 */
public ConditionConfig(DSLContext ctx, boolean streamQuery, boolean bloomEnabled, long bloomTermId) {
this(ctx, streamQuery, bloomEnabled, false, bloomTermId);
}

/**
 * Full constructor: stores each argument in the field of the same name without
 * validation or transformation.
 *
 * @param ctx            value stored in {@code ctx}
 * @param streamQuery    value stored in {@code streamQuery}
 * @param bloomEnabled   value stored in {@code bloomEnabled}
 * @param withoutFilters value stored in {@code withoutFilters}
 * @param bloomTermId    value stored in {@code bloomTermId}
 */
public ConditionConfig(
        DSLContext ctx,
        boolean streamQuery,
        boolean bloomEnabled,
        boolean withoutFilters,
        long bloomTermId
) {
    // Plain field assignments; their order has no effect on the result.
    this.bloomTermId = bloomTermId;
    this.withoutFilters = withoutFilters;
    this.bloomEnabled = bloomEnabled;
    this.streamQuery = streamQuery;
    this.ctx = ctx;
}

public DSLContext context() {
Expand All @@ -76,12 +93,16 @@ public boolean bloomEnabled() {
return bloomEnabled;
}

public boolean streamQuery() {
return streamQuery;
public boolean withoutFilters() {
return withoutFilters;
}

public boolean withoutFilter() {
return withoutFilters;
public long bloomTermId() {
return bloomTermId;
}

public boolean streamQuery() {
return streamQuery;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,7 @@


import com.teragrep.pth_06.jooq.generated.DefaultCatalog;
import com.teragrep.pth_06.jooq.generated.bloomdb.tables.FilterExpected_1000000Fpp_003;
import com.teragrep.pth_06.jooq.generated.bloomdb.tables.FilterExpected_100000Fpp_001;
import com.teragrep.pth_06.jooq.generated.bloomdb.tables.FilterExpected_2500000Fpp_005;
import com.teragrep.pth_06.jooq.generated.bloomdb.tables.Filtertype;

import java.util.ArrayList;
import java.util.Arrays;
Expand All @@ -78,27 +76,17 @@
@SuppressWarnings({ "all", "unchecked", "rawtypes" })
public class Bloomdb extends SchemaImpl {

private static final long serialVersionUID = 1310856944;
private static final long serialVersionUID = -1839179080;

/**
* The reference instance of <code>bloomdb</code>
*/
public static final Bloomdb BLOOMDB = new Bloomdb();

/**
* The table <code>bloomdb.filter_expected_1000000_fpp_003</code>.
* The table <code>bloomdb.filtertype</code>.
*/
public final FilterExpected_1000000Fpp_003 FILTER_EXPECTED_1000000_FPP_003 = com.teragrep.pth_06.jooq.generated.bloomdb.tables.FilterExpected_1000000Fpp_003.FILTER_EXPECTED_1000000_FPP_003;

/**
* The table <code>bloomdb.filter_expected_100000_fpp_001</code>.
*/
public final FilterExpected_100000Fpp_001 FILTER_EXPECTED_100000_FPP_001 = com.teragrep.pth_06.jooq.generated.bloomdb.tables.FilterExpected_100000Fpp_001.FILTER_EXPECTED_100000_FPP_001;

/**
* The table <code>bloomdb.filter_expected_2500000_fpp_005</code>.
*/
public final FilterExpected_2500000Fpp_005 FILTER_EXPECTED_2500000_FPP_005 = com.teragrep.pth_06.jooq.generated.bloomdb.tables.FilterExpected_2500000Fpp_005.FILTER_EXPECTED_2500000_FPP_005;
public final Filtertype FILTERTYPE = com.teragrep.pth_06.jooq.generated.bloomdb.tables.Filtertype.FILTERTYPE;

/**
* No further instances allowed
Expand All @@ -122,8 +110,6 @@ public final List<Table<?>> getTables() {

private final List<Table<?>> getTables0() {
return Arrays.<Table<?>>asList(
FilterExpected_1000000Fpp_003.FILTER_EXPECTED_1000000_FPP_003,
FilterExpected_100000Fpp_001.FILTER_EXPECTED_100000_FPP_001,
FilterExpected_2500000Fpp_005.FILTER_EXPECTED_2500000_FPP_005);
Filtertype.FILTERTYPE);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,7 @@
package com.teragrep.pth_06.jooq.generated.bloomdb;


import com.teragrep.pth_06.jooq.generated.bloomdb.tables.FilterExpected_1000000Fpp_003;
import com.teragrep.pth_06.jooq.generated.bloomdb.tables.FilterExpected_100000Fpp_001;
import com.teragrep.pth_06.jooq.generated.bloomdb.tables.FilterExpected_2500000Fpp_005;
import com.teragrep.pth_06.jooq.generated.bloomdb.tables.Filtertype;

import javax.annotation.Generated;

Expand All @@ -77,23 +75,15 @@ public class Indexes {
// INDEX definitions
// -------------------------------------------------------------------------

public static final Index FILTER_EXPECTED_1000000_FPP_003_PARTITION_ID = Indexes0.FILTER_EXPECTED_1000000_FPP_003_PARTITION_ID;
public static final Index FILTER_EXPECTED_1000000_FPP_003_PRIMARY = Indexes0.FILTER_EXPECTED_1000000_FPP_003_PRIMARY;
public static final Index FILTER_EXPECTED_100000_FPP_001_PARTITION_ID = Indexes0.FILTER_EXPECTED_100000_FPP_001_PARTITION_ID;
public static final Index FILTER_EXPECTED_100000_FPP_001_PRIMARY = Indexes0.FILTER_EXPECTED_100000_FPP_001_PRIMARY;
public static final Index FILTER_EXPECTED_2500000_FPP_005_PARTITION_ID = Indexes0.FILTER_EXPECTED_2500000_FPP_005_PARTITION_ID;
public static final Index FILTER_EXPECTED_2500000_FPP_005_PRIMARY = Indexes0.FILTER_EXPECTED_2500000_FPP_005_PRIMARY;
public static final Index FILTERTYPE_EXPECTEDELEMENTS = Indexes0.FILTERTYPE_EXPECTEDELEMENTS;
public static final Index FILTERTYPE_PRIMARY = Indexes0.FILTERTYPE_PRIMARY;

// -------------------------------------------------------------------------
// [#1459] distribute members to avoid static initialisers > 64kb
// -------------------------------------------------------------------------

private static class Indexes0 {
public static Index FILTER_EXPECTED_1000000_FPP_003_PARTITION_ID = Internal.createIndex("partition_id", FilterExpected_1000000Fpp_003.FILTER_EXPECTED_1000000_FPP_003, new OrderField[] { FilterExpected_1000000Fpp_003.FILTER_EXPECTED_1000000_FPP_003.PARTITION_ID }, true);
public static Index FILTER_EXPECTED_1000000_FPP_003_PRIMARY = Internal.createIndex("PRIMARY", FilterExpected_1000000Fpp_003.FILTER_EXPECTED_1000000_FPP_003, new OrderField[] { FilterExpected_1000000Fpp_003.FILTER_EXPECTED_1000000_FPP_003.ID }, true);
public static Index FILTER_EXPECTED_100000_FPP_001_PARTITION_ID = Internal.createIndex("partition_id", FilterExpected_100000Fpp_001.FILTER_EXPECTED_100000_FPP_001, new OrderField[] { FilterExpected_100000Fpp_001.FILTER_EXPECTED_100000_FPP_001.PARTITION_ID }, true);
public static Index FILTER_EXPECTED_100000_FPP_001_PRIMARY = Internal.createIndex("PRIMARY", FilterExpected_100000Fpp_001.FILTER_EXPECTED_100000_FPP_001, new OrderField[] { FilterExpected_100000Fpp_001.FILTER_EXPECTED_100000_FPP_001.ID }, true);
public static Index FILTER_EXPECTED_2500000_FPP_005_PARTITION_ID = Internal.createIndex("partition_id", FilterExpected_2500000Fpp_005.FILTER_EXPECTED_2500000_FPP_005, new OrderField[] { FilterExpected_2500000Fpp_005.FILTER_EXPECTED_2500000_FPP_005.PARTITION_ID }, true);
public static Index FILTER_EXPECTED_2500000_FPP_005_PRIMARY = Internal.createIndex("PRIMARY", FilterExpected_2500000Fpp_005.FILTER_EXPECTED_2500000_FPP_005, new OrderField[] { FilterExpected_2500000Fpp_005.FILTER_EXPECTED_2500000_FPP_005.ID }, true);
public static Index FILTERTYPE_EXPECTEDELEMENTS = Internal.createIndex("expectedElements", Filtertype.FILTERTYPE, new OrderField[] { Filtertype.FILTERTYPE.EXPECTEDELEMENTS, Filtertype.FILTERTYPE.TARGETFPP, Filtertype.FILTERTYPE.PATTERN }, true);
public static Index FILTERTYPE_PRIMARY = Internal.createIndex("PRIMARY", Filtertype.FILTERTYPE, new OrderField[] { Filtertype.FILTERTYPE.ID }, true);
}
}
Loading