field4() {
+ return Filtertype.FILTERTYPE.PATTERN;
}
@Override
- public Integer component1() {
+ public ULong component1() {
return getId();
}
@Override
public ULong component2() {
- return getPartitionId();
+ return getExpectedelements();
}
@Override
- public byte[] component3() {
- return getFilter();
+ public Double component3() {
+ return getTargetfpp();
}
@Override
- public Integer value1() {
+ public String component4() {
+ return getPattern();
+ }
+
+ @Override
+ public ULong value1() {
return getId();
}
@Override
public ULong value2() {
- return getPartitionId();
+ return getExpectedelements();
+ }
+
+ @Override
+ public Double value3() {
+ return getTargetfpp();
}
@Override
- public byte[] value3() {
- return getFilter();
+ public String value4() {
+ return getPattern();
}
@Override
- public FilterExpected_100000Fpp_001Record value1(Integer value) {
+ public FiltertypeRecord value1(ULong value) {
setId(value);
return this;
}
@Override
- public FilterExpected_100000Fpp_001Record value2(ULong value) {
- setPartitionId(value);
+ public FiltertypeRecord value2(ULong value) {
+ setExpectedelements(value);
+ return this;
+ }
+
+ @Override
+ public FiltertypeRecord value3(Double value) {
+ setTargetfpp(value);
return this;
}
@Override
- public FilterExpected_100000Fpp_001Record value3(byte... value) {
- setFilter(value);
+ public FiltertypeRecord value4(String value) {
+ setPattern(value);
return this;
}
@Override
- public FilterExpected_100000Fpp_001Record values(Integer value1, ULong value2, byte[] value3) {
+ public FiltertypeRecord values(ULong value1, ULong value2, Double value3, String value4) {
value1(value1);
value2(value2);
value3(value3);
+ value4(value4);
return this;
}
@@ -217,20 +253,21 @@ public FilterExpected_100000Fpp_001Record values(Integer value1, ULong value2, b
// -------------------------------------------------------------------------
/**
- * Create a detached FilterExpected_100000Fpp_001Record
+ * Create a detached FiltertypeRecord
*/
- public FilterExpected_100000Fpp_001Record() {
- super(FilterExpected_100000Fpp_001.FILTER_EXPECTED_100000_FPP_001);
+ public FiltertypeRecord() {
+ super(Filtertype.FILTERTYPE);
}
/**
- * Create a detached, initialised FilterExpected_100000Fpp_001Record
+ * Create a detached, initialised FiltertypeRecord
*/
- public FilterExpected_100000Fpp_001Record(Integer id, ULong partitionId, byte[] filter) {
- super(FilterExpected_100000Fpp_001.FILTER_EXPECTED_100000_FPP_001);
+ public FiltertypeRecord(ULong id, ULong expectedelements, Double targetfpp, String pattern) {
+ super(Filtertype.FILTERTYPE);
set(0, id);
- set(1, partitionId);
- set(2, filter);
+ set(1, expectedelements);
+ set(2, targetfpp);
+ set(3, pattern);
}
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/CategoryTable.java b/src/main/java/com/teragrep/pth_06/planner/CategoryTable.java
new file mode 100644
index 00000000..f7b9618d
--- /dev/null
+++ b/src/main/java/com/teragrep/pth_06/planner/CategoryTable.java
@@ -0,0 +1,57 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner;
+
+import com.teragrep.pth_06.planner.walker.conditions.QueryCondition;
+
+public interface CategoryTable {
+ /** Creates the category table; CategoryTableImpl executes the create statements through its DSLContext. */
+ void create();
+ /** Inserts the filters into the category table. */
+ void insertFilters();
+ /** @return the query condition of this category table (presumably used for bloom matching - confirm in callers) */
+ QueryCondition bloommatchCondition();
+}
diff --git a/src/main/java/com/teragrep/pth_06/planner/CategoryTableImpl.java b/src/main/java/com/teragrep/pth_06/planner/CategoryTableImpl.java
new file mode 100644
index 00000000..3c0a1c7a
--- /dev/null
+++ b/src/main/java/com/teragrep/pth_06/planner/CategoryTableImpl.java
@@ -0,0 +1,178 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner;
+
+import com.teragrep.pth_06.config.ConditionConfig;
+import com.teragrep.pth_06.planner.walker.conditions.CategoryTableCondition;
+import com.teragrep.pth_06.planner.walker.conditions.QueryCondition;
+import org.jooq.*;
+import org.jooq.impl.DSL;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.jooq.impl.SQLDataType.BIGINTUNSIGNED;
+
+/**
+ * Creates a temporary table that holds the filters of the current search term, one per filter type (filter size) of the origin table.
+ *
+ * Origin schema:
+ *
+ * - id PK
+ * - partition_id FK journaldb.logfile.id
+ * - filter_type_id FK bloomdb.filtertype.id
+ * - filter - bloomfilter byte array
+ *
+ *
+ * This schema:
+ *
+ * - id PK
+ * - termId - count of the current search term
+ * - typeId FK bloomdb.filtertype.id
+ * - filter - bloomfilter bytes with only the search term token set inserted
+ *
+ * Parent table create table example:
+ *
+ *
+ * CREATE TABLE `example` ( `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT, `partition_id` bigint(20) unsigned NOT NULL, `filter_type_id` bigint(20) unsigned NOT NULL, `filter` longblob NOT NULL, PRIMARY KEY (`id`), UNIQUE KEY `partition_id` (`partition_id`), KEY `example_ibfk_1` (`filter_type_id`), CONSTRAINT `example_ibfk_1` FOREIGN KEY (`filter_type_id`) REFERENCES `filtertype` (`id`) ON DELETE CASCADE, CONSTRAINT `example_ibfk_2` FOREIGN KEY (`partition_id`) REFERENCES `journaldb`.`logfile` (`id`) ON DELETE CASCADE ) ENGINE=InnoDB AUTO_INCREMENT=54787 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
+ *
+ *
+ * @see com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb
+ */
+public final class CategoryTableImpl implements CategoryTable {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(CategoryTableImpl.class);
+
+    private final DSLContext ctx;
+    private final Table<?> originTable;
+    private final long bloomTermId;
+    private final CategoryTableCondition tableCondition;
+    private final TableFilters tableFilters;
+
+    /** Takes the DSLContext and the bloom term id from the given condition config. */
+    public CategoryTableImpl(ConditionConfig config, Table<?> originTable, String value) {
+        this(
+                config.context(),
+                originTable,
+                config.bloomTermId(),
+                new CategoryTableCondition(originTable, config.bloomTermId()),
+                new TableFilters(config.context(), originTable, config.bloomTermId(), value)
+        );
+    }
+
+    /** Builds the default CategoryTableCondition and TableFilters from the given arguments. */
+    public CategoryTableImpl(DSLContext ctx, Table<?> originTable, long bloomTermId, String value) {
+        this(
+                ctx,
+                originTable,
+                bloomTermId,
+                new CategoryTableCondition(originTable, bloomTermId),
+                new TableFilters(ctx, originTable, bloomTermId, value)
+        );
+    }
+
+    public CategoryTableImpl(
+            DSLContext ctx,
+            Table<?> originTable,
+            long bloomTermId,
+            CategoryTableCondition tableCondition,
+            TableFilters tableFilters
+    ) {
+        this.ctx = ctx;
+        this.originTable = originTable;
+        this.bloomTermId = bloomTermId;
+        this.tableCondition = tableCondition;
+        this.tableFilters = tableFilters;
+    }
+
+    /** Drops and recreates the temporary table term_{bloomTermId}_{origin table name} and indexes its type_id column. */
+    @Override
+    public void create() {
+        final Table<?> namedTable = DSL.table(DSL.name(("term_" + bloomTermId + "_" + originTable.getName())));
+        if (LOGGER.isInfoEnabled()) {
+            LOGGER.info("Creating temporary table <{}>", namedTable.getName());
+        }
+        ctx.dropTemporaryTableIfExists(namedTable).execute();
+        final String sql = "create temporary table " + namedTable.getName()
+                + "(id bigint auto_increment primary key, term_id bigint, type_id bigint, filter longblob, unique key "
+                + namedTable.getName() + "_unique_key (term_id, type_id))";
+        ctx.query(sql).execute();
+        final Index typeIndex = DSL.index(DSL.name(namedTable.getName() + "_ix_type_id"));
+        final CreateIndexIncludeStep indexStep = ctx.createIndex(typeIndex)
+                .on(namedTable, DSL.field("type_id", BIGINTUNSIGNED.nullable(false)));
+        LOGGER.trace("BloomFilterTempTable create index <{}>", indexStep);
+        indexStep.execute();
+    }
+
+    /** Delegates the insert to the TableFilters of this table. */
+    @Override
+    public void insertFilters() {
+        tableFilters.insertFiltersIntoCategoryTable();
+    }
+
+    /** @return row condition that selects the same sized filter arrays from this category table and the origin table */
+    @Override
+    public QueryCondition bloommatchCondition() {
+        return tableCondition;
+    }
+
+    /** Equal only for the same class with equal origin table, bloom term id and table filters and the very same DSLContext instance. */
+    @Override
+    public boolean equals(final Object object) {
+        if (this == object)
+            return true;
+        if (object == null || object.getClass() != this.getClass())
+            return false;
+        final CategoryTableImpl cast = (CategoryTableImpl) object;
+        return this.originTable.equals(cast.originTable) && this.ctx == cast.ctx && // equal only if same instance of DSLContext
+                this.bloomTermId == cast.bloomTermId && this.tableFilters.equals(cast.tableFilters);
+    }
+
+    @Override
+    public int hashCode() {
+        // ctx is hashed by identity like in equals; tableFilters is left out because its hashCode is not visible here
+        return java.util.Objects.hash(System.identityHashCode(ctx), originTable, bloomTermId);
+    }
+}
diff --git a/src/main/java/com/teragrep/pth_06/planner/CreatedCategoryTable.java b/src/main/java/com/teragrep/pth_06/planner/CreatedCategoryTable.java
new file mode 100644
index 00000000..86a67441
--- /dev/null
+++ b/src/main/java/com/teragrep/pth_06/planner/CreatedCategoryTable.java
@@ -0,0 +1,76 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner;
+
+import com.teragrep.pth_06.planner.walker.conditions.QueryCondition;
+
+/**
+ * Decorator that creates the category table before the bloom match condition is returned
+ */
+public final class CreatedCategoryTable implements CategoryTable {
+
+ private final CategoryTable origin; // the decorated category table
+
+ public CreatedCategoryTable(final CategoryTable origin) {
+ this.origin = origin;
+ }
+
+ @Override
+ public void create() {
+ origin.create();
+ }
+
+ @Override
+ public void insertFilters() {
+ origin.insertFilters();
+ }
+
+ @Override
+ public QueryCondition bloommatchCondition() {
+ create(); // runs on every call, so the origin is created before its condition is handed out
+ return origin.bloommatchCondition();
+ }
+}
diff --git a/src/main/java/com/teragrep/pth_06/planner/PatternMatchTables.java b/src/main/java/com/teragrep/pth_06/planner/PatternMatchTables.java
new file mode 100644
index 00000000..94a7fe7f
--- /dev/null
+++ b/src/main/java/com/teragrep/pth_06/planner/PatternMatchTables.java
@@ -0,0 +1,121 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner;
+
+import com.teragrep.pth_06.planner.walker.conditions.PatternMatchCondition;
+import com.teragrep.pth_06.planner.walker.conditions.QueryCondition;
+import org.jooq.DSLContext;
+import org.jooq.Field;
+import org.jooq.Table;
+import org.jooq.types.ULong;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+
+import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB;
+
+/**
+ * Class to get a collection of Tables that match the given PatternMatchCondition
+ */
+public final class PatternMatchTables {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(PatternMatchTables.class);
+
+    private final DSLContext ctx;
+    private final QueryCondition patternMatchCondition;
+
+    /** Wraps the given pattern into a PatternMatchCondition. */
+    public PatternMatchTables(DSLContext ctx, String pattern) {
+        this(ctx, new PatternMatchCondition(pattern));
+    }
+
+    public PatternMatchTables(DSLContext ctx, PatternMatchCondition patternMatchCondition) {
+        this.ctx = ctx;
+        this.patternMatchCondition = patternMatchCondition;
+    }
+
+    /**
+     * List of tables from bloomdb that match patternMatchCondition. Note: Table records are not fetched fully
+     *
+     * @return List of tables that matched condition and were not empty
+     */
+    public List<Table<?>> toList() {
+        final List<Table<?>> tables = ctx
+                .meta()
+                .filterSchemas(s -> s.equals(BLOOMDB)) // select bloomdb
+                .filterTables(t -> !t.equals(BLOOMDB.FILTERTYPE)) // remove filtertype table
+                .filterTables(t -> ctx.select((Field<ULong>) t.field("id"))// for each remaining table
+                        .from(t)
+                        .leftJoin(BLOOMDB.FILTERTYPE)// join filtertype to access patterns
+                        .on(BLOOMDB.FILTERTYPE.ID.eq((Field<ULong>) t.field("filter_type_id")))
+                        .where(patternMatchCondition.condition())// select tables that match pattern condition
+                        .limit(1)// limit 1 since we are checking only if table is not empty
+                        .fetch()
+                        .isNotEmpty() // select table if not empty
+                )
+                .getTables();
+        LOGGER.debug("Table(s) with a pattern match <{}>", tables);
+        return tables;
+    }
+
+    /** Equal only for the same class with an equal pattern match condition and the very same DSLContext instance. */
+    @Override
+    public boolean equals(final Object object) {
+        if (this == object)
+            return true;
+        if (object == null || object.getClass() != this.getClass())
+            return false;
+        final PatternMatchTables cast = (PatternMatchTables) object;
+        return this.patternMatchCondition.equals(cast.patternMatchCondition) && this.ctx == cast.ctx; // only same instance of DSLContext is equal
+    }
+
+    @Override
+    public int hashCode() {
+        // only the identity of ctx is hashed: equal objects always share it, and the condition's hashCode is not visible here
+        return System.identityHashCode(ctx);
+    }
+}
diff --git a/src/main/java/com/teragrep/pth_06/planner/SearchTermFiltersInserted.java b/src/main/java/com/teragrep/pth_06/planner/SearchTermFiltersInserted.java
new file mode 100644
index 00000000..a214b22c
--- /dev/null
+++ b/src/main/java/com/teragrep/pth_06/planner/SearchTermFiltersInserted.java
@@ -0,0 +1,76 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner;
+
+import com.teragrep.pth_06.planner.walker.conditions.QueryCondition;
+
+/**
+ * Decorator that inserts the filters of the category table before the bloom match condition is returned
+ */
+public final class SearchTermFiltersInserted implements CategoryTable {
+
+ private final CategoryTable origin; // the decorated category table
+
+ public SearchTermFiltersInserted(final CategoryTable origin) {
+ this.origin = origin;
+ }
+
+ @Override
+ public void create() {
+ origin.create();
+ }
+
+ @Override
+ public void insertFilters() {
+ origin.insertFilters();
+ }
+
+ @Override
+ public QueryCondition bloommatchCondition() {
+ insertFilters(); // runs on every call, so the filters are inserted before the condition is handed out
+ return origin.bloommatchCondition();
+ }
+}
diff --git a/src/main/java/com/teragrep/pth_06/planner/StreamDBClient.java b/src/main/java/com/teragrep/pth_06/planner/StreamDBClient.java
index c3b880d0..0cc044cb 100644
--- a/src/main/java/com/teragrep/pth_06/planner/StreamDBClient.java
+++ b/src/main/java/com/teragrep/pth_06/planner/StreamDBClient.java
@@ -45,16 +45,12 @@
*/
package com.teragrep.pth_06.planner;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.UncheckedIOException;
import java.sql.*;
import java.time.Instant;
+import java.util.Set;
import com.teragrep.pth_06.config.Config;
-import com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb;
import com.teragrep.pth_06.planner.walker.ConditionWalker;
-import org.apache.spark.util.sketch.BloomFilter;
import org.jooq.*;
import org.jooq.conf.MappedSchema;
import org.jooq.conf.RenderMapping;
@@ -70,7 +66,6 @@
import static com.teragrep.pth_06.jooq.generated.journaldb.Journaldb.JOURNALDB;
import static org.jooq.impl.DSL.select;
-import static org.jooq.impl.SQLDataType.BIGINT;
// https://stackoverflow.com/questions/33657391/qualifying-a-temporary-table-column-name-in-jooq
// https://www.jooq.org/doc/latest/manual/sql-building/dynamic-sql/
@@ -96,6 +91,7 @@ public class StreamDBClient {
private long includeBeforeEpoch;
private final boolean bloomEnabled;
private final Condition journaldbCondition;
+ private final ConditionWalker walker;
public StreamDBClient(Config config) throws SQLException {
@@ -110,6 +106,7 @@ public StreamDBClient(Config config) throws SQLException {
final String streamdbName = config.archiveConfig.dbStreamDbName;
final String bloomdbName = config.archiveConfig.bloomDbName;
final boolean hideDatabaseExceptions = config.archiveConfig.hideDatabaseExceptions;
+ final boolean withoutFilters = config.archiveConfig.withoutFilters;
// https://blog.jooq.org/how-i-incorrectly-fetched-jdbc-resultsets-again/
Settings settings = new Settings()
@@ -128,10 +125,7 @@ public StreamDBClient(Config config) throws SQLException {
ctx.execute("SET sql_mode = 'NO_ENGINE_SUBSTITUTION';");
}
// -- TODO use dslContext.batch for all initial operations
- BloomFiltersTempTable.create(ctx);
-
- final ConditionWalker walker = new ConditionWalker(ctx, bloomEnabled);
-
+ this.walker = new ConditionWalker(ctx, bloomEnabled, withoutFilters);
Condition streamdbCondition;
try {
@@ -154,7 +148,7 @@ void setIncludeBeforeEpoch(long includeBeforeEpoch) {
this.includeBeforeEpoch = includeBeforeEpoch;
}
- int pullToSliceTable(Date day) {
+ public int pullToSliceTable(Date day) {
NestedTopNQuery nestedTopNQuery = new NestedTopNQuery();
SelectOnConditionStep> select = ctx
.select(
@@ -263,76 +257,6 @@ private static void create(DSLContext ctx) {
}
- // TODO WIP
- public static class BloomFiltersTempTable {
-
- public static final String bloomTable = "bloomFiltersTable";
- public static final Table BLOOM_TABLE = DSL.table(DSL.name(bloomTable));
-
- public static final Field id = DSL.field("id", BIGINT.identity(true));
- public static final Field fe100kfp001 = DSL.field(DSL.name(bloomTable, "fe100kfp001"), byte[].class);
- public static final Field fe1000kfpp003 = DSL
- .field(DSL.name(bloomTable, "fe1000kfpp003"), byte[].class);
- public static final Field fe2500kfpp005 = DSL
- .field(DSL.name(bloomTable, "fe2500kfpp005"), byte[].class);
-
- private static void create(DSLContext ctx) {
-
- DropTableStep dropQuery = ctx.dropTemporaryTableIfExists(BloomFiltersTempTable.BLOOM_TABLE);
- dropQuery.execute();
-
- Query query = ctx
- .query(
- "create temporary table bloomFiltersTable(id bigint auto_increment primary key, fe100kfp001 longblob, fe1000kfpp003 longblob, fe2500kfpp005 longblob)"
- );
-
- /*CreateTableConstraintStep query = ctx.createTemporaryTable(BloomFiltersTempTable.BLOOM_TABLE)
- .columns(
- id,
- fe100kfp001,
- fe1000kfpp003,
- fe2500kfpp005)
- .constraints(primaryKey(id));
-
- */
- query.execute();
-
- }
-
- public static long insert(
- DSLContext ctx,
- BloomFilter smallFilter,
- BloomFilter mediumFilter,
- BloomFilter largeFilter
- ) {
-
- final ByteArrayOutputStream smallBaos = new ByteArrayOutputStream();
- final ByteArrayOutputStream mediumBaos = new ByteArrayOutputStream();
- final ByteArrayOutputStream largeBaos = new ByteArrayOutputStream();
-
- try {
- smallFilter.writeTo(smallBaos);
- mediumFilter.writeTo(mediumBaos);
- largeFilter.writeTo(largeBaos);
-
- smallBaos.close();
- mediumBaos.close();
- largeBaos.close();
- }
- catch (IOException e) {
- throw new UncheckedIOException(e);
- }
-
- ctx
- .insertInto(BLOOM_TABLE)
- .columns(fe100kfp001, fe1000kfpp003, fe2500kfpp005)
- .values(DSL.val(smallBaos.toByteArray(), byte[].class), DSL.val(mediumBaos.toByteArray(), byte[].class), DSL.val(largeBaos.toByteArray(), byte[].class)).execute();
-
- long rv = ctx.lastID().longValue();
- return rv;
- }
- }
-
public static class GetArchivedObjectsFilterTable {
// temporary table created from streamdb
@@ -420,15 +344,17 @@ private Table getTableStatement(Condition journaldbCondition, Date day)
.on(JOURNALDB.LOGFILE.HOST_ID.eq(GetArchivedObjectsFilterTable.host_id).and(JOURNALDB.LOGFILE.LOGTAG.eq(GetArchivedObjectsFilterTable.tag)));
if (bloomEnabled) {
-
- selectOnConditionStep = selectOnConditionStep
- .leftJoin(Bloomdb.BLOOMDB.FILTER_EXPECTED_100000_FPP_001)
- .on(JOURNALDB.LOGFILE.ID.eq(Bloomdb.BLOOMDB.FILTER_EXPECTED_100000_FPP_001.PARTITION_ID))
- .leftJoin(Bloomdb.BLOOMDB.FILTER_EXPECTED_1000000_FPP_003)
- .on(JOURNALDB.LOGFILE.ID.eq(Bloomdb.BLOOMDB.FILTER_EXPECTED_1000000_FPP_003.PARTITION_ID))
- .leftJoin(Bloomdb.BLOOMDB.FILTER_EXPECTED_2500000_FPP_005)
- .on(JOURNALDB.LOGFILE.ID.eq(Bloomdb.BLOOMDB.FILTER_EXPECTED_2500000_FPP_005.PARTITION_ID));
-
+ Set<Table<?>> tables = walker.patternMatchTables();
+ if (!tables.isEmpty()) {
+ for (Table<?> table : tables) { // each table from the walker is left joined on logfile id = partition_id
+ if (LOGGER.isInfoEnabled()) {
+ LOGGER.info("Left join pattern match table: <{}>", table.getName());
+ }
+ selectOnConditionStep = selectOnConditionStep
+ .leftJoin(table)
+ .on(JOURNALDB.LOGFILE.ID.eq((Field) table.field("partition_id")));
+ }
+ }
}
return selectOnConditionStep
diff --git a/src/main/java/com/teragrep/pth_06/planner/TableFilterTypesFromMetadata.java b/src/main/java/com/teragrep/pth_06/planner/TableFilterTypesFromMetadata.java
new file mode 100644
index 00000000..d5962952
--- /dev/null
+++ b/src/main/java/com/teragrep/pth_06/planner/TableFilterTypesFromMetadata.java
@@ -0,0 +1,108 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner;
+
+import org.jooq.*;
+import org.jooq.impl.DSL;
+import org.jooq.types.ULong;
+
+import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB;
+
+/**
+ * Filter types of a table from metadata
+ */
+public final class TableFilterTypesFromMetadata implements TableRecords {
+
+    private final DSLContext ctx;
+    private final Table<?> table;
+    private final long bloomTermId;
+
+    public TableFilterTypesFromMetadata(DSLContext ctx, Table<?> table, long bloomTermId) {
+        this.ctx = ctx;
+        this.table = table;
+        this.bloomTermId = bloomTermId;
+    }
+
+    /** Selects the filter types referenced by the rows of the table; throws if the table is null or nothing is found. */
+    public Result<Record> toResult() {
+        if (table == null) {
+            throw new IllegalStateException("Origin table was null");
+        }
+        final Table<?> joined = table.join(BLOOMDB.FILTERTYPE)
+                .on(BLOOMDB.FILTERTYPE.ID.eq((Field<ULong>) table.field("filter_type_id")));
+        final Table<?> namedTable = DSL.table(DSL.name(("term_" + bloomTermId + "_" + table.getName())));
+        final Field<ULong> expectedField = DSL.field(DSL.name(namedTable.getName(), "expectedElements"), ULong.class);
+        final Field<Double> fppField = DSL.field(DSL.name(namedTable.getName(), "targetFpp"), Double.class);
+        final SelectField<?>[] resultFields = {
+                BLOOMDB.FILTERTYPE.ID,
+                joined.field("expectedElements").as(expectedField),
+                joined.field("targetFpp").as(fppField),
+                joined.field("pattern")
+        };
+        // Fetch filtertype values from metadata
+        final Result<Record> records = ctx.select(resultFields).from(joined)
+                .groupBy(joined.field("filter_type_id")).fetch();
+        if (records.isEmpty()) {
+            throw new RuntimeException("Origin table was empty");
+        }
+        return records;
+    }
+
+    @Override
+    public boolean equals(final Object object) {
+        if (this == object)
+            return true;
+        if (object == null || object.getClass() != this.getClass())
+            return false;
+        final TableFilterTypesFromMetadata cast = (TableFilterTypesFromMetadata) object;
+        return this.bloomTermId == cast.bloomTermId && this.table.equals(cast.table) && this.ctx == cast.ctx;
+    }
+
+    @Override
+    public int hashCode() {
+        return java.util.Objects.hash(System.identityHashCode(ctx), table, bloomTermId); // ctx by identity, as in equals
+    }
+}
diff --git a/src/main/java/com/teragrep/pth_06/planner/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/TableFilters.java
new file mode 100644
index 00000000..d5ff1086
--- /dev/null
+++ b/src/main/java/com/teragrep/pth_06/planner/TableFilters.java
@@ -0,0 +1,168 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner;
+
+import com.teragrep.blf_01.Token;
+import org.apache.spark.util.sketch.BloomFilter;
+import org.jooq.*;
+import org.jooq.impl.DSL;
+import org.jooq.types.ULong;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.regex.Pattern;
+
+import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB;
+import static org.jooq.impl.SQLDataType.BIGINTUNSIGNED;
+
+/**
+ * Builds a bloom filter for every filter type record of a table and inserts the filter bytes into the table's category table
+ */
+public final class TableFilters {
+
+ private final DSLContext ctx;
+ private final Table> table;
+ private final long bloomTermId;
+ private final TokenizedValue value;
+ private final TableRecords recordsInMetadata;
+
+ public TableFilters(DSLContext ctx, Table> table, long bloomTermId, String input) {
+ this(
+ ctx,
+ table,
+ bloomTermId,
+ new TokenizedValue(input),
+ new TableFilterTypesFromMetadata(ctx, table, bloomTermId)
+ );
+ }
+
+ public TableFilters(
+ DSLContext ctx,
+ Table> table,
+ long bloomTermId,
+ TokenizedValue value,
+ TableFilterTypesFromMetadata recordsInMetadata
+ ) {
+ this.ctx = ctx;
+ this.table = table;
+ this.bloomTermId = bloomTermId;
+ this.value = value;
+ this.recordsInMetadata = recordsInMetadata;
+ }
+
+ /**
+ * Creates a bloom filter from the record's expectedElements and targetFpp, fills it with the value's tokens that
+ * fully match the record's pattern, and returns the serialized filter. Throws IllegalStateException if no token matched.
+ * @param record record with filter info
+ * @return byte[] of the created filter
+ */
+ private byte[] filterBytesFromRecord(final Record record) {
+ final ULong expected = record.getValue(DSL.field(DSL.name(table.getName(), "expectedElements"), ULong.class));
+ final Double fpp = record.getValue(DSL.field(DSL.name(table.getName(), "targetFpp"), Double.class));
+ final String pattern = record.getValue(BLOOMDB.FILTERTYPE.PATTERN, String.class);
+ final BloomFilter filter = BloomFilter.create(expected.longValue(), fpp);
+ final Pattern compiled = Pattern.compile(pattern);
+ boolean isEmpty = true; // stays true until at least one token has been put into the filter
+ for (final Token token : value.tokens()) {
+ final String tokenString = token.toString();
+ if (compiled.matcher(tokenString).matches()) {
+ isEmpty = false;
+ filter.put(tokenString);
+ }
+ }
+ if (isEmpty) {
+ throw new IllegalStateException("Trying to insert empty filter");
+ }
+ final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream();
+ try {
+ filter.writeTo(filterBAOS);
+ filterBAOS.close();
+ }
+ catch (IOException e) {
+ throw new UncheckedIOException("Error writing filter bytes: " + e.getMessage(), e); // keep the original exception as the cause
+ }
+ return filterBAOS.toByteArray();
+ }
+
+ private void insertFilterRecordToCategoryTable(final Record record) { // inserts one (term_id, type_id, filter) row for the given filter type record
+ final Table categoryTable = DSL.table(DSL.name(("term_" + bloomTermId + "_" + this.table.getName())));
+ final Field>[] insertFields = {
+ DSL.field("term_id", BIGINTUNSIGNED.nullable(false)),
+ DSL.field("type_id", BIGINTUNSIGNED.nullable(false)),
+ DSL.field(DSL.name(categoryTable.getName(), "filter"), byte[].class)
+ };
+ final Field>[] valueFields = { // same order as insertFields
+ DSL.val(bloomTermId, ULong.class),
+ DSL.val(record.getValue(BLOOMDB.FILTERTYPE.ID), ULong.class),
+ DSL.val(filterBytesFromRecord(record), byte[].class)
+ };
+ ctx.insertInto(categoryTable).columns(insertFields).values(valueFields).execute();
+ }
+
+ public void insertFiltersIntoCategoryTable() { // one insert per record returned by recordsInMetadata
+ recordsInMetadata.toResult().forEach(this::insertFilterRecordToCategoryTable);
+ }
+
+ /**
+ * Expects DSLContext values to be the same instance
+ * recordsInMetadata is not part of the comparison
+ * @param object object compared
+ * @return true if object is equal
+ */
+ @Override
+ public boolean equals(final Object object) {
+ if (this == object)
+ return true;
+ if (object == null)
+ return false;
+ if (object.getClass() != this.getClass())
+ return false;
+ final TableFilters cast = (TableFilters) object;
+ return this.ctx == cast.ctx && this.value.equals(cast.value) && this.table.equals(cast.table)
+ && this.bloomTermId == cast.bloomTermId;
+ }
+}
diff --git a/src/main/java/com/teragrep/pth_06/planner/TableRecords.java b/src/main/java/com/teragrep/pth_06/planner/TableRecords.java
new file mode 100644
index 00000000..95b9fb92
--- /dev/null
+++ b/src/main/java/com/teragrep/pth_06/planner/TableRecords.java
@@ -0,0 +1,54 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner;
+
+import org.jooq.Record;
+import org.jooq.Result;
+
+public interface TableRecords { // a source of jOOQ records; implemented by TableFilterTypesFromMetadata
+
+ Result toResult(); // returns the records as a jOOQ Result
+}
diff --git a/src/main/java/com/teragrep/pth_06/planner/TokenizedValue.java b/src/main/java/com/teragrep/pth_06/planner/TokenizedValue.java
new file mode 100644
index 00000000..3d5bc6c1
--- /dev/null
+++ b/src/main/java/com/teragrep/pth_06/planner/TokenizedValue.java
@@ -0,0 +1,81 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner;
+
+import com.teragrep.blf_01.Token;
+import com.teragrep.blf_01.Tokenizer;
+
+import java.io.ByteArrayInputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.HashSet;
+import java.util.Set;
+
+public final class TokenizedValue { // wraps a string value and exposes it as a set of tokens
+
+ public final String value;
+
+ public TokenizedValue(String value) {
+ this.value = value;
+ }
+
+ public Set tokens() { // tokenizes the UTF-8 bytes of value on every call and collects the tokens into a new HashSet
+ return new HashSet<>(
+ new Tokenizer(32).tokenize(new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8))) // NOTE(review): the meaning of the 32 argument is defined by the blf_01 Tokenizer - confirm
+ );
+ }
+
+ @Override
+ public boolean equals(final Object object) { // NOTE(review): no hashCode() override is visible in this class - confirm before using instances as hash keys
+ if (this == object)
+ return true;
+ if (object == null)
+ return false;
+ if (object.getClass() != this.getClass())
+ return false;
+ final TokenizedValue cast = (TokenizedValue) object;
+ return this.value.equals(cast.value); // equality is decided by the wrapped string only
+ }
+}
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/ConditionWalker.java b/src/main/java/com/teragrep/pth_06/planner/walker/ConditionWalker.java
index 2635ac79..24ceffd1 100644
--- a/src/main/java/com/teragrep/pth_06/planner/walker/ConditionWalker.java
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/ConditionWalker.java
@@ -47,41 +47,53 @@
import com.teragrep.pth_06.config.ConditionConfig;
import com.teragrep.pth_06.planner.walker.conditions.ElementCondition;
+import com.teragrep.pth_06.planner.walker.conditions.ValidElement;
import org.jooq.Condition;
import org.jooq.DSLContext;
+import org.jooq.Table;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Element;
+import java.util.HashSet;
+import java.util.Set;
+
/**
* Condition Walker
Walker for conditions.
*
- * @since 23/09/2021
* @author Kimmo Leppinen
* @author Mikko Kortelainen
* @author Ville Manninen
+ * @since 23/09/2021
*/
-public class ConditionWalker extends XmlWalker {
+public final class ConditionWalker extends XmlWalker {
private final boolean bloomEnabled;
+ private final boolean withoutFilters;
private final Logger LOGGER = LoggerFactory.getLogger(ConditionWalker.class);
// Default query is full
private boolean streamQuery = false;
private final DSLContext ctx;
+ private final Set> combinedMatchSet;
+ private long bloomTermId = 0;
/**
* Constructor without connection. Used during unit-tests. Enables jooq-query construction.
*/
public ConditionWalker() {
- super();
- this.ctx = null;
- this.bloomEnabled = false;
+ this(null, false, false);
}
public ConditionWalker(DSLContext ctx, boolean bloomEnabled) {
+ this(ctx, bloomEnabled, false);
+ }
+
+ public ConditionWalker(DSLContext ctx, boolean bloomEnabled, boolean withoutFilters) {
super();
this.ctx = ctx;
this.bloomEnabled = bloomEnabled;
+ this.withoutFilters = withoutFilters;
+ this.combinedMatchSet = new HashSet<>();
}
public Condition fromString(String inXml, boolean streamQuery) throws Exception {
@@ -89,6 +101,15 @@ public Condition fromString(String inXml, boolean streamQuery) throws Exception
return fromString(inXml);
}
+ /**
+ * Set of all the tables that pattern matched with the tokenized value search elements the walker has traversed
+ *
+ * @return Set of Tables that had a pattern match
+ */
+ public Set> patternMatchTables() {
+ return combinedMatchSet;
+ }
+
@Override
public Condition emitLogicalOperation(String op, Object l, Object r) throws Exception {
Condition rv;
@@ -138,11 +159,17 @@ public Condition emitUnaryOperation(String op, Element current) throws Exception
return rv;
}
- Condition emitElem(Element current) {
- ElementCondition elementCondition = new ElementCondition(
- current,
- new ConditionConfig(ctx, streamQuery, bloomEnabled, false)
+ Condition emitElem(final Element current) {
+ final ElementCondition elementCondition = new ElementCondition(
+ new ValidElement(current),
+ new ConditionConfig(ctx, streamQuery, bloomEnabled, withoutFilters, bloomTermId)
);
+ if (elementCondition.isBloomSearchCondition()) {
+ final Set> elementPatternMatchTables = elementCondition.patternMatchTables();
+ // add tables condition found to walker pattern match tables
+ patternMatchTables().addAll(elementPatternMatchTables);
+ bloomTermId++;
+ }
return elementCondition.condition();
}
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/BloomQueryCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/BloomQueryCondition.java
new file mode 100644
index 00000000..198260c3
--- /dev/null
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/BloomQueryCondition.java
@@ -0,0 +1,57 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner.walker.conditions;
+
+import org.jooq.Table;
+
+import java.util.Set;
+
+public interface BloomQueryCondition { // implemented by ElementCondition and IndexStatementCondition
+
+ boolean isBloomSearchCondition(); // both implementations in this change require bloomEnabled and a non-stream query
+
+ Set> patternMatchTables(); // tables that had a pattern match for the searched value
+}
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableCondition.java
new file mode 100644
index 00000000..6d328964
--- /dev/null
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableCondition.java
@@ -0,0 +1,98 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner.walker.conditions;
+
+import org.jooq.*;
+import org.jooq.impl.DSL;
+import org.jooq.types.ULong;
+
+import static org.jooq.impl.SQLDataType.BIGINTUNSIGNED;
+
+/**
+ * Row condition that matches the comparedTo table's bloom filter bytes against the filter stored in its category table
+ */
+public final class CategoryTableCondition implements QueryCondition {
+
+ private final Table> comparedTo;
+ private final long bloomTermId;
+
+ public CategoryTableCondition(Table> comparedTo, long bloomTermId) {
+ this.comparedTo = comparedTo;
+ this.bloomTermId = bloomTermId;
+ }
+
+ public Condition condition() { // builds the condition only; nothing is executed against the database here
+ final Table categoryTable = DSL.table(DSL.name(("term_" + bloomTermId + "_" + comparedTo.getName()))); // category table is named term_<bloomTermId>_<comparedTo name>
+ final Field termIdField = DSL.field("term_id", BIGINTUNSIGNED.nullable(false));
+ final Field typeIdField = DSL.field("type_id", BIGINTUNSIGNED.nullable(false));
+ final Field filterField = DSL.field(DSL.name(categoryTable.getName(), "filter"), byte[].class);
+ // select filter with correct bloom term id and filter type id from category table
+ final SelectConditionStep> selectFilterStep = DSL
+ .select(filterField)
+ .from(categoryTable)
+ .where(termIdField.eq(ULong.valueOf(bloomTermId)))
+ .and(typeIdField.eq((Field) comparedTo.field("filter_type_id")));
+ // compares category table filter byte[] against bloom filter byte[]
+ final Condition filterFieldCondition = DSL
+ .function("bloommatch", Boolean.class, selectFilterStep.asField(), comparedTo.field("filter")) // the subselect is passed as the first argument
+ .eq(true);
+ // null check allows SQL to optimize query
+ final Condition notNullCondition = comparedTo.field("filter").isNotNull();
+ return filterFieldCondition.and(notNullCondition);
+ }
+
+ @Override
+ public boolean equals(final Object object) { // NOTE(review): no hashCode() override is visible in this class - confirm before using instances as hash keys
+ if (this == object)
+ return true;
+ if (object == null)
+ return false;
+ if (object.getClass() != this.getClass())
+ return false;
+ final CategoryTableCondition cast = (CategoryTableCondition) object;
+ return this.bloomTermId == cast.bloomTermId && this.comparedTo.equals(cast.comparedTo);
+ }
+}
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/ElementCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/ElementCondition.java
index be7d0033..51adb310 100644
--- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/ElementCondition.java
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/ElementCondition.java
@@ -45,50 +45,39 @@
*/
package com.teragrep.pth_06.planner.walker.conditions;
-import com.teragrep.blf_01.Tokenizer;
import com.teragrep.pth_06.config.ConditionConfig;
import org.jooq.Condition;
+import org.jooq.Table;
import org.jooq.impl.DSL;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Element;
+import java.util.Set;
+
/**
* Creates a query condition from provided dom element
*/
-public final class ElementCondition {
+public final class ElementCondition implements QueryCondition, BloomQueryCondition {
private static final Logger LOGGER = LoggerFactory.getLogger(ElementCondition.class);
- private final Element element;
+ private final ValidElement element;
private final ConditionConfig config;
public ElementCondition(Element element, ConditionConfig config) {
- this.element = element;
- this.config = config;
+ this(new ValidElement(element), config);
}
- private void validate(Element element) {
- if (element.getTagName() == null) {
- throw new IllegalStateException("Tag name for Element was null");
- }
- if (!element.hasAttribute("operation")) {
- throw new IllegalStateException(
- "Could not find specified or default value for 'operation' attribute from Element"
- );
- }
- if (!element.hasAttribute("value")) {
- throw new IllegalStateException(
- "Could not find specified or default value for 'value' attribute from Element"
- );
- }
+ public ElementCondition(ValidElement element, ConditionConfig config) {
+ this.element = element;
+ this.config = config;
}
public Condition condition() {
- validate(element);
- final String tag = element.getTagName();
- final String value = element.getAttribute("value");
- final String operation = element.getAttribute("operation");
+ final String tag = element.tag();
+ final String value = element.value();
+ final String operation = element.operation();
Condition condition = DSL.noCondition();
switch (tag.toLowerCase()) {
case "index":
@@ -116,21 +105,30 @@ public Condition condition() {
}
// value search
if ("indexstatement".equalsIgnoreCase(tag) && "EQUALS".equals(operation) && config.bloomEnabled()) {
- IndexStatementCondition indexStatementCondition = new IndexStatementCondition(
- value,
- config,
- new Tokenizer(32)
- );
- condition = indexStatementCondition.condition();
+ QueryCondition indexStatement = new IndexStatementCondition(value, config, condition);
+ condition = indexStatement.condition();
}
}
- if (condition.equals(DSL.noCondition())) {
+ // bloom search can return condition unmodified
+ if (condition.equals(DSL.noCondition()) && !isBloomSearchCondition()) {
throw new IllegalStateException("Unsupported Element tag " + tag);
}
LOGGER.debug("Query condition: <{}>", condition);
return condition;
}
+ public boolean isBloomSearchCondition() {
+ final String tag = element.tag();
+ final String operation = element.operation();
+ return "indexstatement".equalsIgnoreCase(tag) && "EQUALS".equals(operation) && !config.streamQuery()
+ && config.bloomEnabled();
+ }
+
+ public Set> patternMatchTables() {
+ final String value = element.value();
+ return new IndexStatementCondition(value, config).patternMatchTables();
+ }
+
@Override
public boolean equals(final Object object) {
if (this == object)
@@ -140,9 +138,6 @@ public boolean equals(final Object object) {
if (object.getClass() != this.getClass())
return false;
final ElementCondition cast = (ElementCondition) object;
- boolean equalName = this.element.getTagName().equals(cast.element.getTagName());
- boolean equalOperation = this.element.getAttribute("operation").equals(cast.element.getAttribute("operation"));
- boolean equalValue = this.element.getAttribute("value").equals(cast.element.getAttribute("value"));
- return equalName && equalOperation && equalValue && this.config.equals(cast.config);
+ return this.element.equals(cast.element) && this.config.equals(cast.config);
}
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java
index e48e4b49..cf1cfced 100644
--- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java
@@ -45,103 +45,86 @@
*/
package com.teragrep.pth_06.planner.walker.conditions;
-import com.teragrep.blf_01.Token;
-import com.teragrep.blf_01.Tokenizer;
import com.teragrep.pth_06.config.ConditionConfig;
-import com.teragrep.pth_06.planner.StreamDBClient;
-import org.apache.spark.util.sketch.BloomFilter;
+import com.teragrep.pth_06.planner.*;
import org.jooq.Condition;
-import org.jooq.Field;
+import org.jooq.Table;
import org.jooq.impl.DSL;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.ByteArrayInputStream;
-import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Set;
-import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB;
-
-public final class IndexStatementCondition implements QueryCondition {
+public final class IndexStatementCondition implements QueryCondition, BloomQueryCondition {
private final Logger LOGGER = LoggerFactory.getLogger(IndexStatementCondition.class);
private final String value;
private final ConditionConfig config;
- private final Tokenizer tokenizer;
+ private final Condition condition;
+ private final Set> tableSet;
+
+ public IndexStatementCondition(String value, ConditionConfig config) {
+ this(value, config, DSL.noCondition());
+ }
- public IndexStatementCondition(String value, ConditionConfig config, Tokenizer tokenizer) {
+ public IndexStatementCondition(String value, ConditionConfig config, Condition condition) {
this.value = value;
this.config = config;
- this.tokenizer = tokenizer;
+ this.condition = condition;
+ this.tableSet = new HashSet<>();
}
public Condition condition() {
- final Set tokenSet = new HashSet<>(
- tokenizer.tokenize(new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8)))
- );
-
- LOGGER.info("BloomFilter tokenSet <[{}]>", tokenSet);
-
- final BloomFilter smallFilter = BloomFilter.create(100000, 0.01);
- final BloomFilter mediumFilter = BloomFilter.create(1000000, 0.03);
- final BloomFilter largeFilter = BloomFilter.create(2500000, 0.05);
-
- tokenSet.forEach(token -> {
- smallFilter.put(token.toString());
- mediumFilter.put(token.toString());
- largeFilter.put(token.toString());
- });
-
- final long rowId = StreamDBClient.BloomFiltersTempTable
- .insert(config.context(), smallFilter, mediumFilter, largeFilter);
-
- final Condition rowIdCondition = StreamDBClient.BloomFiltersTempTable.id.eq(rowId);
-
- final Field smallColumn = DSL
- .select(StreamDBClient.BloomFiltersTempTable.fe100kfp001)
- .from(StreamDBClient.BloomFiltersTempTable.BLOOM_TABLE)
- .where(rowIdCondition)
- .asField();
- final Field mediumColumn = DSL
- .select(StreamDBClient.BloomFiltersTempTable.fe1000kfpp003)
- .from(StreamDBClient.BloomFiltersTempTable.BLOOM_TABLE)
- .where(rowIdCondition)
- .asField();
- final Field largeColumn = DSL
- .select(StreamDBClient.BloomFiltersTempTable.fe2500kfpp005)
- .from(StreamDBClient.BloomFiltersTempTable.BLOOM_TABLE)
- .where(rowIdCondition)
- .asField();
-
- final Field fe100kfp001 = DSL
- .function("bloommatch", Boolean.class, smallColumn, BLOOMDB.FILTER_EXPECTED_100000_FPP_001.FILTER);
- final Field fe1000kfpp003 = DSL
- .function("bloommatch", Boolean.class, mediumColumn, BLOOMDB.FILTER_EXPECTED_1000000_FPP_003.FILTER);
- final Field fe2500kfpp005 = DSL
- .function("bloommatch", Boolean.class, largeColumn, BLOOMDB.FILTER_EXPECTED_2500000_FPP_005.FILTER);
-
- final Condition noBloomFilter = BLOOMDB.FILTER_EXPECTED_100000_FPP_001.FILTER
- .isNull()
- .and(
- BLOOMDB.FILTER_EXPECTED_1000000_FPP_003.FILTER
- .isNull()
- .and(BLOOMDB.FILTER_EXPECTED_2500000_FPP_005.FILTER.isNull())
+ if (!config.bloomEnabled()) {
+ LOGGER.debug("Indexstatement reached with bloom disabled");
+ return condition;
+ }
+ Condition newCondition = condition;
+ if (tableSet.isEmpty()) {
+ final PatternMatchTables patternMatchTables = new PatternMatchTables(config.context(), value);
+ tableSet.addAll(patternMatchTables.toList());
+ }
+ if (!tableSet.isEmpty()) {
+ if (LOGGER.isDebugEnabled()) {
+ LOGGER.debug("Found pattern match on <{}> table(s)", tableSet.size());
+ }
+ Condition combinedTableCondition = DSL.noCondition();
+ Condition combinedNullFilterCondition = DSL.noCondition();
+
+ for (final Table> table : tableSet) {
+ final CategoryTable categoryTable = new CreatedCategoryTable(
+ new SearchTermFiltersInserted(new CategoryTableImpl(config, table, value))
);
- final Condition queryCondition = fe100kfp001
- .eq(true)
- .or(fe1000kfpp003.eq(true).or(fe2500kfpp005.eq(true).or(noBloomFilter)));
- LOGGER.trace("ConditionWalker.emitElement bloomCondition part <{}>", queryCondition);
+ final Condition nullFilterCondition = table.field("filter").isNull();
+ final QueryCondition tableCondition = categoryTable.bloommatchCondition();
+ combinedTableCondition = combinedTableCondition.or(tableCondition.condition());
+ combinedNullFilterCondition = combinedNullFilterCondition.and(nullFilterCondition);
+ }
+ if (config.withoutFilters()) {
+ newCondition = combinedNullFilterCondition;
+ }
+ else {
+ newCondition = combinedTableCondition.or(combinedNullFilterCondition);
+ }
+ }
+ return newCondition;
+ }
- return queryCondition;
+ @Override
+ public boolean isBloomSearchCondition() {
+ return config.bloomEnabled() && !config.streamQuery();
+ }
+
+ @Override
+ public Set> patternMatchTables() {
+ if (tableSet.isEmpty()) {
+ condition();
+ }
+ return tableSet;
}
- /**
- * @param object object compared against
- * @return true if object is same class and all object values are equal (tokenizer values are expected to point to
- * same reference)
- */
@Override
public boolean equals(final Object object) {
if (this == object)
@@ -151,6 +134,6 @@ public boolean equals(final Object object) {
if (object.getClass() != this.getClass())
return false;
final IndexStatementCondition cast = (IndexStatementCondition) object;
- return this.value.equals(cast.value) && this.config.equals(cast.config) && this.tokenizer == cast.tokenizer; // expects same reference
+ return this.value.equals(cast.value) && this.config.equals(cast.config);
}
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchCondition.java
new file mode 100644
index 00000000..f4ad5808
--- /dev/null
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchCondition.java
@@ -0,0 +1,92 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner.walker.conditions;
+
+import com.teragrep.blf_01.Token;
+import com.teragrep.pth_06.planner.TokenizedValue;
+import org.jooq.*;
+import org.jooq.impl.DSL;
+
+import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB;
+
+/**
+ * Combined regex match condition built from the tokens of a search value.
+ *
+ * True if any token regex-matches the pattern column bloomdb.filtertype.pattern; no tokens yields DSL.noCondition().
+ */
+public final class PatternMatchCondition implements QueryCondition {
+
+ private final TokenizedValue value;
+
+ public PatternMatchCondition(String input) {
+ this(new TokenizedValue(input));
+ }
+
+ public PatternMatchCondition(TokenizedValue value) {
+ this.value = value;
+ }
+
+ public Condition condition() {
+ Condition patternCondition = DSL.noCondition();
+ for (Token token : value.tokens()) {
+ Field<String> tokenStringField = DSL.val(token.toString()); // the token is the matched value (bind value)
+ patternCondition = patternCondition.or(tokenStringField.likeRegex(BLOOMDB.FILTERTYPE.PATTERN)); // the table column supplies the regex
+ }
+ return patternCondition;
+ }
+ // NOTE(review): equals() is overridden without a matching hashCode() in this class.
+ @Override
+ public boolean equals(final Object object) {
+ if (this == object)
+ return true;
+ if (object == null)
+ return false;
+ if (object.getClass() != this.getClass())
+ return false;
+ final PatternMatchCondition cast = (PatternMatchCondition) object;
+ return this.value.equals(cast.value);
+ }
+}
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/ValidElement.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/ValidElement.java
new file mode 100644
index 00000000..0ea44822
--- /dev/null
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/ValidElement.java
@@ -0,0 +1,103 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner.walker.conditions;
+
+import org.w3c.dom.Element;
+
+public final class ValidElement {
+ // Wraps a DOM Element and exposes its tag name and its 'operation' and 'value' attributes.
+ private final Element element;
+ // Throws IllegalStateException if the tag name is null or the 'operation' or 'value' attribute is missing.
+ private void validate() {
+ if (element.getTagName() == null) {
+ throw new IllegalStateException("Tag name for Element was null");
+ }
+ if (!element.hasAttribute("operation")) {
+ throw new IllegalStateException(
+ "Could not find specified or default value for 'operation' attribute from Element"
+ );
+ }
+ if (!element.hasAttribute("value")) {
+ throw new IllegalStateException(
+ "Could not find specified or default value for 'value' attribute from Element"
+ );
+ }
+ }
+ // Stores the element as given; validation is deferred until an accessor is called.
+ public ValidElement(Element element) {
+ this.element = element;
+ }
+ // Each accessor below runs validate() before reading from the element.
+ public String tag() {
+ validate();
+ return element.getTagName();
+ }
+
+ public String value() {
+ validate();
+ return element.getAttribute("value");
+ }
+
+ public String operation() {
+ validate();
+ return element.getAttribute("operation");
+ }
+ // NOTE(review): equals() is overridden without a matching hashCode(); it also reads the element without calling validate().
+ @Override
+ public boolean equals(final Object object) {
+ if (this == object)
+ return true;
+ if (object == null)
+ return false;
+ if (object.getClass() != this.getClass())
+ return false;
+ final ValidElement cast = (ValidElement) object;
+ boolean equalName = this.element.getTagName().equals(cast.element.getTagName());
+ boolean equalOperation = this.element.getAttribute("operation").equals(cast.element.getAttribute("operation"));
+ boolean equalValue = this.element.getAttribute("value").equals(cast.element.getAttribute("value"));
+ return equalName && equalOperation && equalValue;
+ }
+}
diff --git a/src/test/java/com/teragrep/pth_06/planner/CategoryTableImplTest.java b/src/test/java/com/teragrep/pth_06/planner/CategoryTableImplTest.java
new file mode 100644
index 00000000..60469640
--- /dev/null
+++ b/src/test/java/com/teragrep/pth_06/planner/CategoryTableImplTest.java
@@ -0,0 +1,326 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner;
+
+import org.apache.spark.util.sketch.BloomFilter;
+import org.jooq.Condition;
+import org.jooq.DSLContext;
+import org.jooq.Table;
+import org.jooq.impl.DSL;
+import org.junit.jupiter.api.*;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Comparing Condition equality using toString() since jooq Condition uses just toString() to check for equality.
+ * inherited from QueryPart
+ *
+ * @see org.jooq.QueryPart
+ */
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+public class CategoryTableImplTest {
+
+ final String url = "jdbc:h2:mem:test;MODE=MariaDB;DATABASE_TO_LOWER=TRUE;CASE_INSENSITIVE_IDENTIFIERS=TRUE";
+ final String userName = "sa";
+ final String password = "";
+ // matches IPv4
+ final String ipRegex = "(\\b25[0-5]|\\b2[0-4][0-9]|\\b[01]?[0-9][0-9]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}";
+ // matches IPv4 starting with 255.
+ final String ipStartingWith255 = "(\\b25[0-5]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}";
+ final List patternList = new ArrayList<>(Arrays.asList(ipRegex, ipStartingWith255));
+ final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password));
+
+ @BeforeAll
+ public void setup() {
+ Assertions.assertDoesNotThrow(() -> {
+ conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute();
+ conn.prepareStatement("USE BLOOMDB").execute();
+ conn.prepareStatement("DROP TABLE IF EXISTS filtertype").execute();
+ String filtertype = "CREATE TABLE`filtertype`" + "("
+ + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
+ + " `expectedElements` bigint(20) unsigned NOT NULL,"
+ + " `targetFpp` DOUBLE(2) unsigned NOT NULL,"
+ + " `pattern` VARCHAR(2048) NOT NULL,"
+ + " UNIQUE KEY (`expectedElements`, `targetFpp`, `pattern`)" + ")";
+ conn.prepareStatement(filtertype).execute();
+ String typeSQL = "INSERT INTO `filtertype` (`id`,`expectedElements`, `targetFpp`, `pattern`) VALUES (?,?,?,?)";
+ int id = 1;
+ for (String pattern : patternList) {
+ PreparedStatement filterType = conn.prepareStatement(typeSQL);
+ filterType.setInt(1, id);
+ filterType.setInt(2, 1000);
+ filterType.setDouble(3, 0.01);
+ filterType.setString(4, pattern);
+ filterType.executeUpdate();
+ id++;
+ }
+ });
+ }
+
+ @BeforeEach
+ void createTargetTable() {
+ Assertions.assertDoesNotThrow(() -> {
+ conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute();
+ conn.prepareStatement("USE BLOOMDB").execute();
+ conn.prepareStatement("DROP TABLE IF EXISTS target").execute();
+ String targetTable = "CREATE TABLE `target`("
+ + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
+ + " `partition_id` bigint(20) unsigned NOT NULL UNIQUE,"
+ + " `filter_type_id` bigint(20) unsigned NOT NULL,"
+ + " `filter` longblob NOT NULL)";
+ conn.prepareStatement(targetTable).execute();
+ });
+ }
+
+ @AfterAll
+ void tearDown() {
+ Assertions.assertDoesNotThrow(() -> {
+ conn.prepareStatement("DROP ALL OBJECTS").execute(); //h2 clear database
+ conn.close();
+ });
+ }
+
+ @Test
+ public void testNonCreatedEmptyTable() {
+ DSLContext ctx = DSL.using(conn);
+ Table> table = ctx
+ .meta()
+ .filterSchemas(s -> s.getName().equals("bloomdb"))
+ .filterTables(t -> !t.getName().equals("filtertype"))
+ .getTables()
+ .get(0);
+
+ Assertions.assertDoesNotThrow(new CategoryTableImpl(ctx, table, 0L, "test")::bloommatchCondition);
+ }
+
+ @Test
+ public void testCreatedWithEmptyTable() {
+ DSLContext ctx = DSL.using(conn);
+ Table> table = ctx
+ .meta()
+ .filterSchemas(s -> s.getName().equals("bloomdb"))
+ .filterTables(t -> !t.getName().equals("filtertype"))
+ .getTables()
+ .get(0);
+
+ CategoryTable tempTable = new CategoryTableImpl(ctx, table, 0L, "test");
+ tempTable.create();
+ RuntimeException ex = Assertions.assertThrows(RuntimeException.class, tempTable::insertFilters);
+ Assertions.assertEquals("Origin table was empty", ex.getMessage());
+ }
+
+ @Test
+ public void testCreation() {
+ fillTargetTable();
+ DSLContext ctx = DSL.using(conn);
+ Table> table = ctx
+ .meta()
+ .filterSchemas(s -> s.getName().equals("bloomdb"))
+ .filterTables(t -> !t.getName().equals("filtertype"))
+ .getTables()
+ .get(0);
+
+ CategoryTable categoryTable = new CategoryTableImpl(ctx, table, 0L, "192.168.1.1");
+ Assertions.assertDoesNotThrow(categoryTable::create);
+ }
+
+ @Test
+ public void testFilterInsertion() {
+ fillTargetTable();
+ DSLContext ctx = DSL.using(conn);
+ Table> table = ctx
+ .meta()
+ .filterSchemas(s -> s.getName().equals("bloomdb"))
+ .filterTables(t -> !t.getName().equals("filtertype"))
+ .getTables()
+ .get(0);
+
+ CategoryTable categoryTable = new CategoryTableImpl(ctx, table, 0L, "ip=192.168.1.1");
+ Assertions.assertDoesNotThrow(categoryTable::create);
+ Assertions.assertDoesNotThrow(categoryTable::insertFilters);
+ BloomFilter filter = Assertions.assertDoesNotThrow(() -> {
+ ResultSet rs = conn.prepareStatement("SELECT * FROM term_0_target").executeQuery();
+ rs.absolute(1);
+ byte[] bytes = rs.getBytes(4);
+ return BloomFilter.readFrom(new ByteArrayInputStream(bytes));
+ });
+ // check that category table filter only has pattern matching tokens
+ Assertions.assertTrue(filter.mightContain("192.168.1.1"));
+ Assertions.assertFalse(filter.mightContain("ip=192.168.1.1"));
+ Assertions.assertFalse(filter.mightContain("168.1.1"));
+ }
+
+ @Test
+ public void testConditionGeneration() {
+ fillTargetTable();
+ DSLContext ctx = DSL.using(conn);
+ Table> table = ctx
+ .meta()
+ .filterSchemas(s -> s.getName().equals("bloomdb"))
+ .filterTables(t -> !t.getName().equals("filtertype"))
+ .getTables()
+ .get(0);
+
+ CategoryTableImpl tempTable = new CategoryTableImpl(ctx, table, 0L, "test");
+ Condition tableCond = tempTable.bloommatchCondition().condition();
+ String e = "(\n" + " bloommatch(\n" + " (\n" + " select \"term_0_target\".\"filter\"\n"
+ + " from \"term_0_target\"\n" + " where (\n" + " term_id = 0\n"
+ + " and type_id = \"bloomdb\".\"target\".\"filter_type_id\"\n" + " )\n" + " ),\n"
+ + " \"bloomdb\".\"target\".\"filter\"\n" + " ) = true\n"
+ + " and \"bloomdb\".\"target\".\"filter\" is not null\n" + ")";
+ Assertions.assertEquals(e, tableCond.toString());
+ }
+
+ @Test
+ public void testBloomTerm() {
+ fillTargetTable();
+ DSLContext ctx = DSL.using(conn);
+ Table> table = ctx
+ .meta()
+ .filterSchemas(s -> s.getName().equals("bloomdb"))
+ .filterTables(t -> !t.getName().equals("filtertype"))
+ .getTables()
+ .get(0);
+ CategoryTableImpl tempTable = new CategoryTableImpl(ctx, table, 1L, "test");
+ Condition condition = tempTable.bloommatchCondition().condition();
+ Assertions.assertTrue(condition.toString().contains("term_1_"));
+ }
+
+ @Test
+ public void testEquality() {
+ fillTargetTable();
+ DSLContext ctx = DSL.using(conn);
+ Table> target1 = ctx
+ .meta()
+ .filterSchemas(s -> s.getName().equals("bloomdb"))
+ .filterTables(t -> !t.getName().equals("filtertype"))
+ .getTables()
+ .get(0);
+ Table> target2 = ctx
+ .meta()
+ .filterSchemas(s -> s.getName().equals("bloomdb"))
+ .filterTables(t -> !t.getName().equals("filtertype"))
+ .getTables()
+ .get(0);
+ CategoryTableImpl table1 = new CategoryTableImpl(ctx, target1, 1L, "one");
+ CategoryTableImpl table2 = new CategoryTableImpl(ctx, target2, 1L, "one");
+ Assertions.assertEquals(table1, table2);
+ }
+
+ @Test
+ public void testDifferentTokenSetNotEquals() {
+ fillTargetTable();
+ DSLContext ctx = DSL.using(conn);
+ Table> table = ctx
+ .meta()
+ .filterSchemas(s -> s.getName().equals("bloomdb"))
+ .filterTables(t -> !t.getName().equals("filtertype"))
+ .getTables()
+ .get(0);
+ CategoryTableImpl table1 = new CategoryTableImpl(ctx, table, 1L, "one");
+ CategoryTableImpl table2 = new CategoryTableImpl(ctx, table, 1L, "two");
+ Assertions.assertNotEquals(table1, table2);
+ }
+
+ @Test
+ public void testDifferentBloomTermNotEquals() {
+ fillTargetTable();
+ DSLContext ctx = DSL.using(conn);
+ Table> table = ctx
+ .meta()
+ .filterSchemas(s -> s.getName().equals("bloomdb"))
+ .filterTables(t -> !t.getName().equals("filtertype"))
+ .getTables()
+ .get(0);
+ CategoryTableImpl table1 = new CategoryTableImpl(ctx, table, 0L, "one");
+ CategoryTableImpl table2 = new CategoryTableImpl(ctx, table, 1L, "one");
+ Assertions.assertNotEquals(table1, table2);
+ }
+
+ @Test
+ public void testDifferentDSLContextNotEquals() {
+ fillTargetTable();
+ DSLContext ctx = DSL.using(conn);
+ DSLContext ctx2 = DSL.using(conn);
+ Table> table = ctx
+ .meta()
+ .filterSchemas(s -> s.getName().equals("bloomdb"))
+ .filterTables(t -> !t.getName().equals("filtertype"))
+ .getTables()
+ .get(0);
+ CategoryTableImpl table1 = new CategoryTableImpl(ctx, table, 0L, "one");
+ CategoryTableImpl table2 = new CategoryTableImpl(ctx2, table, 0L, "one");
+ Assertions.assertNotEquals(table1, table2);
+ }
+
+ void fillTargetTable() {
+ Assertions.assertDoesNotThrow(() -> {
+ conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute();
+ conn.prepareStatement("USE BLOOMDB").execute();
+ String sql = "INSERT INTO `target` (`partition_id`, `filter_type_id`, `filter`) "
+ + "VALUES (?, (SELECT `id` FROM `filtertype` WHERE id=?), ?)";
+ PreparedStatement stmt = conn.prepareStatement(sql);
+ BloomFilter filter = BloomFilter.create(1000, 0.01);
+ final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream();
+ Assertions.assertDoesNotThrow(() -> {
+ filter.writeTo(filterBAOS);
+ filterBAOS.close();
+ });
+ stmt.setInt(1, 1);
+ stmt.setInt(2, 1);
+ stmt.setBytes(3, filterBAOS.toByteArray());
+ stmt.executeUpdate();
+ });
+ }
+}
diff --git a/src/test/java/com/teragrep/pth_06/planner/PatternMatchTablesTest.java b/src/test/java/com/teragrep/pth_06/planner/PatternMatchTablesTest.java
new file mode 100644
index 00000000..36aaa431
--- /dev/null
+++ b/src/test/java/com/teragrep/pth_06/planner/PatternMatchTablesTest.java
@@ -0,0 +1,219 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner;
+
+import org.apache.spark.util.sketch.BloomFilter;
+import org.jooq.DSLContext;
+import org.jooq.Named;
+import org.jooq.Table;
+import org.jooq.impl.DSL;
+import org.junit.jupiter.api.*;
+
+import java.io.ByteArrayOutputStream;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+public class PatternMatchTablesTest {
+
+ final String url = "jdbc:h2:mem:test;MODE=MariaDB;DATABASE_TO_LOWER=TRUE;CASE_INSENSITIVE_IDENTIFIERS=TRUE";
+ final String userName = "sa";
+ final String password = "";
+ // matches IPv4
+ final String ipRegex = "(\\b25[0-5]|\\b2[0-4][0-9]|\\b[01]?[0-9][0-9]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}";
+ // matches IPv4 starting with 255.
+ final String ipStartingWith255 = "(\\b25[0-5]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}";
+ final List patternList = new ArrayList<>(Arrays.asList(ipRegex, ipStartingWith255));
+ final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password));
+
+ @BeforeAll
+ void setup() {
+ Assertions.assertDoesNotThrow(() -> {
+ conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute();
+ conn.prepareStatement("USE BLOOMDB").execute();
+ conn.prepareStatement("DROP TABLE IF EXISTS filtertype").execute();
+ conn.prepareStatement("DROP TABLE IF EXISTS pattern_test_ip").execute();
+ conn.prepareStatement("DROP TABLE IF EXISTS pattern_test_ip255").execute();
+ String filtertype = "CREATE TABLE`filtertype`" + "("
+ + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
+ + " `expectedElements` bigint(20) unsigned NOT NULL,"
+ + " `targetFpp` DOUBLE(2) unsigned NOT NULL,"
+ + " `pattern` VARCHAR(2048) NOT NULL,"
+ + " UNIQUE KEY (`expectedElements`, `targetFpp`, `pattern`)" + ")";
+ String ip = "CREATE TABLE `pattern_test_ip`("
+ + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
+ + " `partition_id` bigint(20) unsigned NOT NULL UNIQUE,"
+ + " `filter_type_id` bigint(20) unsigned NOT NULL,"
+ + " `filter` longblob NOT NULL)";
+ String ip255 = "CREATE TABLE `pattern_test_ip255`("
+ + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
+ + " `partition_id` bigint(20) unsigned NOT NULL UNIQUE,"
+ + " `filter_type_id` bigint(20) unsigned NOT NULL,"
+ + " `filter` longblob NOT NULL)";
+ conn.prepareStatement(filtertype).execute();
+ conn.prepareStatement(ip).execute();
+ conn.prepareStatement(ip255).execute();
+ String typeSQL = "INSERT INTO `filtertype` (`id`,`expectedElements`, `targetFpp`, `pattern`) VALUES (?,?,?,?)";
+ int id = 1;
+ for (String pattern : patternList) {
+ PreparedStatement filterType = conn.prepareStatement(typeSQL);
+ filterType.setInt(1, id);
+ filterType.setInt(2, 1000);
+ filterType.setDouble(3, 0.01);
+ filterType.setString(4, pattern);
+ filterType.executeUpdate();
+ id++;
+ }
+ writeFilter("pattern_test_ip", 1);
+ writeFilter("pattern_test_ip255", 2);
+ });
+ }
+
+ @AfterAll
+ void tearDown() {
+ Assertions.assertDoesNotThrow(() -> {
+ conn.prepareStatement("DROP ALL OBJECTS").execute(); //h2 clear database
+ conn.close();
+ });
+ }
+
+ @Test
+ public void testSingleMatch() {
+ DSLContext ctx = DSL.using(conn);
+ String input = "192.168.1.1";
+ PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input);
+ List> result = patternMatchTables.toList();
+ Assertions.assertEquals(1, result.size());
+ Assertions.assertEquals("pattern_test_ip", result.get(0).getName());
+ }
+
+ @Test
+ public void testSearchTermTokenizedMatch() {
+ DSLContext ctx = DSL.using(conn);
+ String input = "target_ip=192.168.1.1";
+ PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input);
+ List> result = patternMatchTables.toList();
+ Assertions.assertEquals(1, result.size());
+ Assertions.assertEquals("pattern_test_ip", result.get(0).getName());
+ }
+
+ @Test
+ public void testMultipleMatch() {
+ DSLContext ctx = DSL.using(conn);
+ String input = "255.255.255.255";
+ PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input);
+ List> result = patternMatchTables.toList();
+ List> result2 = patternMatchTables.toList();
+ List tableNames = result.stream().map(Named::getName).collect(Collectors.toList());
+ Assertions.assertEquals(2, result.size());
+ Assertions.assertEquals(2, result2.size());
+ Assertions.assertTrue(tableNames.contains("pattern_test_ip"));
+ Assertions.assertTrue(tableNames.contains("pattern_test_ip255"));
+ }
+
+ @Test
+ public void testNoMatch() {
+ DSLContext ctx = DSL.using(conn);
+ String input = "testinput";
+ PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input);
+ List> result = patternMatchTables.toList();
+ Assertions.assertTrue(result.isEmpty());
+ }
+
+ @Test
+ public void equalsTest() {
+ DSLContext ctx = DSL.using(conn);
+ String input = "testinput";
+ PatternMatchTables eq1 = new PatternMatchTables(ctx, input);
+ PatternMatchTables eq2 = new PatternMatchTables(ctx, input);
+ Assertions.assertEquals(eq1, eq2);
+ Assertions.assertEquals(eq2, eq1);
+ }
+
+ @Test
+ public void differentInputNotEqualsTest() {
+ DSLContext ctx = DSL.using(conn);
+ PatternMatchTables eq1 = new PatternMatchTables(ctx, "testinput");
+ PatternMatchTables eq2 = new PatternMatchTables(ctx, "anotherinput");
+ Assertions.assertNotEquals(eq1, eq2);
+ Assertions.assertNotEquals(eq2, eq1);
+ }
+
+ @Test
+ public void differentDSLContextNotEqualsTest() {
+ DSLContext ctx1 = DSL.using(conn);
+ DSLContext ctx2 = DSL.using(conn);
+ PatternMatchTables eq1 = new PatternMatchTables(ctx1, "testinput");
+ PatternMatchTables eq2 = new PatternMatchTables(ctx2, "testinput");
+ Assertions.assertNotEquals(eq1, eq2);
+ Assertions.assertNotEquals(eq2, eq1);
+ }
+
+ private void writeFilter(String tableName, int filterId) {
+ Assertions.assertDoesNotThrow(() -> {
+ conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute();
+ conn.prepareStatement("USE BLOOMDB").execute();
+ String sql = "INSERT INTO `" + tableName + "` (`partition_id`, `filter_type_id`, `filter`) "
+ + "VALUES (?, (SELECT `id` FROM `filtertype` WHERE id=?), ?)";
+ PreparedStatement stmt = conn.prepareStatement(sql);
+ BloomFilter filter = BloomFilter.create(1000, 0.01);
+ final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream();
+ Assertions.assertDoesNotThrow(() -> {
+ filter.writeTo(filterBAOS);
+ filterBAOS.close();
+ });
+ stmt.setInt(1, 1);
+ stmt.setInt(2, filterId);
+ stmt.setBytes(3, filterBAOS.toByteArray());
+ stmt.executeUpdate();
+ });
+ }
+}
diff --git a/src/test/java/com/teragrep/pth_06/planner/TableFilterTypesFromMetadataResultTest.java b/src/test/java/com/teragrep/pth_06/planner/TableFilterTypesFromMetadataResultTest.java
new file mode 100644
index 00000000..50516717
--- /dev/null
+++ b/src/test/java/com/teragrep/pth_06/planner/TableFilterTypesFromMetadataResultTest.java
@@ -0,0 +1,231 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner;
+
+import org.apache.spark.util.sketch.BloomFilter;
+import org.jooq.DSLContext;
+import org.jooq.Record;
+import org.jooq.Result;
+import org.jooq.Table;
+import org.jooq.impl.DSL;
+import org.jooq.types.ULong;
+import org.junit.jupiter.api.*;
+
+import java.io.ByteArrayOutputStream;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+class TableFilterTypesFromMetadataResultTest {
+
+ final String url = "jdbc:h2:mem:test;MODE=MariaDB;DATABASE_TO_LOWER=TRUE;CASE_INSENSITIVE_IDENTIFIERS=TRUE";
+ final String userName = "sa";
+ final String password = "";
+ // matches IPv4
+ final String ipRegex = "(\\b25[0-5]|\\b2[0-4][0-9]|\\b[01]?[0-9][0-9]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}";
+    // matches IPv4 addresses whose first octet is 25 or 250-255
+ final String ipStartingWith255 = "(\\b25[0-5]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}";
+    final List<String> patternList = new ArrayList<>(Arrays.asList(ipRegex, ipStartingWith255)); // regex per filtertype row
+ final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password));
+
+    /** Recreates the filtertype table in schema BLOOMDB and inserts one row per entry of patternList. */
+    @BeforeAll
+    public void setup() {
+        Assertions.assertDoesNotThrow(() -> {
+            conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute();
+            conn.prepareStatement("USE BLOOMDB").execute();
+            conn.prepareStatement("DROP TABLE IF EXISTS filtertype").execute();
+            String filtertype = "CREATE TABLE`filtertype`" + "("
+                    + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
+                    + " `expectedElements` bigint(20) unsigned NOT NULL,"
+                    + " `targetFpp` DOUBLE(2) unsigned NOT NULL,"
+                    + " `pattern` VARCHAR(2048) NOT NULL,"
+                    + " UNIQUE KEY (`expectedElements`, `targetFpp`, `pattern`)" + ")";
+            conn.prepareStatement(filtertype).execute();
+            String typeSQL = "INSERT INTO `filtertype` (`id`,`expectedElements`, `targetFpp`, `pattern`) VALUES (?,?,?,?)";
+            try (PreparedStatement filterType = conn.prepareStatement(typeSQL)) { // prepared once, closed after the loop
+                for (int id = 1; id <= patternList.size(); id++) { // row ids are 1-based
+                    filterType.setInt(1, id);
+                    filterType.setInt(2, id * 1000);
+                    filterType.setDouble(3, 0.01);
+                    filterType.setString(4, patternList.get(id - 1));
+                    filterType.executeUpdate();
+                }
+            }
+        });
+    }
+
+ @BeforeEach
+ void createTargetTable() {
+ Assertions.assertDoesNotThrow(() -> {
+ conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute();
+ conn.prepareStatement("USE BLOOMDB").execute();
+ conn.prepareStatement("DROP TABLE IF EXISTS target").execute();
+ String targetTable = "CREATE TABLE `target`("
+ + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
+ + " `partition_id` bigint(20) unsigned NOT NULL UNIQUE,"
+ + " `filter_type_id` bigint(20) unsigned NOT NULL,"
+ + " `filter` longblob NOT NULL)";
+ conn.prepareStatement(targetTable).execute();
+ });
+ }
+
+    @AfterAll
+    void tearDown() { // runs once after all tests: clears the database and closes the shared connection
+        Assertions.assertDoesNotThrow(() -> {
+            conn.prepareStatement("DROP ALL OBJECTS").execute(); // H2 statement that clears the database
+            conn.close();
+        });
+    }
+
+    /** With nothing inserted into the target table, toResult() must throw "Origin table was empty". */
+    @Test
+    void testNoFilterTypes() {
+        DSLContext ctx = DSL.using(conn);
+        // first table of schema bloomdb that is not the filtertype table
+        Table<?> table = ctx.meta()
+                .filterSchemas(s -> s.getName().equals("bloomdb"))
+                .filterTables(t -> !t.getName().equals("filtertype"))
+                .getTables().get(0);
+        TableFilterTypesFromMetadata result = new TableFilterTypesFromMetadata(ctx, table, 0L);
+        RuntimeException exception = Assertions.assertThrows(RuntimeException.class, result::toResult);
+        Assertions.assertEquals("Origin table was empty", exception.getMessage());
+    }
+
+    /** One inserted filter of type 1 must yield exactly one record holding 1000 and 0.01 at indexes 1 and 2. */
+    @Test
+    void testOneFilterType() {
+        insertSizedFilterIntoTargetTable(1);
+        DSLContext ctx = DSL.using(conn);
+        // first table of schema bloomdb that is not the filtertype table
+        Table<?> table = ctx.meta()
+                .filterSchemas(s -> s.getName().equals("bloomdb"))
+                .filterTables(t -> !t.getName().equals("filtertype"))
+                .getTables().get(0);
+        TableFilterTypesFromMetadata result = new TableFilterTypesFromMetadata(ctx, table, 0L);
+        Result<Record> records = result.toResult();
+        Assertions.assertEquals(1, records.size());
+        Assertions.assertEquals(ULong.valueOf(1000), records.get(0).get(1));
+        Assertions.assertEquals(0.01, records.get(0).get(2));
+    }
+
+    /** Filters of types 1 and 2 must yield two records holding 1000 and 2000 at index 1, in that order. */
+    @Test
+    void testMultipleFilterTypes() {
+        insertSizedFilterIntoTargetTable(1);
+        insertSizedFilterIntoTargetTable(2);
+        DSLContext ctx = DSL.using(conn);
+        // first table of schema bloomdb that is not the filtertype table
+        Table<?> table = ctx.meta()
+                .filterSchemas(s -> s.getName().equals("bloomdb"))
+                .filterTables(t -> !t.getName().equals("filtertype"))
+                .getTables().get(0);
+        TableFilterTypesFromMetadata result = new TableFilterTypesFromMetadata(ctx, table, 0L);
+        Result<Record> records = result.toResult();
+        Assertions.assertEquals(2, records.size());
+        Record first = records.get(0);
+        Record second = records.get(1);
+        Assertions.assertEquals(ULong.valueOf("1000"), first.get(1)); // expected value goes first
+        Assertions.assertEquals(ULong.valueOf("2000"), second.get(1));
+    }
+
+    /** Two instances built from the same context, table and term id must be equal in both directions. */
+    @Test
+    public void testEquality() {
+        DSLContext ctx = DSL.using(conn);
+        // first table of schema bloomdb that is not the filtertype table
+        Table<?> table = ctx.meta()
+                .filterSchemas(s -> s.getName().equals("bloomdb"))
+                .filterTables(t -> !t.getName().equals("filtertype"))
+                .getTables().get(0);
+        TableFilterTypesFromMetadata result1 = new TableFilterTypesFromMetadata(ctx, table, 0L);
+        TableFilterTypesFromMetadata result2 = new TableFilterTypesFromMetadata(ctx, table, 0L);
+        Assertions.assertEquals(result1, result2);
+        Assertions.assertEquals(result2, result1);
+    }
+
+    /** A different term id or a null table must make instances unequal. */
+    @Test
+    public void testNotEquals() {
+        DSLContext ctx = DSL.using(conn);
+        // first table of schema bloomdb that is not the filtertype table
+        Table<?> table = ctx.meta()
+                .filterSchemas(s -> s.getName().equals("bloomdb"))
+                .filterTables(t -> !t.getName().equals("filtertype"))
+                .getTables().get(0);
+        TableFilterTypesFromMetadata result1 = new TableFilterTypesFromMetadata(ctx, table, 0L);
+        TableFilterTypesFromMetadata result2 = new TableFilterTypesFromMetadata(ctx, table, 1L);
+        TableFilterTypesFromMetadata result3 = new TableFilterTypesFromMetadata(ctx, null, 0L);
+        Assertions.assertNotEquals(result1, result2);
+        Assertions.assertNotEquals(result1, result3);
+    }
+
+    /** Inserts one `target` row using filterTypeId as both partition id and filter type id; asserts one row was written. */
+    void insertSizedFilterIntoTargetTable(int filterTypeId) {
+        Assertions.assertDoesNotThrow(() -> {
+            conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute();
+            conn.prepareStatement("USE BLOOMDB").execute();
+            String sql = "INSERT INTO `target` (`partition_id`, `filter_type_id`, `filter`) "
+                    + "VALUES (?, (SELECT `id` FROM `filtertype` WHERE id=?), ?)";
+            BloomFilter filter = BloomFilter.create(1000, 0.01);
+
+            final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream();
+            // no nested assertDoesNotThrow needed: the enclosing one already fails the test on any exception
+            filter.writeTo(filterBAOS);
+            try (PreparedStatement stmt = conn.prepareStatement(sql)) { // statement is closed after the insert
+                stmt.setInt(1, filterTypeId);
+                stmt.setInt(2, filterTypeId);
+                stmt.setBytes(3, filterBAOS.toByteArray());
+                int success = stmt.executeUpdate();
+                Assertions.assertEquals(1, success);
+            }
+        });
+    }
+}
diff --git a/src/test/java/com/teragrep/pth_06/planner/TableFiltersTest.java b/src/test/java/com/teragrep/pth_06/planner/TableFiltersTest.java
new file mode 100644
index 00000000..5ab27f18
--- /dev/null
+++ b/src/test/java/com/teragrep/pth_06/planner/TableFiltersTest.java
@@ -0,0 +1,222 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner;
+
+import org.apache.spark.util.sketch.BloomFilter;
+import org.jooq.DSLContext;
+import org.jooq.Table;
+import org.jooq.exception.DataAccessException;
+import org.jooq.impl.DSL;
+import org.junit.jupiter.api.*;
+
+import java.io.ByteArrayOutputStream;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+class TableFiltersTest {
+
+ final String url = "jdbc:h2:mem:test;MODE=MariaDB;DATABASE_TO_LOWER=TRUE;CASE_INSENSITIVE_IDENTIFIERS=TRUE";
+ final String userName = "sa";
+ final String password = "";
+ // matches IPv4
+ final String ipRegex = "(\\b25[0-5]|\\b2[0-4][0-9]|\\b[01]?[0-9][0-9]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}";
+    // matches IPv4 addresses whose first octet is 25 or 250-255
+ final String ipStartingWith255 = "(\\b25[0-5]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}";
+    final List<String> patternList = new ArrayList<>(Arrays.asList(ipRegex, ipStartingWith255)); // regex per filtertype row
+ final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password));
+
+    /** Recreates the filtertype table in schema BLOOMDB and inserts one row per entry of patternList. */
+    @BeforeAll
+    public void setup() {
+        Assertions.assertDoesNotThrow(() -> {
+            conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute();
+            conn.prepareStatement("USE BLOOMDB").execute();
+            conn.prepareStatement("DROP TABLE IF EXISTS filtertype").execute();
+            String filtertype = "CREATE TABLE`filtertype`" + "("
+                    + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
+                    + " `expectedElements` bigint(20) unsigned NOT NULL,"
+                    + " `targetFpp` DOUBLE(2) unsigned NOT NULL,"
+                    + " `pattern` VARCHAR(2048) NOT NULL,"
+                    + " UNIQUE KEY (`expectedElements`, `targetFpp`, `pattern`)" + ")";
+            conn.prepareStatement(filtertype).execute();
+            String typeSQL = "INSERT INTO `filtertype` (`id`,`expectedElements`, `targetFpp`, `pattern`) VALUES (?,?,?,?)";
+            try (PreparedStatement filterType = conn.prepareStatement(typeSQL)) { // prepared once, closed after the loop
+                for (int id = 1; id <= patternList.size(); id++) { // row ids are 1-based
+                    filterType.setInt(1, id);
+                    filterType.setInt(2, id * 1000);
+                    filterType.setDouble(3, 0.01);
+                    filterType.setString(4, patternList.get(id - 1));
+                    filterType.executeUpdate();
+                }
+            }
+        });
+    }
+
+ @BeforeEach
+ void createTargetTable() {
+ Assertions.assertDoesNotThrow(() -> {
+ conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute();
+ conn.prepareStatement("USE BLOOMDB").execute();
+ conn.prepareStatement("DROP TABLE IF EXISTS target").execute();
+ String targetTable = "CREATE TABLE `target`("
+ + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
+ + " `partition_id` bigint(20) unsigned NOT NULL UNIQUE,"
+ + " `filter_type_id` bigint(20) unsigned NOT NULL,"
+ + " `filter` longblob NOT NULL)";
+ conn.prepareStatement(targetTable).execute();
+ });
+ }
+
+    @AfterAll
+    void tearDown() { // runs once after all tests: clears the database and closes the shared connection
+        Assertions.assertDoesNotThrow(() -> {
+            conn.prepareStatement("DROP ALL OBJECTS").execute(); // H2 statement that clears the database
+            conn.close();
+        });
+    }
+
+    /** Constructing a TableFilters for a populated target table must not throw. */
+    @Test
+    public void testCreation() {
+        fillTargetTable();
+        DSLContext ctx = DSL.using(conn);
+        // first table of schema bloomdb that is not the filtertype table
+        Table<?> table = ctx.meta()
+                .filterSchemas(s -> s.getName().equals("bloomdb"))
+                .filterTables(t -> !t.getName().equals("filtertype"))
+                .getTables().get(0);
+        Assertions.assertDoesNotThrow(() -> new TableFilters(ctx, table, 0L, "test"));
+    }
+
+    /** Inserting filters for "192.168.1.1" must throw a DataAccessException naming the term_0_target insert columns. */
+    @Test
+    public void testInsertFiltersIntoCategoryTable() {
+        fillTargetTable();
+        DSLContext ctx = DSL.using(conn);
+        // first table of schema bloomdb that is not the filtertype table
+        Table<?> table = ctx.meta()
+                .filterSchemas(s -> s.getName().equals("bloomdb"))
+                .filterTables(t -> !t.getName().equals("filtertype"))
+                .getTables().get(0);
+        DataAccessException exception = Assertions
+                .assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, "192.168.1.1").insertFiltersIntoCategoryTable());
+        Assertions.assertTrue(exception.getMessage().contains("term_0_target\" (term_id, type_id, \"filter\")"));
+    }
+
+    /** Inserting filters for "nomatch" must throw an IllegalStateException about inserting an empty filter. */
+    @Test
+    public void testInsertFiltersWithoutPatternMatch() {
+        fillTargetTable();
+        DSLContext ctx = DSL.using(conn);
+        // first table of schema bloomdb that is not the filtertype table
+        Table<?> table = ctx.meta()
+                .filterSchemas(s -> s.getName().equals("bloomdb"))
+                .filterTables(t -> !t.getName().equals("filtertype"))
+                .getTables().get(0);
+        IllegalStateException exception = Assertions
+                .assertThrows(IllegalStateException.class, () -> new TableFilters(ctx, table, 0L, "nomatch").insertFiltersIntoCategoryTable());
+        Assertions.assertTrue(exception.getMessage().contains("Trying to insert empty filter"));
+    }
+
+    /** Two TableFilters built from identical arguments must be equal. */
+    @Test
+    public void testEquals() {
+        fillTargetTable();
+        DSLContext ctx = DSL.using(conn);
+        // first table of schema bloomdb that is not the filtertype table
+        Table<?> table = ctx.meta()
+                .filterSchemas(s -> s.getName().equals("bloomdb"))
+                .filterTables(t -> !t.getName().equals("filtertype"))
+                .getTables().get(0);
+
+        TableFilters filter1 = new TableFilters(ctx, table, 0L, "test");
+        TableFilters filter2 = new TableFilters(ctx, table, 0L, "test");
+        Assertions.assertEquals(filter1, filter2);
+    }
+
+    /** A different term id or a different value must make TableFilters instances unequal. */
+    @Test
+    public void testNotEquals() {
+        fillTargetTable();
+        DSLContext ctx = DSL.using(conn);
+        // first table of schema bloomdb that is not the filtertype table
+        Table<?> table = ctx.meta()
+                .filterSchemas(s -> s.getName().equals("bloomdb"))
+                .filterTables(t -> !t.getName().equals("filtertype"))
+                .getTables().get(0);
+
+        TableFilters filter1 = new TableFilters(ctx, table, 0L, "test");
+        TableFilters filter2 = new TableFilters(ctx, table, 1L, "test");
+        TableFilters filter3 = new TableFilters(ctx, table, 0L, "mest");
+        Assertions.assertNotEquals(filter1, filter2);
+        Assertions.assertNotEquals(filter1, filter3);
+    }
+
+    /** Inserts one row (partition_id 1, filter type 1, serialized BloomFilter) into the `target` table. */
+    void fillTargetTable() {
+        Assertions.assertDoesNotThrow(() -> {
+            conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute();
+            conn.prepareStatement("USE BLOOMDB").execute();
+            String sql = "INSERT INTO `target` (`partition_id`, `filter_type_id`, `filter`) "
+                    + "VALUES (?, (SELECT `id` FROM `filtertype` WHERE id=?), ?)";
+            BloomFilter filter = BloomFilter.create(1000, 0.01);
+            final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream();
+            // no nested assertDoesNotThrow needed: the enclosing one already fails the test on any exception
+            filter.writeTo(filterBAOS);
+            try (PreparedStatement stmt = conn.prepareStatement(sql)) { // statement is closed after the insert
+                stmt.setInt(1, 1);
+                stmt.setInt(2, 1);
+                stmt.setBytes(3, filterBAOS.toByteArray());
+                stmt.executeUpdate();
+            }
+        });
+    }
+}
diff --git a/src/test/java/com/teragrep/pth_06/planner/TokenizedValueTest.java b/src/test/java/com/teragrep/pth_06/planner/TokenizedValueTest.java
new file mode 100644
index 00000000..69c2ee2d
--- /dev/null
+++ b/src/test/java/com/teragrep/pth_06/planner/TokenizedValueTest.java
@@ -0,0 +1,89 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner;
+
+import com.teragrep.blf_01.Token;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/** Tests TokenizedValue: the tokens produced for a dotted value and equality between instances. */
+class TokenizedValueTest {
+    @Test
+    void testTokenization() { // "test.nest" must produce exactly the six tokens asserted below
+        TokenizedValue result = new TokenizedValue("test.nest");
+        Set<String> tokens = result.tokens().stream().map(Token::toString).collect(Collectors.toSet());
+        Assertions.assertEquals("test.nest", result.value);
+        Assertions.assertTrue(tokens.contains("nest"));
+        Assertions.assertTrue(tokens.contains("test"));
+        Assertions.assertTrue(tokens.contains("."));
+        Assertions.assertTrue(tokens.contains("test.nest"));
+        Assertions.assertTrue(tokens.contains(".nest"));
+        Assertions.assertTrue(tokens.contains("test."));
+        Assertions.assertEquals(6, tokens.size());
+    }
+
+    @Test
+    void testEquality() { // same input value => equal, in both directions
+        TokenizedValue value1 = new TokenizedValue("test");
+        TokenizedValue value2 = new TokenizedValue("test");
+        Assertions.assertEquals(value1, value2);
+        Assertions.assertEquals(value2, value1);
+        value1.tokens(); // calling tokens() on one instance must not change equality
+        Assertions.assertEquals(value2, value1);
+    }
+
+    @Test
+    void testNotEquals() { // different input values, and null, are not equal
+        TokenizedValue value1 = new TokenizedValue("test");
+        TokenizedValue value2 = new TokenizedValue("nest");
+        Assertions.assertNotEquals(value1, value2);
+        Assertions.assertNotEquals(value2, value1);
+        Assertions.assertNotEquals(value1, null);
+    }
+}
diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableConditionTest.java
new file mode 100644
index 00000000..c57e2312
--- /dev/null
+++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableConditionTest.java
@@ -0,0 +1,223 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner.walker.conditions;
+
+import org.apache.spark.util.sketch.BloomFilter;
+import org.jooq.DSLContext;
+import org.jooq.Table;
+import org.jooq.impl.DSL;
+import org.junit.jupiter.api.*;
+
+import java.io.ByteArrayOutputStream;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Condition equality is compared via toString() because jOOQ's Condition checks equality using only toString(),
+ * a behaviour it inherits from QueryPart.
+ *
+ * @see org.jooq.QueryPart
+ */
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+class CategoryTableConditionTest {
+
+ final String url = "jdbc:h2:mem:test;MODE=MariaDB;DATABASE_TO_LOWER=TRUE;CASE_INSENSITIVE_IDENTIFIERS=TRUE";
+ final String userName = "sa";
+ final String password = "";
+ // matches IPv4
+ final String ipRegex = "(\\b25[0-5]|\\b2[0-4][0-9]|\\b[01]?[0-9][0-9]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}";
+    // matches IPv4 addresses whose first octet is 25 or 250-255
+ final String ipStartingWith255 = "(\\b25[0-5]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}";
+    final List<String> patternList = new ArrayList<>(Arrays.asList(ipRegex, ipStartingWith255)); // regex per filtertype row
+ final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password));
+
+    /** Recreates the filtertype table in schema BLOOMDB and inserts one row per entry of patternList. */
+    @BeforeAll
+    public void setup() {
+        Assertions.assertDoesNotThrow(() -> {
+            conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute();
+            conn.prepareStatement("USE BLOOMDB").execute();
+            conn.prepareStatement("DROP TABLE IF EXISTS filtertype").execute();
+            String filtertype = "CREATE TABLE`filtertype`" + "("
+                    + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
+                    + " `expectedElements` bigint(20) unsigned NOT NULL,"
+                    + " `targetFpp` DOUBLE(2) unsigned NOT NULL,"
+                    + " `pattern` VARCHAR(2048) NOT NULL,"
+                    + " UNIQUE KEY (`expectedElements`, `targetFpp`, `pattern`)" + ")";
+            conn.prepareStatement(filtertype).execute();
+            String typeSQL = "INSERT INTO `filtertype` (`id`,`expectedElements`, `targetFpp`, `pattern`) VALUES (?,?,?,?)";
+            try (PreparedStatement filterType = conn.prepareStatement(typeSQL)) { // prepared once, closed after the loop
+                for (int id = 1; id <= patternList.size(); id++) { // row ids are 1-based
+                    filterType.setInt(1, id);
+                    filterType.setInt(2, 1000);
+                    filterType.setDouble(3, 0.01);
+                    filterType.setString(4, patternList.get(id - 1));
+                    filterType.executeUpdate();
+                }
+            }
+        });
+    }
+
+ @BeforeEach
+ void createTargetTable() {
+ Assertions.assertDoesNotThrow(() -> {
+ conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute();
+ conn.prepareStatement("USE BLOOMDB").execute();
+ conn.prepareStatement("DROP TABLE IF EXISTS target").execute();
+ String targetTable = "CREATE TABLE `target`("
+ + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
+ + " `partition_id` bigint(20) unsigned NOT NULL UNIQUE,"
+ + " `filter_type_id` bigint(20) unsigned NOT NULL,"
+ + " `filter` longblob NOT NULL)";
+ conn.prepareStatement(targetTable).execute();
+ });
+ }
+
+    @AfterAll
+    void tearDown() { // runs once after all tests: clears the database and closes the shared connection
+        Assertions.assertDoesNotThrow(() -> {
+            conn.prepareStatement("DROP ALL OBJECTS").execute(); // H2 statement that clears the database
+            conn.close();
+        });
+    }
+
+    /** For term id 0 the rendered condition must be a bloommatch against the term_0_target category table. */
+    @Test
+    void testCondition() {
+        fillTargetTable();
+        DSLContext ctx = DSL.using(conn);
+        // first table of schema bloomdb that is not the filtertype table
+        Table<?> target1 = ctx.meta()
+                .filterSchemas(s -> s.getName().equals("bloomdb"))
+                .filterTables(t -> !t.getName().equals("filtertype"))
+                .getTables().get(0);
+        CategoryTableCondition cond = new CategoryTableCondition(target1, 0L);
+        String e = "(\n" + "  bloommatch(\n" + "    (\n" + "      select \"term_0_target\".\"filter\"\n"
+                + "      from \"term_0_target\"\n" + "      where (\n" + "        term_id = 0\n"
+                + "        and type_id = \"bloomdb\".\"target\".\"filter_type_id\"\n" + "      )\n" + "    ),\n"
+                + "    \"bloomdb\".\"target\".\"filter\"\n" + "  ) = true\n"
+                + "  and \"bloomdb\".\"target\".\"filter\" is not null\n" + ")";
+        Assertions.assertEquals(e, cond.condition().toString());
+    }
+
+    /** For term id 1 the rendered condition must reference term_1_target and term_id = 1. */
+    @Test
+    void testBloomTermId() {
+        fillTargetTable();
+        DSLContext ctx = DSL.using(conn);
+        // first table of schema bloomdb that is not the filtertype table
+        Table<?> target1 = ctx.meta()
+                .filterSchemas(s -> s.getName().equals("bloomdb"))
+                .filterTables(t -> !t.getName().equals("filtertype"))
+                .getTables().get(0);
+        CategoryTableCondition cond = new CategoryTableCondition(target1, 1L);
+        String e = "(\n" + "  bloommatch(\n" + "    (\n" + "      select \"term_1_target\".\"filter\"\n"
+                + "      from \"term_1_target\"\n" + "      where (\n" + "        term_id = 1\n"
+                + "        and type_id = \"bloomdb\".\"target\".\"filter_type_id\"\n" + "      )\n" + "    ),\n"
+                + "    \"bloomdb\".\"target\".\"filter\"\n" + "  ) = true\n"
+                + "  and \"bloomdb\".\"target\".\"filter\" is not null\n" + ")";
+        Assertions.assertEquals(e, cond.condition().toString());
+    }
+
+    /** Conditions built from the same table and term id are equal, also after condition() was called on one. */
+    @Test
+    public void testEquality() {
+        fillTargetTable();
+        DSLContext ctx = DSL.using(conn);
+        // first table of schema bloomdb that is not the filtertype table
+        Table<?> target1 = ctx.meta()
+                .filterSchemas(s -> s.getName().equals("bloomdb"))
+                .filterTables(t -> !t.getName().equals("filtertype"))
+                .getTables().get(0);
+        CategoryTableCondition cond1 = new CategoryTableCondition(target1, 1L);
+        CategoryTableCondition cond2 = new CategoryTableCondition(target1, 1L);
+        Assertions.assertEquals(cond1, cond2);
+        cond1.condition();
+        Assertions.assertEquals(cond1, cond2);
+    }
+
+    /** A different term id or a null table must make conditions unequal. */
+    @Test
+    public void testNonEquality() {
+        fillTargetTable();
+        DSLContext ctx = DSL.using(conn);
+        // first table of schema bloomdb that is not the filtertype table
+        Table<?> target1 = ctx.meta()
+                .filterSchemas(s -> s.getName().equals("bloomdb"))
+                .filterTables(t -> !t.getName().equals("filtertype"))
+                .getTables().get(0);
+        CategoryTableCondition cond1 = new CategoryTableCondition(target1, 0L);
+        CategoryTableCondition cond2 = new CategoryTableCondition(target1, 1L);
+        CategoryTableCondition cond3 = new CategoryTableCondition(null, 1L);
+        Assertions.assertNotEquals(cond1, cond2);
+        Assertions.assertNotEquals(cond1, cond3);
+    }
+
+    /** Inserts one row (partition_id 1, filter type 1, serialized BloomFilter) into the `target` table. */
+    void fillTargetTable() {
+        Assertions.assertDoesNotThrow(() -> {
+            conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute();
+            conn.prepareStatement("USE BLOOMDB").execute();
+            String sql = "INSERT INTO `target` (`partition_id`, `filter_type_id`, `filter`) "
+                    + "VALUES (?, (SELECT `id` FROM `filtertype` WHERE id=?), ?)";
+            BloomFilter filter = BloomFilter.create(1000, 0.01);
+            final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream();
+            // no nested assertDoesNotThrow needed: the enclosing one already fails the test on any exception
+            filter.writeTo(filterBAOS);
+            try (PreparedStatement stmt = conn.prepareStatement(sql)) { // statement is closed after the insert
+                stmt.setInt(1, 1);
+                stmt.setInt(2, 1);
+                stmt.setBytes(3, filterBAOS.toByteArray());
+                stmt.executeUpdate();
+            }
+        });
+    }
+
+}
diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/EarliestConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/EarliestConditionTest.java
index c21d8374..352305f3 100644
--- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/EarliestConditionTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/EarliestConditionTest.java
@@ -49,6 +49,12 @@
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
+/**
+ * Condition equality is compared via toString() because jOOQ's Condition checks equality using only toString(),
+ * a behaviour it inherits from QueryPart.
+ *
+ * @see org.jooq.QueryPart
+ */
public class EarliestConditionTest {
@Test
@@ -66,7 +72,6 @@ void equalsTest() {
eq1.condition();
EarliestCondition eq2 = new EarliestCondition("946677600");
Assertions.assertEquals(eq1, eq2);
- Assertions.assertEquals(eq2, eq1);
}
@Test
@@ -74,7 +79,5 @@ void notEqualsTest() {
EarliestCondition eq1 = new EarliestCondition("946677600");
EarliestCondition notEq = new EarliestCondition("1000");
Assertions.assertNotEquals(eq1, notEq);
- Assertions.assertNotEquals(notEq, eq1);
- Assertions.assertNotEquals(eq1, null);
}
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/ElementConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/ElementConditionTest.java
index e4c99561..30f614ed 100644
--- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/ElementConditionTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/ElementConditionTest.java
@@ -48,7 +48,7 @@
import com.teragrep.pth_06.config.ConditionConfig;
import org.jooq.Condition;
import org.jooq.DSLContext;
-import org.jooq.exception.SQLDialectNotSupportedException;
+import org.jooq.exception.DataAccessException;
import org.jooq.impl.DSL;
import org.jooq.tools.jdbc.MockConnection;
import org.jooq.tools.jdbc.MockResult;
@@ -59,12 +59,18 @@
import javax.xml.parsers.DocumentBuilderFactory;
+/**
+ * Conditions are compared via toString(), since a jOOQ Condition checks equality using only its toString() output,
+ * a behavior inherited from QueryPart.
+ *
+ * @see org.jooq.QueryPart
+ */
class ElementConditionTest {
final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
final Document document = Assertions.assertDoesNotThrow(() -> factory.newDocumentBuilder().newDocument());
final DSLContext mockCtx = DSL.using(new MockConnection(ctx -> new MockResult[0]));
- final ConditionConfig config = new ConditionConfig(mockCtx, false, true, false);
+ final ConditionConfig config = new ConditionConfig(mockCtx, false, true);
final ConditionConfig streamConfig = new ConditionConfig(mockCtx, true);
@Test
@@ -84,20 +90,6 @@ void testStreamTags() {
Assertions.assertEquals(loops, streamTags.length);
}
- @Test
- void testIndexStatement() {
- Element element = document.createElement("indexstatement");
- element.setAttribute("value", "searchTerm");
- element.setAttribute("operation", "EQUALS");
- Element element2 = document.createElement("indexstatement");
- element2.setAttribute("value", "searchTerm");
- element2.setAttribute("operation", "NOT_EQUALS");
- Assertions
- .assertThrows(SQLDialectNotSupportedException.class, new ElementCondition(element, config)::condition);
- Assertions.assertThrows(IllegalStateException.class, new ElementCondition(element, streamConfig)::condition);
- Assertions.assertThrows(IllegalStateException.class, new ElementCondition(element2, config)::condition);
- }
-
@Test
void testProvidedElementMissingValue() {
Element element = document.createElement("test");
@@ -118,6 +110,35 @@ void testProvidedElementMissingOperation() {
Assertions.assertThrows(IllegalStateException.class, streamElementCondition::condition);
}
+ @Test
+ void testIsIndexStatement() {
+ final Element indexStatement = document.createElement("indexstatement");
+ indexStatement.setAttribute("value", "searchTerm");
+ indexStatement.setAttribute("operation", "EQUALS");
+ final Element index = document.createElement("index");
+ index.setAttribute("value", "searchTerm");
+ index.setAttribute("operation", "EQUALS");
+ // indexstatement with EQUALS on the non-stream config is the only case asserted to be a bloom search
+ Assertions.assertTrue(new ElementCondition(indexStatement, config).isBloomSearchCondition());
+ // switching the operation on the same element must turn the bloom search off
+ indexStatement.setAttribute("operation", "NOT_EQUALS");
+ Assertions.assertFalse(new ElementCondition(indexStatement, config).isBloomSearchCondition());
+ // neither element is a bloom search condition when the stream config is used
+ Assertions.assertFalse(new ElementCondition(indexStatement, streamConfig).isBloomSearchCondition());
+ Assertions.assertFalse(new ElementCondition(index, streamConfig).isBloomSearchCondition());
+ // note: the "index" element is only checked against the stream config
+ }
+
+ @Test
+ void testIndexStatementWithBadConnection() {
+ final Element indexStatement = document.createElement("indexstatement");
+ indexStatement.setAttribute("value", "searchTerm");
+ indexStatement.setAttribute("operation", "EQUALS");
+ final ElementCondition bloomCondition = new ElementCondition(indexStatement, config);
+ Assertions.assertTrue(bloomCondition.isBloomSearchCondition());
+ Assertions.assertThrows(DataAccessException.class, () -> bloomCondition.condition()); // config is backed by the mock connection
+ }
+
@Test
void testTimeQualifiers() {
String[] tags = {
@@ -189,10 +210,8 @@ void notEqualsTest() {
ElementCondition notEq = new ElementCondition(anotherElement, config);
ElementCondition notEq2 = new ElementCondition(element, streamConfig);
Assertions.assertNotEquals(eq1, notEq);
- Assertions.assertNotEquals(notEq, eq1);
Assertions.assertNotEquals(eq1, notEq2);
Assertions.assertNotEquals(notEq, notEq2);
- Assertions.assertNotEquals(eq1, null);
}
@Test
diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/HostConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/HostConditionTest.java
index be77c9be..91eb44c8 100644
--- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/HostConditionTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/HostConditionTest.java
@@ -49,6 +49,12 @@
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
+/**
+ * Conditions are compared via toString(), since a jOOQ Condition checks equality using only its toString() output,
+ * a behavior inherited from QueryPart.
+ *
+ * @see org.jooq.QueryPart
+ */
public class HostConditionTest {
@Test
@@ -84,7 +90,6 @@ void equalsTest() {
eq3.condition();
HostCondition eq4 = new HostCondition("946677600", "EQUALS", true);
Assertions.assertEquals(eq1, eq2);
- Assertions.assertEquals(eq2, eq1);
Assertions.assertEquals(eq3, eq4);
}
@@ -94,8 +99,6 @@ void notEqualsTest() {
HostCondition notEq = new HostCondition("1000", "EQUALS", false);
HostCondition notEq2 = new HostCondition("946677600", "EQUALS", true);
Assertions.assertNotEquals(eq1, notEq);
- Assertions.assertNotEquals(notEq, eq1);
- Assertions.assertNotEquals(eq1, null);
Assertions.assertNotEquals(eq1, notEq2);
Assertions.assertNotEquals(notEq, notEq2);
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/IndexConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/IndexConditionTest.java
index 734177de..5913f1d6 100644
--- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/IndexConditionTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/IndexConditionTest.java
@@ -49,6 +49,12 @@
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
+/**
+ * Conditions are compared via toString(), since a jOOQ Condition checks equality using only its toString() output,
+ * a behavior inherited from QueryPart.
+ *
+ * @see org.jooq.QueryPart
+ */
public class IndexConditionTest {
@Test
@@ -89,9 +95,7 @@ void notEqualsTest() {
IndexCondition notEq = new IndexCondition("1000", "EQUALS", false);
IndexCondition notEq2 = new IndexCondition("946677600", "EQUALS", true);
Assertions.assertNotEquals(eq1, notEq);
- Assertions.assertNotEquals(notEq, eq1);
Assertions.assertNotEquals(eq1, notEq2);
Assertions.assertNotEquals(notEq, notEq2);
- Assertions.assertNotEquals(eq1, null);
}
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementConditionTest.java
index 0773a437..b180b2dc 100644
--- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementConditionTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementConditionTest.java
@@ -45,57 +45,216 @@
*/
package com.teragrep.pth_06.planner.walker.conditions;
-import com.teragrep.blf_01.Tokenizer;
import com.teragrep.pth_06.config.ConditionConfig;
+import org.apache.spark.util.sketch.BloomFilter;
+import org.jooq.Condition;
import org.jooq.DSLContext;
-import org.jooq.exception.SQLDialectNotSupportedException;
+import org.jooq.exception.DataAccessException;
import org.jooq.impl.DSL;
import org.jooq.tools.jdbc.MockConnection;
import org.jooq.tools.jdbc.MockResult;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.*;
+
+import java.io.ByteArrayOutputStream;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
/**
- * Requires database setup for full test
+ * Conditions are compared via toString(), since a jOOQ Condition checks equality using only its toString() output,
+ * a behavior inherited from QueryPart.
+ *
+ * @see org.jooq.QueryPart
*/
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
public class IndexStatementConditionTest {
- final ConditionConfig config = new ConditionConfig(
+ final String url = "jdbc:h2:mem:test;MODE=MariaDB;DATABASE_TO_LOWER=TRUE;CASE_INSENSITIVE_IDENTIFIERS=TRUE";
+ final String userName = "sa";
+ final String password = "";
+ // matches a dotted-quad IPv4 address (every octet in the range 0-255)
+ final String ipRegex = "(\\b25[0-5]|\\b2[0-4][0-9]|\\b[01]?[0-9][0-9]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}";
+ // matches an IPv4 address whose first octet is 25 or 250-255; both regexes are inserted into filtertype by setup()
+ final String ipStartingWith255 = "(\\b25[0-5]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}";
+ final List<String> patternList = new ArrayList<>(Arrays.asList(ipRegex, ipStartingWith255));
+ final ConditionConfig mockConfig = new ConditionConfig(
DSL.using(new MockConnection(ctx -> new MockResult[0])),
false,
- true,
- false
+ true
);
- final Tokenizer tokenizer = new Tokenizer(32);
+ final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password));
+
+ @BeforeAll
+ void setup() { // creates the BLOOMDB schema, the filtertype table and one filter table per pattern, then seeds them
+ Assertions.assertDoesNotThrow(() -> {
+ conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute();
+ conn.prepareStatement("USE BLOOMDB").execute();
+ conn.prepareStatement("DROP TABLE IF EXISTS filtertype").execute(); // drop leftovers so the tables are created fresh
+ conn.prepareStatement("DROP TABLE IF EXISTS pattern_test_ip").execute();
+ conn.prepareStatement("DROP TABLE IF EXISTS pattern_test_ip255").execute();
+ String filtertype = "CREATE TABLE`filtertype`" + "("
+ + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
+ + " `expectedElements` bigint(20) unsigned NOT NULL,"
+ + " `targetFpp` DOUBLE(2) unsigned NOT NULL,"
+ + " `pattern` VARCHAR(2048) NOT NULL,"
+ + " UNIQUE KEY (`expectedElements`, `targetFpp`, `pattern`)" + ")";
+ String ip = "CREATE TABLE `pattern_test_ip`("
+ + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
+ + " `partition_id` bigint(20) unsigned NOT NULL UNIQUE,"
+ + " `filter_type_id` bigint(20) unsigned NOT NULL,"
+ + " `filter` longblob NOT NULL)";
+ String ip255 = "CREATE TABLE `pattern_test_ip255`("
+ + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
+ + " `partition_id` bigint(20) unsigned NOT NULL UNIQUE,"
+ + " `filter_type_id` bigint(20) unsigned NOT NULL,"
+ + " `filter` longblob NOT NULL)";
+ conn.prepareStatement(filtertype).execute();
+ conn.prepareStatement(ip).execute();
+ conn.prepareStatement(ip255).execute();
+ String typeSQL = "INSERT INTO `filtertype` (`id`,`expectedElements`, `targetFpp`, `pattern`) VALUES (?,?,?,?)";
+ int id = 1; // explicit filtertype ids in patternList order: 1 = ipRegex, 2 = ipStartingWith255
+ for (String pattern : patternList) {
+ PreparedStatement filterType = conn.prepareStatement(typeSQL);
+ filterType.setInt(1, id);
+ filterType.setInt(2, 1000);
+ filterType.setDouble(3, 0.01);
+ filterType.setString(4, pattern);
+ filterType.executeUpdate();
+ id++;
+ }
+ writeFilter("pattern_test_ip", 1); // one filter row per table, referencing the filtertype id of its pattern
+ writeFilter("pattern_test_ip255", 2);
+ });
+ }
+
+ @AfterAll
+ void tearDown() { // clears the in-memory database and closes the connection once all tests have run
+ Assertions.assertDoesNotThrow(() -> {
+ conn.prepareStatement("DROP ALL OBJECTS").execute(); // h2 clear database; the statement must be executed, not only prepared
+ conn.close();
+ });
+ }
+
+ @Test
+ void testConnectionException() {
+ final DSLContext mockCtx = DSL.using(new MockConnection(c -> new MockResult[0])); // mock yields no results
+ final ConditionConfig bloomConfig = new ConditionConfig(mockCtx, false, true);
+ final ConditionConfig noBloomConfig = new ConditionConfig(mockCtx, false);
+ final IndexStatementCondition failing = new IndexStatementCondition("test", bloomConfig, DSL.trueCondition());
+ final IndexStatementCondition passing = new IndexStatementCondition("test", noBloomConfig, DSL.trueCondition());
+ Assertions.assertThrows(DataAccessException.class, failing::condition);
+ Assertions.assertDoesNotThrow(passing::condition);
+ }
+
+ @Test
+ void noMatchesTest() { // with no matching pattern tables the condition given to the constructor comes back as-is
+ final DSLContext dbCtx = DSL.using(conn);
+ final Condition expectedFalse = DSL.falseCondition();
+ final Condition expectedTrue = DSL.trueCondition();
+ final ConditionConfig plainConfig = new ConditionConfig(dbCtx, false, true);
+ final ConditionConfig withoutFiltersConfig = new ConditionConfig(dbCtx, false, true, true, 1L);
+ final IndexStatementCondition first = new IndexStatementCondition("test", plainConfig, expectedFalse);
+ final IndexStatementCondition second = new IndexStatementCondition("test", withoutFiltersConfig, expectedTrue);
+ Assertions.assertEquals(expectedFalse, first.condition());
+ Assertions.assertEquals(expectedTrue, second.condition());
+ Assertions.assertTrue(first.patternMatchTables().isEmpty());
+ Assertions.assertTrue(second.patternMatchTables().isEmpty());
+ }
@Test
- void conditionTest() {
- DSLContext ctx = DSL.using(new MockConnection(context -> new MockResult[0]));
- // only tests that database access is tried as expected
- Assertions
- .assertThrows(
- SQLDialectNotSupportedException.class, () -> new IndexStatementCondition(
- "value",
- new ConditionConfig(ctx, false, true, false),
- tokenizer
- ).condition()
- );
+ void oneMatchingTableTest() { // expects a bloommatch condition against pattern_test_ip only
+ DSLContext ctx = DSL.using(conn);
+ ConditionConfig config = new ConditionConfig(ctx, false, true);
+ IndexStatementCondition cond = new IndexStatementCondition("192.168.1.1", config);
+ String e = "(\n" + " (\n" + " bloommatch(\n" + " (\n"
+ + " select \"term_0_pattern_test_ip\".\"filter\"\n" + " from \"term_0_pattern_test_ip\"\n"
+ + " where (\n" + " term_id = 0\n"
+ + " and type_id = \"bloomdb\".\"pattern_test_ip\".\"filter_type_id\"\n" + " )\n"
+ + " ),\n" + " \"bloomdb\".\"pattern_test_ip\".\"filter\"\n" + " ) = true\n"
+ + " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is not null\n" + " )\n"
+ + " or \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + ")";
+ Assertions.assertEquals(e, cond.condition().toString()); // compared as rendered text
+ Assertions.assertEquals(1, cond.patternMatchTables().size()); // exactly one pattern table matched
+ }
+
+ @Test
+ void testOneMatchWithoutFilters() {
+ // one pattern table is expected to match; the condition is only that table's "filter is null" check
+ final String expected = "\"bloomdb\".\"pattern_test_ip\".\"filter\" is null";
+ final ConditionConfig withoutFilters = new ConditionConfig(DSL.using(conn), false, true, true);
+ final IndexStatementCondition oneMatch = new IndexStatementCondition("192.168.1.1", withoutFilters);
+ Assertions.assertEquals(expected, oneMatch.condition().toString());
+ Assertions.assertEquals(1, oneMatch.patternMatchTables().size());
+ }
+
+ @Test
+ void testTwoMatchWithoutFilters() {
+ // both pattern tables are expected to match; the condition requires each table's filter to be null
+ final String expected = "(\n" + " \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n"
+ + " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is null\n" + ")";
+ final ConditionConfig withoutFilters = new ConditionConfig(DSL.using(conn), false, true, true);
+ final IndexStatementCondition twoMatches = new IndexStatementCondition("255.255.255.255", withoutFilters);
+ Assertions.assertEquals(expected, twoMatches.condition().toString());
+ Assertions.assertEquals(2, twoMatches.patternMatchTables().size());
+ }
+
+ @Test
+ void twoMatchingTableTest() { // expects one bloommatch branch per matched table plus a combined "filter is null" branch
+ DSLContext ctx = DSL.using(conn);
+ ConditionConfig config = new ConditionConfig(ctx, false, true);
+ IndexStatementCondition cond = new IndexStatementCondition("255.255.255.255", config);
+ String e = "(\n" + " (\n" + " bloommatch(\n" + " (\n"
+ + " select \"term_0_pattern_test_ip\".\"filter\"\n" + " from \"term_0_pattern_test_ip\"\n"
+ + " where (\n" + " term_id = 0\n"
+ + " and type_id = \"bloomdb\".\"pattern_test_ip\".\"filter_type_id\"\n" + " )\n"
+ + " ),\n" + " \"bloomdb\".\"pattern_test_ip\".\"filter\"\n" + " ) = true\n"
+ + " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is not null\n" + " )\n" + " or (\n"
+ + " bloommatch(\n" + " (\n" + " select \"term_0_pattern_test_ip255\".\"filter\"\n"
+ + " from \"term_0_pattern_test_ip255\"\n" + " where (\n" + " term_id = 0\n"
+ + " and type_id = \"bloomdb\".\"pattern_test_ip255\".\"filter_type_id\"\n" + " )\n"
+ + " ),\n" + " \"bloomdb\".\"pattern_test_ip255\".\"filter\"\n" + " ) = true\n"
+ + " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is not null\n" + " )\n" + " or (\n"
+ + " \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n"
+ + " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is null\n" + " )\n" + ")";
+ Assertions.assertEquals(e, cond.condition().toString()); // compared as rendered text
+ Assertions.assertEquals(2, cond.patternMatchTables().size()); // both pattern tables matched
}
@Test
void equalsTest() {
- IndexStatementCondition eq1 = new IndexStatementCondition("946677600", config, tokenizer);
- IndexStatementCondition eq2 = new IndexStatementCondition("946677600", config, tokenizer);
+ IndexStatementCondition eq1 = new IndexStatementCondition("946677600", mockConfig);
+ IndexStatementCondition eq2 = new IndexStatementCondition("946677600", mockConfig);
Assertions.assertEquals(eq1, eq2);
- Assertions.assertEquals(eq2, eq1);
}
@Test
void notEqualsTest() {
- IndexStatementCondition eq1 = new IndexStatementCondition("946677600", config, tokenizer);
- IndexStatementCondition notEq = new IndexStatementCondition("1000", config, tokenizer);
+ IndexStatementCondition eq1 = new IndexStatementCondition("946677600", mockConfig);
+ IndexStatementCondition notEq = new IndexStatementCondition("1000", mockConfig);
Assertions.assertNotEquals(eq1, notEq);
- Assertions.assertNotEquals(notEq, eq1);
- Assertions.assertNotEquals(eq1, null);
+ }
+
+ private void writeFilter(String tableName, int filterId) { // inserts one row holding an empty bloom filter into tableName
+ Assertions.assertDoesNotThrow(() -> {
+ conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute();
+ conn.prepareStatement("USE BLOOMDB").execute();
+ String sql = "INSERT INTO `" + tableName + "` (`partition_id`, `filter_type_id`, `filter`) "
+ + "VALUES (?, (SELECT `id` FROM `filtertype` WHERE id=?), ?)";
+ BloomFilter filter = BloomFilter.create(1000, 0.01); // nothing is put into the filter before it is serialized
+ final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream();
+ Assertions.assertDoesNotThrow(() -> {
+ filter.writeTo(filterBAOS);
+ filterBAOS.close();
+ });
+ try (PreparedStatement stmt = conn.prepareStatement(sql)) { // statement is closed even if binding or the insert fails
+ stmt.setInt(1, 1); // partition_id is always 1
+ stmt.setInt(2, filterId);
+ stmt.setBytes(3, filterBAOS.toByteArray());
+ stmt.executeUpdate();
+ }
+ });
}
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/LatestConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/LatestConditionTest.java
index 7937abc3..7e8fc2f8 100644
--- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/LatestConditionTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/LatestConditionTest.java
@@ -49,6 +49,12 @@
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
+/**
+ * Conditions are compared via toString(), since a jOOQ Condition checks equality using only its toString() output,
+ * a behavior inherited from QueryPart.
+ *
+ * @see org.jooq.QueryPart
+ */
class LatestConditionTest {
@Test
@@ -71,26 +77,20 @@ void conditionUpdatedTest() {
@Test
void equalsTest() {
- IndexCondition eq1 = new IndexCondition("946720800", "EQUALS", false);
+ LatestCondition eq1 = new LatestCondition("946720800");
eq1.condition();
- IndexCondition eq2 = new IndexCondition("946720800", "EQUALS", false);
- IndexCondition eq3 = new IndexCondition("946720800", "EQUALS", true);
+ LatestCondition eq2 = new LatestCondition("946720800");
+ LatestCondition eq3 = new LatestCondition("946720800");
eq3.condition();
- IndexCondition eq4 = new IndexCondition("946720800", "EQUALS", true);
+ LatestCondition eq4 = new LatestCondition("946720800");
Assertions.assertEquals(eq1, eq2);
- Assertions.assertEquals(eq2, eq1);
Assertions.assertEquals(eq3, eq4);
}
@Test
void notEqualsTest() {
- IndexCondition eq1 = new IndexCondition("946720800", "EQUALS", false);
- IndexCondition notEq = new IndexCondition("1000", "EQUALS", false);
- IndexCondition notEq2 = new IndexCondition("946720800", "EQUALS", true);
+ LatestCondition eq1 = new LatestCondition("946720800");
+ LatestCondition notEq = new LatestCondition("1000");
Assertions.assertNotEquals(eq1, notEq);
- Assertions.assertNotEquals(notEq, eq1);
- Assertions.assertNotEquals(eq1, null);
- Assertions.assertNotEquals(eq1, notEq2);
- Assertions.assertNotEquals(notEq, notEq2);
}
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java
new file mode 100644
index 00000000..fd486989
--- /dev/null
+++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java
@@ -0,0 +1,92 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner.walker.conditions;
+
+import org.jooq.Condition;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Conditions are compared via toString(), since a jOOQ Condition checks equality using only its toString() output,
+ * a behavior inherited from QueryPart.
+ *
+ * @see org.jooq.QueryPart
+ */
+class PatternMatchConditionTest {
+
+ @Test
+ void testSingleToken() {
+ final String expected = "('test' like_regex \"bloomdb\".\"filtertype\".\"pattern\")";
+ final Condition actual = new PatternMatchCondition("test").condition();
+ Assertions.assertEquals(expected, actual.toString());
+ }
+
+ @Test
+ void testMultipleTokens() {
+ final Condition actual = new PatternMatchCondition("test.nest").condition();
+ final String expected = "(\n" + " ('test.' like_regex \"bloomdb\".\"filtertype\".\"pattern\")\n"
+ + " or ('.nest' like_regex \"bloomdb\".\"filtertype\".\"pattern\")\n"
+ + " or ('test.nest' like_regex \"bloomdb\".\"filtertype\".\"pattern\")\n"
+ + " or ('nest' like_regex \"bloomdb\".\"filtertype\".\"pattern\")\n"
+ + " or ('.' like_regex \"bloomdb\".\"filtertype\".\"pattern\")\n"
+ + " or ('test' like_regex \"bloomdb\".\"filtertype\".\"pattern\")\n" + ")";
+ Assertions.assertEquals(expected, actual.toString());
+ }
+
+ @Test
+ void testEquality() {
+ final PatternMatchCondition left = new PatternMatchCondition("test");
+ final PatternMatchCondition right = new PatternMatchCondition("test");
+ Assertions.assertEquals(left, right);
+ }
+
+ @Test
+ void testNotEquals() {
+ final PatternMatchCondition left = new PatternMatchCondition("test");
+ final PatternMatchCondition right = new PatternMatchCondition("next");
+ Assertions.assertNotEquals(left, right);
+ }
+}
diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/SourceTypeConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/SourceTypeConditionTest.java
index a6179093..1e0b8130 100644
--- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/SourceTypeConditionTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/SourceTypeConditionTest.java
@@ -49,6 +49,12 @@
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
+/**
+ * Conditions are compared via toString(), since a jOOQ Condition checks equality using only its toString() output,
+ * a behavior inherited from QueryPart.
+ *
+ * @see org.jooq.QueryPart
+ */
class SourceTypeConditionTest {
@Test
@@ -73,25 +79,22 @@ void negationTest() {
@Test
void equalsTest() {
- IndexCondition eq1 = new IndexCondition("946677600", "EQUALS", false);
+ SourceTypeCondition eq1 = new SourceTypeCondition("946677600", "EQUALS", false);
eq1.condition();
- IndexCondition eq2 = new IndexCondition("946677600", "EQUALS", false);
- IndexCondition eq3 = new IndexCondition("946677600", "EQUALS", true);
+ SourceTypeCondition eq2 = new SourceTypeCondition("946677600", "EQUALS", false);
+ SourceTypeCondition eq3 = new SourceTypeCondition("946677600", "EQUALS", true);
eq3.condition();
- IndexCondition eq4 = new IndexCondition("946677600", "EQUALS", true);
+ SourceTypeCondition eq4 = new SourceTypeCondition("946677600", "EQUALS", true);
Assertions.assertEquals(eq1, eq2);
- Assertions.assertEquals(eq2, eq1);
Assertions.assertEquals(eq3, eq4);
}
@Test
void notEqualsTest() {
- IndexCondition eq1 = new IndexCondition("946677600", "EQUALS", false);
- IndexCondition notEq = new IndexCondition("1000", "EQUALS", false);
- IndexCondition notEq2 = new IndexCondition("1000", "EQUALS", true);
+ SourceTypeCondition eq1 = new SourceTypeCondition("946677600", "EQUALS", false);
+ SourceTypeCondition notEq = new SourceTypeCondition("1000", "EQUALS", false);
+ SourceTypeCondition notEq2 = new SourceTypeCondition("1000", "EQUALS", true);
Assertions.assertNotEquals(eq1, notEq);
- Assertions.assertNotEquals(notEq, eq1);
- Assertions.assertNotEquals(eq1, null);
Assertions.assertNotEquals(eq1, notEq2);
Assertions.assertNotEquals(notEq, notEq2);
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/ValidElementTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/ValidElementTest.java
new file mode 100644
index 00000000..ea811cb8
--- /dev/null
+++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/ValidElementTest.java
@@ -0,0 +1,130 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner.walker.conditions;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+
+import javax.xml.parsers.DocumentBuilderFactory;
+
+/**
+ * Tests for ValidElement built from DOM elements.
+ * The accessors expose the element's tag, value and operation; reading an element that lacks a value or an
+ * operation attribute throws a RuntimeException; equality takes both the value and the operation attribute
+ * into account.
+ */
+class ValidElementTest {
+
+ final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+ final Document document = Assertions.assertDoesNotThrow(() -> factory.newDocumentBuilder().newDocument());
+
+ @Test
+ void validTest() {
+ final Element complete = document.createElement("test");
+ complete.setAttribute("value", "value");
+ complete.setAttribute("operation", "operation");
+ final ValidElement validElement = new ValidElement(complete);
+ Assertions.assertDoesNotThrow(() -> {
+ Assertions.assertEquals("test", validElement.tag());
+ Assertions.assertEquals("value", validElement.value());
+ Assertions.assertEquals("operation", validElement.operation());
+ });
+ }
+
+ @Test
+ void missingValueTest() {
+ final Element withoutValue = document.createElement("test");
+ withoutValue.setAttribute("operation", "operation");
+ final ValidElement invalidElement = new ValidElement(withoutValue);
+ Assertions.assertThrows(RuntimeException.class, () -> Assertions.assertEquals("test", invalidElement.tag()));
+ }
+
+ @Test
+ void missingOperationTest() {
+ final Element withoutOperation = document.createElement("test");
+ withoutOperation.setAttribute("value", "value");
+ final ValidElement invalidElement = new ValidElement(withoutOperation);
+ Assertions.assertThrows(RuntimeException.class, () -> Assertions.assertEquals("test", invalidElement.tag()));
+ }
+
+ @Test
+ void equalityTest() {
+ final Element shared = document.createElement("test");
+ shared.setAttribute("value", "value");
+ shared.setAttribute("operation", "operation");
+ final ValidElement left = new ValidElement(shared);
+ final ValidElement right = new ValidElement(shared);
+ Assertions.assertEquals(left, right);
+ }
+
+ @Test
+ void notEqualValueTest() {
+ final Element original = document.createElement("test");
+ original.setAttribute("value", "value");
+ original.setAttribute("operation", "operation");
+ final Element otherValue = document.createElement("test");
+ otherValue.setAttribute("value", "notValue");
+ otherValue.setAttribute("operation", "operation");
+ final ValidElement left = new ValidElement(original);
+ final ValidElement right = new ValidElement(otherValue);
+ Assertions.assertNotEquals(left, right);
+ }
+
+ @Test
+ void notEqualOperationTest() {
+ final Element original = document.createElement("test");
+ original.setAttribute("value", "value");
+ original.setAttribute("operation", "operation");
+ final Element otherOperation = document.createElement("test");
+ otherOperation.setAttribute("value", "value");
+ otherOperation.setAttribute("operation", "notOperation");
+ final ValidElement left = new ValidElement(original);
+ final ValidElement right = new ValidElement(otherOperation);
+ Assertions.assertNotEquals(left, right);
+ }
+}
diff --git a/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java
new file mode 100644
index 00000000..60346b7e
--- /dev/null
+++ b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java
@@ -0,0 +1,335 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.walker;
+
+import com.teragrep.pth_06.planner.walker.ConditionWalker;
+import org.apache.spark.util.sketch.BloomFilter;
+import org.jooq.Condition;
+import org.jooq.impl.DSL;
+import org.junit.jupiter.api.*;
+
+import java.io.ByteArrayOutputStream;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Conditions are compared through their toString() output, because a jOOQ Condition checks equality using only
+ * toString(), a behaviour it inherits from QueryPart.
+ *
+ * @see org.jooq.QueryPart
+ */
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+public class ConditionWalkerTest {
+
+    /** JDBC URL of the in-memory H2 database used by these tests (MariaDB mode, lower-cased identifiers). */
+    final String url = "jdbc:h2:mem:test;MODE=MariaDB;DATABASE_TO_LOWER=TRUE;CASE_INSENSITIVE_IDENTIFIERS=TRUE";
+    final String userName = "sa";
+    final String password = "";
+    /**
+     * Regex patterns that setup() inserts into the filtertype table, one row per entry with ids starting at 1.
+     * Declared as List of String (not a raw List) so setup() can iterate it with a String loop variable.
+     */
+    final List<String> patternList = new ArrayList<>(
+            Arrays
+                    .asList(
+                            "(\\b25[0-5]|\\b2[0-4][0-9]|\\b[01]?[0-9][0-9]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}",
+                            "(\\b25[0-5]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}"
+                    )
+    );
+    /** Connection shared by every test in the class; opening it must not throw. */
+    final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password));
+
+    /**
+     * Prepares the BLOOMDB schema once for the whole class: drops and recreates the filtertype,
+     * pattern_test_ip and pattern_test_ip255 tables, inserts one filtertype row per entry of patternList
+     * (ids 1..n, expectedElements 1000, targetFpp 0.01) and writes one filter row into each pattern table.
+     * Every statement is closed as soon as it has run so no JDBC resources outlive the setup.
+     */
+    @BeforeAll
+    void setup() {
+        String filtertype = "CREATE TABLE`filtertype`" + "("
+                + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
+                + " `expectedElements` bigint(20) unsigned NOT NULL,"
+                + " `targetFpp` DOUBLE(2) unsigned NOT NULL,"
+                + " `pattern` VARCHAR(2048) NOT NULL,"
+                + " UNIQUE KEY (`expectedElements`, `targetFpp`, `pattern`)" + ")";
+        String ip = "CREATE TABLE `pattern_test_ip`("
+                + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
+                + " `partition_id` bigint(20) unsigned NOT NULL UNIQUE,"
+                + " `filter_type_id` bigint(20) unsigned NOT NULL,"
+                + " `filter` longblob NOT NULL)";
+        String ip255 = "CREATE TABLE `pattern_test_ip255`("
+                + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
+                + " `partition_id` bigint(20) unsigned NOT NULL UNIQUE,"
+                + " `filter_type_id` bigint(20) unsigned NOT NULL,"
+                + " `filter` longblob NOT NULL)";
+        // Executed strictly in this order: schema first, then drops, then the fresh tables.
+        List<String> schemaStatements = Arrays
+                .asList(
+                        "CREATE SCHEMA IF NOT EXISTS BLOOMDB", "USE BLOOMDB", "DROP TABLE IF EXISTS filtertype",
+                        "DROP TABLE IF EXISTS pattern_test_ip", "DROP TABLE IF EXISTS pattern_test_ip255",
+                        filtertype, ip, ip255
+                );
+        String typeSQL = "INSERT INTO `filtertype` (`id`,`expectedElements`, `targetFpp`, `pattern`) VALUES (?,?,?,?)";
+        Assertions.assertDoesNotThrow(() -> {
+            for (String statement : schemaStatements) {
+                try (PreparedStatement stmt = conn.prepareStatement(statement)) {
+                    stmt.execute();
+                }
+            }
+            // One prepared statement is reused for all rows; every parameter is rebound on each iteration.
+            try (PreparedStatement filterType = conn.prepareStatement(typeSQL)) {
+                int id = 1;
+                for (String pattern : patternList) {
+                    filterType.setInt(1, id);
+                    filterType.setInt(2, 1000);
+                    filterType.setDouble(3, 0.01);
+                    filterType.setString(4, pattern);
+                    filterType.executeUpdate();
+                    id++;
+                }
+            }
+            writeFilter("pattern_test_ip", 1);
+            writeFilter("pattern_test_ip255", 2);
+        });
+    }
+
+    /**
+     * Clears the H2 database and closes the shared connection after all tests have run. The connection is
+     * closed in a finally block so it is released even when dropping the objects fails.
+     */
+    @AfterAll
+    void tearDown() {
+        Assertions.assertDoesNotThrow(() -> {
+            try (PreparedStatement dropAll = conn.prepareStatement("DROP ALL OBJECTS")) { // h2 clear database
+                dropAll.execute();
+            }
+            finally {
+                conn.close();
+            }
+        });
+    }
+
+    /**
+     * Walker built with (ctx, true); fromString is called with false as its second argument. Expects only the
+     * directory condition on getArchivedObjects_filter_table and no pattern match tables.
+     * NOTE(review): the query literal is empty in this copy although the expected condition mentions
+     * 'haproxy'; presumably the original query text was lost - confirm against the upstream test.
+     */
+    @Test
+    void bloomNoMatchTest() {
+        final String query = "";
+        final String expected = "\"getArchivedObjects_filter_table\".\"directory\" like 'haproxy'";
+        final ConditionWalker walker = new ConditionWalker(DSL.using(conn), true);
+        final Condition condition = Assertions.assertDoesNotThrow(() -> walker.fromString(query, false));
+        Assertions.assertEquals(expected, condition.toString());
+        Assertions.assertEquals(0, walker.patternMatchTables().size());
+    }
+
+    /**
+     * Same as the no-match case but fromString is called with true as its second argument; the expected
+     * condition then targets "streamdb"."stream" and no pattern match tables are reported.
+     * NOTE(review): the query literal is empty in this copy although the expected condition mentions
+     * 'haproxy'; presumably the original query text was lost - confirm against the upstream test.
+     */
+    @Test
+    void bloomNoMatchStreamQueryTest() {
+        final String query = "";
+        final String expected = "\"streamdb\".\"stream\".\"directory\" like 'haproxy'";
+        final ConditionWalker walker = new ConditionWalker(DSL.using(conn), true);
+        final Condition condition = Assertions.assertDoesNotThrow(() -> walker.fromString(query, true));
+        Assertions.assertEquals(expected, condition.toString());
+        Assertions.assertEquals(0, walker.patternMatchTables().size());
+    }
+
+    /**
+     * Walker built with the three-argument constructor (ctx, true, true) and fromString called with true;
+     * expects the plain "streamdb"."stream" directory condition and no pattern match tables.
+     * NOTE(review): the query literal is empty in this copy although the expected condition mentions
+     * 'haproxy'; presumably the original query text was lost - confirm against the upstream test.
+     */
+    @Test
+    void bloomNoMatchStreamQueryWithoutFiltersTest() {
+        final String query = "";
+        final String expected = "\"streamdb\".\"stream\".\"directory\" like 'haproxy'";
+        final ConditionWalker walker = new ConditionWalker(DSL.using(conn), true, true);
+        final Condition condition = Assertions.assertDoesNotThrow(() -> walker.fromString(query, true));
+        Assertions.assertEquals(expected, condition.toString());
+        Assertions.assertEquals(0, walker.patternMatchTables().size());
+    }
+
+    /**
+     * Stream-query variant (fromString called with true): expects only the "streamdb"."stream" directory
+     * condition and an empty set of pattern match tables.
+     * NOTE(review): the query literal is empty in this copy although the expected condition mentions
+     * 'haproxy'; presumably the original query text was lost - confirm against the upstream test.
+     */
+    @Test
+    void singleTablePatternMatchStreamQueryTest() {
+        final String query = "";
+        final String expected = "\"streamdb\".\"stream\".\"directory\" like 'haproxy'";
+        final ConditionWalker walker = new ConditionWalker(DSL.using(conn), true);
+        final Condition condition = Assertions.assertDoesNotThrow(() -> walker.fromString(query, true));
+        Assertions.assertEquals(expected, condition.toString());
+        Assertions.assertEquals(0, walker.patternMatchTables().size());
+    }
+
+    /**
+     * Expects the directory condition combined with a bloommatch condition on pattern_test_ip (or a null
+     * filter), and exactly one pattern match table named pattern_test_ip.
+     * NOTE(review): the query literal is empty in this copy although the expected condition is not;
+     * presumably the original query text was lost - confirm against the upstream test.
+     */
+    @Test
+    void singleTablePatternMatchTest() {
+        final String query = "";
+        final String expected = "(\n" + " \"getArchivedObjects_filter_table\".\"directory\" like 'haproxy'\n" + " and (\n"
+                + " (\n" + " bloommatch(\n" + " (\n"
+                + " select \"term_0_pattern_test_ip\".\"filter\"\n"
+                + " from \"term_0_pattern_test_ip\"\n" + " where (\n" + " term_id = 0\n"
+                + " and type_id = \"bloomdb\".\"pattern_test_ip\".\"filter_type_id\"\n" + " )\n"
+                + " ),\n" + " \"bloomdb\".\"pattern_test_ip\".\"filter\"\n" + " ) = true\n"
+                + " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is not null\n" + " )\n"
+                + " or \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + " )\n" + ")";
+        final ConditionWalker walker = new ConditionWalker(DSL.using(conn), true);
+        final Condition condition = Assertions.assertDoesNotThrow(() -> walker.fromString(query, false));
+        Assertions.assertEquals(expected, condition.toString());
+        Assertions.assertEquals(1, walker.patternMatchTables().size());
+        Assertions.assertTrue(
+                walker.patternMatchTables().stream().anyMatch(table -> table.getName().equals("pattern_test_ip"))
+        );
+    }
+
+    /**
+     * Walker built with (ctx, true, true): expects the directory condition combined only with a
+     * "filter is null" check on pattern_test_ip, and one pattern match table named pattern_test_ip.
+     * NOTE(review): the query literal is empty in this copy although the expected condition is not;
+     * presumably the original query text was lost - confirm against the upstream test.
+     */
+    @Test
+    void singleTablePatternMatchWithoutFiltersTest() {
+        final String query = "";
+        final String expected = "(\n" + " \"getArchivedObjects_filter_table\".\"directory\" like 'haproxy'\n"
+                + " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + ")";
+        final ConditionWalker walker = new ConditionWalker(DSL.using(conn), true, true);
+        final Condition condition = Assertions.assertDoesNotThrow(() -> walker.fromString(query, false));
+        Assertions.assertEquals(expected, condition.toString());
+        Assertions.assertEquals(1, walker.patternMatchTables().size());
+        Assertions.assertTrue(
+                walker.patternMatchTables().stream().anyMatch(table -> table.getName().equals("pattern_test_ip"))
+        );
+    }
+
+    /**
+     * Expects the directory condition combined with bloommatch conditions on both pattern_test_ip and
+     * pattern_test_ip255 (or both filters being null), and both tables reported as pattern match tables.
+     * NOTE(review): the query literal is empty in this copy although the expected condition is not;
+     * presumably the original query text was lost - confirm against the upstream test.
+     */
+    @Test
+    void twoTablePatternMatchTest() {
+        final String query = "";
+        final String expected = "(\n" + " \"getArchivedObjects_filter_table\".\"directory\" like 'haproxy'\n" + " and (\n"
+                + " (\n" + " bloommatch(\n" + " (\n"
+                + " select \"term_0_pattern_test_ip\".\"filter\"\n"
+                + " from \"term_0_pattern_test_ip\"\n" + " where (\n" + " term_id = 0\n"
+                + " and type_id = \"bloomdb\".\"pattern_test_ip\".\"filter_type_id\"\n" + " )\n"
+                + " ),\n" + " \"bloomdb\".\"pattern_test_ip\".\"filter\"\n" + " ) = true\n"
+                + " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is not null\n" + " )\n" + " or (\n"
+                + " bloommatch(\n" + " (\n" + " select \"term_0_pattern_test_ip255\".\"filter\"\n"
+                + " from \"term_0_pattern_test_ip255\"\n" + " where (\n" + " term_id = 0\n"
+                + " and type_id = \"bloomdb\".\"pattern_test_ip255\".\"filter_type_id\"\n" + " )\n"
+                + " ),\n" + " \"bloomdb\".\"pattern_test_ip255\".\"filter\"\n" + " ) = true\n"
+                + " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is not null\n" + " )\n" + " or (\n"
+                + " \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n"
+                + " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is null\n" + " )\n" + " )\n" + ")";
+        final ConditionWalker walker = new ConditionWalker(DSL.using(conn), true);
+        final Condition condition = Assertions.assertDoesNotThrow(() -> walker.fromString(query, false));
+        Assertions.assertEquals(expected, condition.toString());
+        Assertions.assertEquals(2, walker.patternMatchTables().size());
+        Assertions.assertTrue(
+                walker.patternMatchTables().stream().anyMatch(table -> table.getName().equals("pattern_test_ip"))
+        );
+        Assertions.assertTrue(
+                walker.patternMatchTables().stream().anyMatch(table -> table.getName().equals("pattern_test_ip255"))
+        );
+    }
+
+    /**
+     * Walker built with (ctx, true, true): expects the directory condition combined only with
+     * "filter is null" checks on pattern_test_ip and pattern_test_ip255, with both tables reported.
+     * NOTE(review): the query literal is empty in this copy although the expected condition is not;
+     * presumably the original query text was lost - confirm against the upstream test.
+     */
+    @Test
+    void twoTablePatternMatchWithoutFiltersTest() {
+        final String query = "";
+        final String expected = "(\n" + " \"getArchivedObjects_filter_table\".\"directory\" like 'haproxy'\n"
+                + " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n"
+                + " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is null\n" + ")";
+        final ConditionWalker walker = new ConditionWalker(DSL.using(conn), true, true);
+        final Condition condition = Assertions.assertDoesNotThrow(() -> walker.fromString(query, false));
+        Assertions.assertEquals(expected, condition.toString());
+        Assertions.assertEquals(2, walker.patternMatchTables().size());
+        Assertions.assertTrue(
+                walker.patternMatchTables().stream().anyMatch(table -> table.getName().equals("pattern_test_ip"))
+        );
+        Assertions.assertTrue(
+                walker.patternMatchTables().stream().anyMatch(table -> table.getName().equals("pattern_test_ip255"))
+        );
+    }
+
+    /**
+     * Expects a single bloommatch condition for term_id 1 on pattern_test_ip (or a null filter) and exactly
+     * one pattern match table named pattern_test_ip.
+     * NOTE(review): the query literal is empty in this copy although the expected condition is not;
+     * presumably the original query text was lost - confirm against the upstream test.
+     */
+    @Test
+    void multipleSearchTermTestOneMatchTest() {
+        final String query = "";
+        final String expected = "(\n" + " (\n" + " bloommatch(\n" + " (\n"
+                + " select \"term_1_pattern_test_ip\".\"filter\"\n" + " from \"term_1_pattern_test_ip\"\n"
+                + " where (\n" + " term_id = 1\n"
+                + " and type_id = \"bloomdb\".\"pattern_test_ip\".\"filter_type_id\"\n" + " )\n"
+                + " ),\n" + " \"bloomdb\".\"pattern_test_ip\".\"filter\"\n" + " ) = true\n"
+                + " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is not null\n" + " )\n"
+                + " or \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + ")";
+        final ConditionWalker walker = new ConditionWalker(DSL.using(conn), true);
+        final Condition condition = Assertions.assertDoesNotThrow(() -> walker.fromString(query, false));
+        Assertions.assertEquals(expected, condition.toString());
+        Assertions.assertEquals(1, walker.patternMatchTables().size());
+        Assertions.assertTrue(
+                walker.patternMatchTables().stream().anyMatch(table -> table.getName().equals("pattern_test_ip"))
+        );
+    }
+
+    /**
+     * Expects two AND-ed groups: term_id 0 matched against both pattern_test_ip and pattern_test_ip255, and
+     * term_id 1 matched against pattern_test_ip only; both tables are reported as pattern match tables.
+     * NOTE(review): the query literal is empty in this copy although the expected condition is not;
+     * presumably the original query text was lost - confirm against the upstream test.
+     */
+    @Test
+    void multipleSearchTermTwoAndOneMatchTest() {
+        final String query = "";
+        final String expected = "(\n" + " (\n" + " (\n" + " bloommatch(\n" + " (\n"
+                + " select \"term_0_pattern_test_ip\".\"filter\"\n"
+                + " from \"term_0_pattern_test_ip\"\n" + " where (\n" + " term_id = 0\n"
+                + " and type_id = \"bloomdb\".\"pattern_test_ip\".\"filter_type_id\"\n" + " )\n"
+                + " ),\n" + " \"bloomdb\".\"pattern_test_ip\".\"filter\"\n" + " ) = true\n"
+                + " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is not null\n" + " )\n" + " or (\n"
+                + " bloommatch(\n" + " (\n" + " select \"term_0_pattern_test_ip255\".\"filter\"\n"
+                + " from \"term_0_pattern_test_ip255\"\n" + " where (\n" + " term_id = 0\n"
+                + " and type_id = \"bloomdb\".\"pattern_test_ip255\".\"filter_type_id\"\n" + " )\n"
+                + " ),\n" + " \"bloomdb\".\"pattern_test_ip255\".\"filter\"\n" + " ) = true\n"
+                + " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is not null\n" + " )\n" + " or (\n"
+                + " \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n"
+                + " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is null\n" + " )\n" + " )\n"
+                + " and (\n" + " (\n" + " bloommatch(\n" + " (\n"
+                + " select \"term_1_pattern_test_ip\".\"filter\"\n"
+                + " from \"term_1_pattern_test_ip\"\n" + " where (\n" + " term_id = 1\n"
+                + " and type_id = \"bloomdb\".\"pattern_test_ip\".\"filter_type_id\"\n" + " )\n"
+                + " ),\n" + " \"bloomdb\".\"pattern_test_ip\".\"filter\"\n" + " ) = true\n"
+                + " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is not null\n" + " )\n"
+                + " or \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + " )\n" + ")";
+        final ConditionWalker walker = new ConditionWalker(DSL.using(conn), true);
+        final Condition condition = Assertions.assertDoesNotThrow(() -> walker.fromString(query, false));
+        Assertions.assertEquals(expected, condition.toString());
+        Assertions.assertEquals(2, walker.patternMatchTables().size());
+        Assertions.assertTrue(
+                walker.patternMatchTables().stream().anyMatch(table -> table.getName().equals("pattern_test_ip"))
+        );
+        Assertions.assertTrue(
+                walker.patternMatchTables().stream().anyMatch(table -> table.getName().equals("pattern_test_ip255"))
+        );
+    }
+
+    /**
+     * Walker built with (ctx, true, true): expects only "filter is null" checks (pattern_test_ip,
+     * pattern_test_ip255, then pattern_test_ip again) and both tables reported as pattern match tables.
+     * NOTE(review): the query literal is empty in this copy although the expected condition is not;
+     * presumably the original query text was lost - confirm against the upstream test.
+     */
+    @Test
+    void multipleSearchTermTwoAndOneMatchWithoutFiltersTest() {
+        final String query = "";
+        final String expected = "(\n" + " \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n"
+                + " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is null\n"
+                + " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + ")";
+        final ConditionWalker walker = new ConditionWalker(DSL.using(conn), true, true);
+        final Condition condition = Assertions.assertDoesNotThrow(() -> walker.fromString(query, false));
+        Assertions.assertEquals(expected, condition.toString());
+        Assertions.assertEquals(2, walker.patternMatchTables().size());
+        Assertions.assertTrue(
+                walker.patternMatchTables().stream().anyMatch(table -> table.getName().equals("pattern_test_ip"))
+        );
+        Assertions.assertTrue(
+                walker.patternMatchTables().stream().anyMatch(table -> table.getName().equals("pattern_test_ip255"))
+        );
+    }
+
+    /**
+     * Inserts one row into the given pattern table: partition_id 1, the filtertype id looked up by
+     * {@code filterId}, and the serialized bytes of a bloom filter created with (1000, 0.01) into which no
+     * items have been put. Any failure fails the calling test through assertDoesNotThrow. All statements are
+     * closed once executed.
+     *
+     * @param tableName target table; concatenated directly into the SQL text, so pass only fixed test constants
+     * @param filterId  id of the filtertype row referenced by the inserted filter
+     */
+    private void writeFilter(String tableName, int filterId) {
+        String sql = "INSERT INTO `" + tableName + "` (`partition_id`, `filter_type_id`, `filter`) "
+                + "VALUES (?, (SELECT `id` FROM `filtertype` WHERE id=?), ?)";
+        Assertions.assertDoesNotThrow(() -> {
+            for (String statement : Arrays.asList("CREATE SCHEMA IF NOT EXISTS BLOOMDB", "USE BLOOMDB")) {
+                try (PreparedStatement schemaStmt = conn.prepareStatement(statement)) {
+                    schemaStmt.execute();
+                }
+            }
+            BloomFilter filter = BloomFilter.create(1000, 0.01);
+            ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream();
+            // The enclosing assertDoesNotThrow already reports a failed write, so no nested assertion is needed.
+            filter.writeTo(filterBAOS);
+            try (PreparedStatement stmt = conn.prepareStatement(sql)) {
+                stmt.setInt(1, 1);
+                stmt.setInt(2, filterId);
+                stmt.setBytes(3, filterBAOS.toByteArray());
+                stmt.executeUpdate();
+            }
+        });
+    }
+}