Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add FederatedQueryPlanner #2216

Merged
merged 75 commits into from
Sep 9, 2024
Merged
Changes from 1 commit
Commits
Show all changes
75 commits
Select commit Hold shift + click to select a range
cfefd87
Enrich 'i' and 'ri' rows in metadata table with event date
lbschanno Sep 7, 2023
0e4f806
Merge branch 'integration' into task/datedIndexMetadata
ivakegg Sep 29, 2023
f2aa20b
Merge branch 'integration' into task/datedIndexMetadata
ivakegg Oct 4, 2023
ab9ee4c
Merge branch 'integration' into task/datedIndexMetadata
ivakegg Oct 17, 2023
43aeae8
Merge branch 'integration' into task/datedIndexMetadata
lbschanno Nov 7, 2023
83014c1
Merge branch 'integration' into task/datedIndexMetadata
ivakegg Nov 24, 2023
ab14fe2
Add counts to 'i' and 'ri' rows
lbschanno Dec 19, 2023
9aad9f6
Merge branch 'integration' into task/datedIndexMetadata
lbschanno Jan 10, 2024
da6ee69
Initial federated query planner implementation
lbschanno Oct 25, 2023
b40201b
code formatting
lbschanno Jan 12, 2024
8ca62b3
Fixed issues with FederatedQueryIterable
lbschanno Jan 13, 2024
59d3be9
Fix test failures
lbschanno Jan 13, 2024
bde374d
Fix failing tests
lbschanno Jan 13, 2024
461a526
Additional test fixes
lbschanno Jan 13, 2024
7784b4c
pr feedback
lbschanno Jan 18, 2024
b288196
Use new MetadataHelper function version
lbschanno Jan 23, 2024
c34a543
Extract fields to filter index holes
lbschanno Jan 24, 2024
e0ef160
Correct logic for determining sub date ranges
lbschanno Jan 24, 2024
9ea3a84
Merge branch 'integration' into task/federatedQueryPlanner
lbschanno Jan 24, 2024
7052bcb
Remove unnecessary check
lbschanno Jan 24, 2024
d44e0f0
code formatting
lbschanno Jan 24, 2024
fca2fff
Merge branch 'integration' into task/federatedQueryPlanner
lbschanno Jan 27, 2024
e341c72
Add check for null query model
lbschanno Jan 27, 2024
8779a99
Limit config arg to function scope
lbschanno Jan 29, 2024
a875a20
Update metadata-utils submodule commit
lbschanno Jan 29, 2024
cd20ca5
code formatting
lbschanno Jan 29, 2024
56e3bad
Merge branch 'integration' into task/federatedQueryPlanner
lbschanno Feb 6, 2024
39ecec4
Fix failing tests
lbschanno Feb 6, 2024
906f3ee
Additional test fixes
lbschanno Feb 6, 2024
9478a62
Ensure all original tests pass
lbschanno Feb 6, 2024
1bf9201
Add federated planner tests and chained schedulers
lbschanno Feb 27, 2024
941607f
Merge branch 'integration' into task/federatedQueryPlanner
lbschanno Feb 27, 2024
c019c78
pr feedback
lbschanno Feb 29, 2024
9c72dab
metadata-utils 3.0.3 tag
ivakegg Mar 6, 2024
e5af693
Fixed the index hole data ingest to set appropriate time stamps on th…
ivakegg Mar 6, 2024
0842cec
Merge branch 'integration' into task/federatedQueryPlanner
ivakegg Mar 6, 2024
d774f6f
Updated applyModel to use the passed in script
ivakegg Mar 6, 2024
3da641d
Remove unneeded changes
lbschanno Mar 6, 2024
235dad8
Make FederatedQueryPlanner the default
lbschanno Mar 7, 2024
b63b24c
Restore original log4j.properties
lbschanno Mar 8, 2024
0c4ccff
Merge branch 'integration' into task/federatedQueryPlanner
lbschanno Mar 8, 2024
5515fdc
code formatting
lbschanno Mar 8, 2024
fede1a0
Fix QueryPlanTest
lbschanno Mar 9, 2024
6ed7a39
Updated to test with teardown
ivakegg Mar 9, 2024
1d506e3
Test debugging edits
lbschanno Mar 11, 2024
0578bc2
Updated formatting
ivakegg Mar 13, 2024
e9a76e6
Concatenate sub-plans
lbschanno Mar 13, 2024
195dabe
Merge branch 'integration' into task/federatedQueryPlanner
lbschanno Mar 13, 2024
d6354ce
Make FederatedQueryPlanner implement Cloneable
lbschanno Mar 14, 2024
c2a57f0
code formatting
lbschanno Mar 14, 2024
6e1f131
Merge branch 'integration' into task/federatedQueryPlanner
lbschanno Jun 3, 2024
a2da80c
Merge branch 'integration' into task/federatedQueryPlanner
lbschanno Jun 11, 2024
49c4bc6
Merge branch 'integration' into task/federatedQueryPlanner
lbschanno Jun 13, 2024
53a3ea8
Merge branch 'integration' into task/federatedQueryPlanner
ivakegg Jun 25, 2024
cbe8d87
Merge remote-tracking branch 'origin/integration' into task/federated…
ivakegg Jul 1, 2024
74c9db8
* Updated with metadata-utils 4.0.5 (index markers and avoid non-inde…
ivakegg Jul 2, 2024
6608e5f
Merge branch 'integration' into task/federatedQueryPlanner
ivakegg Jul 2, 2024
390ae48
* Allow subclasses of ShardQueryConfiguration
ivakegg Jul 5, 2024
5b52f8b
Merge remote-tracking branch 'origin/integration' into task/federated…
ivakegg Jul 5, 2024
5516f73
Merge branch 'integration' into task/federatedQueryPlanner
ivakegg Jul 8, 2024
8370ea2
Updated to throw a NoResultsException for am empty query.
ivakegg Jul 8, 2024
a9cb2fa
Merge branch 'task/federatedQueryPlanner' of github.com:NationalSecur…
ivakegg Jul 8, 2024
fece27b
import reorg
ivakegg Jul 8, 2024
9aca68a
Updated to avoid expanding unfielded if disabled, and to assume no in…
ivakegg Jul 8, 2024
08ac120
Merge branch 'integration' into task/federatedQueryPlanner
lbschanno Aug 22, 2024
a3ba76d
Add tests for default query planner with ne and not-eq
lbschanno Aug 26, 2024
cc1873d
Revert changes to test data format
lbschanno Aug 26, 2024
591f252
Merge branch 'integration' into task/federatedQueryPlanner
lbschanno Aug 26, 2024
c09cee1
Revert changes to log4j.properties
lbschanno Aug 26, 2024
0ed1f67
Ensure query plan updated after any exception type
lbschanno Aug 26, 2024
accc7bc
Merge branch 'integration' into task/federatedQueryPlanner
lbschanno Aug 27, 2024
fc0760b
Revert all changes to test data format
lbschanno Aug 27, 2024
c62e331
Merge branch 'integration' into task/federatedQueryPlanner
ivakegg Aug 29, 2024
2a97ede
Merge branch 'integration' into task/federatedQueryPlanner
ivakegg Sep 4, 2024
12cd61e
Merge branch 'integration' into task/federatedQueryPlanner
hgklohr Sep 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Test debugging edits
lbschanno committed Mar 11, 2024
commit 1d506e3febf6947b8d776b3d810572a5b76a3167
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package datawave.query.config;

import datawave.query.CloseableIterable;
import datawave.webservice.query.configuration.QueryData;

import java.util.ArrayList;
import java.util.List;

/**
 * Simple holder that collects {@link ShardQueryConfiguration} instances and {@link CloseableIterable} query data iterables as they are added.
 * <p>
 * Entries are kept in the order in which they were added. The two lists are independent of each other: this class does not enforce that they have the same
 * size or that entries at the same index belong together.
 */
public class FederatedQueryConfiguration {

    /** Configurations registered through {@link #addConfig(ShardQueryConfiguration)}, in insertion order. */
    private final List<ShardQueryConfiguration> configs = new ArrayList<>();

    /** Query data iterables registered through {@link #addQueryData(CloseableIterable)}, in insertion order. */
    private final List<CloseableIterable<QueryData>> queryDatas = new ArrayList<>();

    /**
     * Return the configurations added so far.
     *
     * @return the backing list itself (not a copy), so later additions are visible through the returned reference
     */
    public List<ShardQueryConfiguration> getConfigs() {
        return configs;
    }

    /**
     * Append a configuration to the end of the configuration list.
     *
     * @param configuration
     *            the configuration to record
     */
    public void addConfig(ShardQueryConfiguration configuration) {
        this.configs.add(configuration);
    }

    /**
     * Return the query data iterables added so far.
     *
     * @return the backing list itself (not a copy), so later additions are visible through the returned reference
     */
    public List<CloseableIterable<QueryData>> getQueryDatas() {
        return queryDatas;
    }

    /**
     * Append a query data iterable to the end of the query data list.
     *
     * @param iterator
     *            the query data iterable to record
     */
    public void addQueryData(CloseableIterable<QueryData> iterator) {
        this.queryDatas.add(iterator);
    }
}
Original file line number Diff line number Diff line change
@@ -7,12 +7,14 @@
import java.util.Collections;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.stream.Collectors;

import datawave.query.config.FederatedQueryConfiguration;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
@@ -60,7 +62,8 @@ public class FederatedQueryPlanner extends QueryPlanner {

private DefaultQueryPlanner queryPlanner;
private String plannedScript;

private FederatedQueryConfiguration federatedConfig;

/**
* Return a new {@link FederatedQueryPlanner} instance with a new {@link DefaultQueryPlanner} inner query planner instance.
*/
@@ -276,7 +279,8 @@ public CloseableIterable<QueryData> process(GenericQueryConfiguration genericCon

// Reset the planned script.
this.plannedScript = null;

this.federatedConfig = new FederatedQueryConfiguration();

log.debug("Federated query: " + query);

ShardQueryConfiguration originalConfig = (ShardQueryConfiguration) genericConfig;
@@ -323,6 +327,8 @@ public CloseableIterable<QueryData> process(GenericQueryConfiguration genericCon
try {
CloseableIterable<QueryData> queryData = subPlan.process(configCopy, query, settings, scannerFactory);
results.addIterable(queryData);
federatedConfig.addConfig(configCopy);
federatedConfig.addQueryData(queryData);
} catch (DatawaveQueryException|DatawaveFatalQueryException e) {
log.warn("Exception occured when processing sub-plan [" + totalProcessed + " of " + dateRanges.size() + "] against date range (" + subStartDate
+ "-" + subEndDate + ")", e);
@@ -358,7 +364,32 @@ public CloseableIterable<QueryData> process(GenericQueryConfiguration genericCon
// Copy over any changes from the first sub-config to the original config. This will not affect the start date, end date, or timers of the original
// config.
copySubConfigPropertiesToOriginal(originalConfig, firstConfigCopy);


// Uncomment the following debug block to see what query strings and query datas resulted from the sub-queries. Note that this will result in an
// exception being thrown down the line when iterator() is called again on the query data iterables, so this should be uncommented only for debugging
// purposes.
// Debug block start
/*log.debug("Federated query results:");
List<ShardQueryConfiguration> configs = federatedConfig.getConfigs();
List<CloseableIterable<QueryData>> queryDatas = federatedConfig.getQueryDatas();
for (int i = 0; i < totalProcessed; i++) {
ShardQueryConfiguration config = configs.get(i);
log.debug("Sub-query " + i + " over " + dateFormat.format(config.getBeginDate()) + "-" + dateFormat.format(config.getEndDate()));
log.debug("Query String: " + config.getQueryString());
Iterator<QueryData> iter = queryDatas.get(i).iterator();
int queryDataCount = 0;
if (iter.hasNext()) {
while (iter.hasNext()) {
log.debug("Query Data " + queryDataCount + ": " + iter.next());
queryDataCount++;
}
} else {
log.debug("Empty query data iterable returned");
}
}*/
// Debug block end


// Return the collected results.
return results;
}
Original file line number Diff line number Diff line change
@@ -8,6 +8,9 @@
import java.util.ArrayList;
import java.util.Collection;

import com.fasterxml.jackson.jaxrs.json.annotation.JSONP;
import datawave.query.planner.DefaultQueryPlanner;
import datawave.query.planner.FederatedQueryPlanner;
import org.apache.log4j.Logger;
import org.junit.Assert;
import org.junit.BeforeClass;
@@ -52,8 +55,11 @@ public MaxExpansionIndexOnlyQueryTest() {
// test cases

@Test
public void testMaxValueRegexIndexOnly() throws Exception {
log.info("------ testMaxValueRegexIndexOnly ------");
public void testMaxValueRegexIndexOnly_defaultQueryPlanner() throws Exception {
log.info("------ testMaxValueRegexIndexOnly : " + DefaultQueryPlanner.class.getSimpleName() + " ------");

this.logic.setQueryPlanner(new DefaultQueryPlanner());

// set regex to match multiple fields
String city = EQ_OP + "'a-1'";
String code = RE_OP + "'b.*'";
@@ -76,10 +82,43 @@ public void testMaxValueRegexIndexOnly() throws Exception {
runTest(query, query);
parsePlan(VALUE_THRESHOLD_JEXL_NODE, 1);
}



/**
 * Runs the index-only regex query with a {@link FederatedQueryPlanner} at three settings: a threshold of 20, a threshold of 2 (where a
 * {@link DatawaveFatalQueryException} is expected), and a threshold of 2 after {@code ivaratorConfig()} has been applied.
 */
@Test
public void testMaxValueRegexIndexOnly_federatedQueryPlanner() throws Exception {
    log.info("------ testMaxValueRegexIndexOnly : " + FederatedQueryPlanner.class.getSimpleName() + " ------");

    this.logic.setQueryPlanner(new FederatedQueryPlanner());

    // An equality term on CITY joined with a regex term on STATE.
    final String cityTerm = CitiesDataType.CityField.CITY.name() + EQ_OP + "'a-1'";
    final String stateTerm = CitiesDataType.CityField.STATE.name() + RE_OP + "'b.*'";
    final String query = cityTerm + AND_OP + stateTerm;

    // Phase 1: threshold of 20; the plan is checked against a VALUE_THRESHOLD_JEXL_NODE count of 0.
    this.logic.setMaxValueExpansionThreshold(20);
    runTest(query, query);
    parsePlan(VALUE_THRESHOLD_JEXL_NODE, 0);

    // Phase 2: threshold of 2; the run must fail with a DatawaveFatalQueryException.
    this.logic.setMaxValueExpansionThreshold(2);
    boolean failedAsExpected = false;
    try {
        runTest(query, query);
    } catch (DatawaveFatalQueryException expected) {
        failedAsExpected = true;
    }
    if (!failedAsExpected) {
        Assert.fail("exception expected");
    }

    // Phase 3: same threshold after ivaratorConfig(); the run must succeed and the plan is checked against a count of 1.
    ivaratorConfig();
    runTest(query, query);
    parsePlan(VALUE_THRESHOLD_JEXL_NODE, 1);
}

@Test
public void testMaxValueAnyField() throws Exception {
log.info("------ testMaxValueAnyField ------");
public void testMaxValueAnyField_defaultQueryPlanner() throws Exception {
log.info("------ testMaxValueAnyField : " + DefaultQueryPlanner.class.getSimpleName() + " ------");

this.logic.setQueryPlanner(new DefaultQueryPlanner());

String regexT = RE_OP + "'b-.*'";
String regexA = RE_OP + "'a-.*'";
String query = Constants.ANY_FIELD + regexT + AND_OP + Constants.ANY_FIELD + regexA;
@@ -108,10 +147,48 @@ public void testMaxValueAnyField() throws Exception {
runTest(query, expect);
parsePlan(VALUE_THRESHOLD_JEXL_NODE, 2);
}

/**
 * Runs a query of two any-field regex terms with a {@link FederatedQueryPlanner} at thresholds of 10, 2 (where a {@link RuntimeException} is
 * expected), 2 after {@code ivaratorConfig()}, and 1 after {@code ivaratorConfig()}.
 */
@Test
public void testMaxValueAnyField_federatedQueryPlanner() throws Exception {
    log.info("------ testMaxValueAnyField : " + FederatedQueryPlanner.class.getSimpleName() + " ------");

    this.logic.setQueryPlanner(new FederatedQueryPlanner());

    final String bPattern = RE_OP + "'b-.*'";
    final String aPattern = RE_OP + "'a-.*'";
    final String query = Constants.ANY_FIELD + bPattern + AND_OP + Constants.ANY_FIELD + aPattern;
    // The expected query is built by passing each any-field phrase through the data manager, in the same order as the query terms.
    final String expect = this.dataManager.convertAnyField(bPattern) + AND_OP + this.dataManager.convertAnyField(aPattern);

    // Phase 1: threshold of 10; the plan is checked against a VALUE_THRESHOLD_JEXL_NODE count of 0.
    this.logic.setMaxValueExpansionThreshold(10);
    runTest(query, expect);
    parsePlan(VALUE_THRESHOLD_JEXL_NODE, 0);

    // Phase 2: threshold of 2; the run must fail with some RuntimeException.
    this.logic.setMaxValueExpansionThreshold(2);
    boolean failedAsExpected = false;
    try {
        runTest(query, expect);
    } catch (RuntimeException expected) {
        failedAsExpected = true;
    }
    if (!failedAsExpected) {
        Assert.fail("exception expected");
    }

    // Phase 3: same threshold after ivaratorConfig(); the plan is checked against a count of 1.
    ivaratorConfig();
    runTest(query, expect);
    parsePlan(VALUE_THRESHOLD_JEXL_NODE, 1);

    // Phase 4: threshold of 1, with ivaratorConfig() applied again; the plan is checked against a count of 2.
    this.logic.setMaxValueExpansionThreshold(1);
    ivaratorConfig();
    runTest(query, expect);
    parsePlan(VALUE_THRESHOLD_JEXL_NODE, 2);
}

@Test
public void testMaxValueNegAnyField() throws Exception {
log.info("------ testMaxValueNegAnyField ------");
public void testMaxValueNegAnyField_defaultQueryPlanner() throws Exception {
log.info("------ testMaxValueNegAnyField : " + DefaultQueryPlanner.class.getSimpleName() + " ------");

this.logic.setQueryPlanner(new DefaultQueryPlanner());

String regexPhrase = RE_OP + "'a.*'";
String country = "'b-StaTe'";
String query = Constants.ANY_FIELD + EQ_OP + country + AND_OP + NOT_OP + "(" + Constants.ANY_FIELD + regexPhrase + ")";
@@ -133,6 +210,34 @@ public void testMaxValueNegAnyField() throws Exception {
runTest(query, expect);
parsePlan(VALUE_THRESHOLD_JEXL_NODE, 1);
}

/**
 * Runs an any-field equality term combined with a negated any-field regex term with a {@link FederatedQueryPlanner} at thresholds of 10, 1
 * (where a {@link FullTableScansDisallowedException} is expected), and 1 after {@code ivaratorConfig()}.
 */
@Test
public void testMaxValueNegAnyField_federatedQueryPlanner() throws Exception {
    log.info("------ testMaxValueNegAnyField : " + FederatedQueryPlanner.class.getSimpleName() + " ------");

    this.logic.setQueryPlanner(new FederatedQueryPlanner());

    // Positive any-field equality term, and a negated any-field regex term wrapped in parentheses.
    final String positiveTerm = Constants.ANY_FIELD + EQ_OP + "'b-StaTe'";
    final String negatedTerm = NOT_OP + "(" + Constants.ANY_FIELD + RE_OP + "'a.*'" + ")";
    final String query = positiveTerm + AND_OP + negatedTerm;
    final String expect = CitiesDataType.CityField.STATE.name() + EQ_OP + "'bi-s'";

    // Phase 1: threshold of 10; the plan is checked against a VALUE_THRESHOLD_JEXL_NODE count of 0.
    this.logic.setMaxValueExpansionThreshold(10);
    runTest(query, expect);
    parsePlan(VALUE_THRESHOLD_JEXL_NODE, 0);

    // Phase 2: threshold of 1; the run must fail with a FullTableScansDisallowedException.
    this.logic.setMaxValueExpansionThreshold(1);
    boolean failedAsExpected = false;
    try {
        runTest(query, expect);
    } catch (FullTableScansDisallowedException expected) {
        failedAsExpected = true;
    }
    if (!failedAsExpected) {
        Assert.fail("exception expected");
    }

    // Phase 3: same threshold after ivaratorConfig(); the plan is checked against a count of 1.
    ivaratorConfig();
    runTest(query, expect);
    parsePlan(VALUE_THRESHOLD_JEXL_NODE, 1);
}

// ============================================
// implemented abstract methods
Loading