diff --git a/docs/configuration/queries.md b/docs/configuration/queries.md index ffb3bf90a..25758d0b6 100644 --- a/docs/configuration/queries.md +++ b/docs/configuration/queries.md @@ -130,16 +130,21 @@ The results may look like the following: ### Configuration The `template` attribute has the following properties: -| property | required | default | description | example | -|----------|----------|---------|---------------------------------------------------------------------|-----------------------------| -| endpoint | yes | | The endpoint to query. | `http://dbpedia.org/sparql` | -| limit | no | `2000` | The maximum number of instances per query template. | `100` | -| save | no | `true` | If set to `true`, query instances will be saved in a separate file. | `false` | +| property | required | default | description | example | +|-------------------|----------|---------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------| +| endpoint | yes | | The endpoint to query. | `http://dbpedia.org/sparql` | +| limit | no | `2000` | The maximum number of instances per query template. | `100` | +| save | no | `true` | If set to `true`, query instances will be saved in a separate file. | `false` | +| individualResults | no | `false` | If set to `true`, the results of each individual template instance will be reported, otherwise if set to `false` their results will be subsumed for the query template. | `true` | If the `save` attribute is set to `true`, the instances will be saved in a separate file in the same directory as the query templates. If the query templates are stored in a folder, the instances will be saved in the parent directory. +If the `individualResults` attribute is set to `false`, +the results of the instances will be subsumed for the query template. 
+The query template will then be considered as an actual query in the results. + Example of query configuration with query templates: ```yaml queries: @@ -149,4 +154,5 @@ queries: endpoint: "http://dbpedia.org/sparql" limit: 100 save: true + individualResults: true ``` diff --git a/example-suite.yml b/example-suite.yml index c4b3d2803..50d65cef2 100644 --- a/example-suite.yml +++ b/example-suite.yml @@ -75,10 +75,11 @@ tasks: requestType: post query queries: path: "./example/query_pattern.txt" - pattern: + template: endpoint: "https://dbpedia.org/sparql" limit: 1000 save: false + individualResults: false timeout: 180s completionTarget: duration: 1000s diff --git a/graalvm/suite.yml b/graalvm/suite.yml index 75c9bd4c7..5ae514bd3 100644 --- a/graalvm/suite.yml +++ b/graalvm/suite.yml @@ -6,7 +6,7 @@ connections: - name: "Blazegraph" version: "1.1.1" dataset: "DatasetName" - endpoint: "http://localhost:9999/blazegraph/sparql" + endpoint: "https://query.wikidata.org/" authentication: user: "user" password: "test" @@ -60,13 +60,14 @@ tasks: seed: 123 lang: "SPARQL" template: - endpoint: "http://dbpedia.org/sparql" + endpoint: "https://dbpedia.org/sparql" limit: 1 save: false + individualResults: false timeout: 2s connection: Blazegraph completionTarget: - duration: 1s + duration: 0.5s acceptHeader: "application/sparql-results+json" requestType: get query parseResults: true @@ -78,7 +79,7 @@ tasks: timeout: 3m connection: Blazegraph completionTarget: - duration: 1s + duration: 0.5s requestType: get query acceptHeader: "application/sparql-results+json" - number: 1 diff --git a/schema/iguana-schema.json b/schema/iguana-schema.json index 0ad54c1b7..cf1539f96 100644 --- a/schema/iguana-schema.json +++ b/schema/iguana-schema.json @@ -351,6 +351,9 @@ }, "save": { "type": "boolean" + }, + "individualResults": { + "type": "boolean" } }, "required": [ diff --git a/src/main/java/org/aksw/iguana/cc/metrics/impl/AggregatedExecutionStatistics.java 
b/src/main/java/org/aksw/iguana/cc/metrics/impl/AggregatedExecutionStatistics.java index 8582f2020..72ca4bf5e 100644 --- a/src/main/java/org/aksw/iguana/cc/metrics/impl/AggregatedExecutionStatistics.java +++ b/src/main/java/org/aksw/iguana/cc/metrics/impl/AggregatedExecutionStatistics.java @@ -29,7 +29,7 @@ public AggregatedExecutionStatistics() { public Model createMetricModel(List workers, List[][] data, IRES.Factory iresFactory) { Model m = ModelFactory.createDefaultModel(); for (var worker : workers) { - for (int i = 0; i < worker.config().queries().getQueryCount(); i++) { + for (int i = 0; i < worker.config().queries().getRepresentedQueryCount(); i++) { Resource queryRes = iresFactory.getWorkerQueryResource(worker, i); m.add(createAggregatedModel(data[(int) worker.getWorkerID()][i], queryRes)); } diff --git a/src/main/java/org/aksw/iguana/cc/metrics/impl/EachExecutionStatistic.java b/src/main/java/org/aksw/iguana/cc/metrics/impl/EachExecutionStatistic.java index c6e1bf95a..ce45b63d7 100644 --- a/src/main/java/org/aksw/iguana/cc/metrics/impl/EachExecutionStatistic.java +++ b/src/main/java/org/aksw/iguana/cc/metrics/impl/EachExecutionStatistic.java @@ -26,7 +26,7 @@ public EachExecutionStatistic() { public Model createMetricModel(List workers, List[][] data, IRES.Factory iresFactory) { Model m = ModelFactory.createDefaultModel(); for (var worker : workers) { - for (int i = 0; i < worker.config().queries().getQueryCount(); i++) { + for (int i = 0; i < worker.config().queries().getRepresentedQueryCount(); i++) { Resource workerQueryResource = iresFactory.getWorkerQueryResource(worker, i); Resource queryRes = IRES.getResource(worker.config().queries().getQueryId(i)); BigInteger run = BigInteger.ONE; diff --git a/src/main/java/org/aksw/iguana/cc/metrics/impl/QMPH.java b/src/main/java/org/aksw/iguana/cc/metrics/impl/QMPH.java index ab60be12a..8054cad66 100644 --- a/src/main/java/org/aksw/iguana/cc/metrics/impl/QMPH.java +++ 
b/src/main/java/org/aksw/iguana/cc/metrics/impl/QMPH.java @@ -28,7 +28,7 @@ public Number calculateTaskMetric(List workers, List[] data) { BigDecimal successes = BigDecimal.ZERO; - BigDecimal noq = BigDecimal.valueOf(worker.queries().getQueryCount()); + BigDecimal noq = BigDecimal.valueOf(worker.queries().getExecutableQueryCount()); Duration totalTime = Duration.ZERO; for (List datum : data) { for (HttpWorker.ExecutionStats exec : datum) { diff --git a/src/main/java/org/aksw/iguana/cc/query/QueryData.java b/src/main/java/org/aksw/iguana/cc/query/QueryData.java index 02858cb8f..70ecce540 100644 --- a/src/main/java/org/aksw/iguana/cc/query/QueryData.java +++ b/src/main/java/org/aksw/iguana/cc/query/QueryData.java @@ -2,6 +2,7 @@ import org.apache.jena.update.UpdateFactory; +import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Collection; @@ -12,9 +13,23 @@ * At the moment, it only stores if the query is an update query or not. * * @param queryId The id of the query - * @param update If the query is an update query */ -public record QueryData(int queryId, boolean update) { +public record QueryData(int queryId, QueryType type, Integer templateId) { + public enum QueryType { + DEFAULT, + UPDATE, + TEMPLATE, + TEMPLATE_INSTANCE + } + + /** + * Generates a list of QueryData objects for a collection of queries. + * The method uses the Jena library to check if the query is an update query. + * It only checks if the query is an update query or not and sets their index in the order they were given. + * + * @param queries collection of input streams of queries + * @return list of QueryData objects + */ public static List generate(Collection queries) { final var queryData = new ArrayList(); int i = 0; @@ -25,8 +40,31 @@ public static List generate(Collection queries) { } catch (Exception e) { update = false; } - queryData.add(new QueryData(i++, update)); + queryData.add(new QueryData(i++, update ? 
QueryType.UPDATE : QueryType.DEFAULT, null)); + try { + query.close(); + } catch (IOException ignored) {} } return queryData; } + + /** + * Checks if the given query is an update query. + * The method uses the Jena library to check if the query is an update query. + * + * @param query input stream of the query + * @return true if the query is an update query, false otherwise + */ + public static boolean checkIfUpdate(InputStream query) { + try { + UpdateFactory.read(query); // Throws an exception if the query is not an update query + return true; + } catch (Exception e) { + return false; + } + } + + public boolean update() { + return type == QueryType.UPDATE; + } } diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index 3bf41a02e..e01fb89ae 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -6,7 +6,6 @@ import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.deser.std.StdDeserializer; import org.aksw.iguana.cc.query.QueryData; -import org.aksw.iguana.cc.query.list.impl.StringListQueryList; import org.aksw.iguana.cc.query.selector.QuerySelector; import org.aksw.iguana.cc.query.selector.impl.LinearQuerySelector; import org.aksw.iguana.cc.query.selector.impl.RandomQuerySelector; @@ -17,23 +16,15 @@ import org.aksw.iguana.cc.query.source.impl.FileLineQuerySource; import org.aksw.iguana.cc.query.source.impl.FileSeparatorQuerySource; import org.aksw.iguana.cc.query.source.impl.FolderQuerySource; -import org.apache.jena.query.*; -import org.apache.jena.sparql.exec.http.QueryExecutionHTTP; -import org.apache.jena.sparql.exec.http.QueryExecutionHTTPBuilder; -import org.apache.jena.sparql.service.single.ServiceExecutor; -import org.apache.jena.sparql.service.single.ServiceExecutorHttp; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; import java.io.IOException; import java.io.InputStream; import java.net.URI; -import java.nio.file.Files; import java.nio.file.Path; import java.util.*; import java.util.function.Supplier; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -141,18 +132,42 @@ public String value() { } } - public record Template(@JsonProperty(required = true) URI endpoint, Long limit, Boolean save) { - public Template(URI endpoint, Long limit, Boolean save) { + public record Template(@JsonProperty(required = true) URI endpoint, Long limit, Boolean save, Boolean individualResults) { + public Template(URI endpoint, Long limit, Boolean save, Boolean individualResults) { this.endpoint = endpoint; this.limit = limit == null ? 2000 : limit; this.save = save == null || save; + this.individualResults = individualResults != null && individualResults; } } } - public record QueryStringWrapper(int index, String query, boolean update) {} + /** + * Wrapper for the next query that will be executed. + * The wrapper contains the query as a string. + * The result id is only set if the query is a template instance. + * They are used to aggregate the results of multiple queries by using the same id. + * + * @param index the index of the query + * @param query the query string + * @param update whether the query is an update query + * @param resultId the query id that should be used inside the result + */ + public record QueryStringWrapper(int index, String query, boolean update, Integer resultId) {} - public record QueryStreamWrapper(int index, boolean cached, Supplier queryInputStreamSupplier, boolean update) {} + /** + * Wrapper for the next query that will be executed. + * The wrapper contains the query as an input stream supplier, that generates an input stream with the query. + * The result id is only set if the query is a template instance. 
+ * They are used to aggregate the results of multiple queries by using the same id. + * + * @param index the index of the query + * @param cached whether the query is cached in memory + * @param queryInputStreamSupplier the supplier that generates the input stream with the query + * @param update whether the query is an update query + * @param resultId the query id that should be used inside the result + */ + public record QueryStreamWrapper(int index, boolean cached, Supplier queryInputStreamSupplier, boolean update, Integer resultId) {} protected static final Logger LOGGER = LoggerFactory.getLogger(QueryHandler.class); @@ -161,7 +176,18 @@ public record QueryStreamWrapper(int index, boolean cached, Supplier queryData; + protected List queryData; + + // stores the number of queries that can be executed + private int executableQueryCount = 0; + + // Stores the number of queries that are represented in the results. + // If individual results are disabled for query templates, + // the template instances will represent the template by using its id. + // Otherwise, each template instance will represent itself by using its own unique id, and therefore + // query templates won't be represented in the results because they can't be executed otherwise. + // 'executableQueryCount == representedQueryCount' is always true when templates are not used. 
+ private int representedQueryCount = 0; private int workerCount = 0; // give every worker inside the same worker config an offset seed private int totalWorkerCount = 0; @@ -186,61 +212,32 @@ public QueryHandler(Config config) throws IOException { // initialize queryList based on the given configuration if (config.template() != null) { - queryList = initializeTemplateQueryHandler(querySource); + final var templateHandler = new QueryTemplateHandler(config.template); + queryList = templateHandler.initializeTemplateQueryHandler(querySource); + queryData = templateHandler.getQueryData(); + executableQueryCount = templateHandler.getExecutableQueryCount(); + representedQueryCount = templateHandler.getRepresentedQueryCount(); } else { queryList = (config.caching()) ? new FileCachingQueryList(querySource) : new FileReadingQueryList(querySource); + queryData = QueryData.generate(IntStream.range(0, queryList.size()).mapToObj(i -> { + try { + return queryList.getQueryStream(i); + } catch (IOException e) { + throw new RuntimeException("Couldn't read query stream", e); + } + }).collect(Collectors.toList())); + executableQueryCount = queryList.size(); + representedQueryCount = queryList.size(); } this.hashCode = queryList.hashCode(); - this.queryData = QueryData.generate(IntStream.range(0, queryList.size()).mapToObj(i -> { - try { - return queryList.getQueryStream(i); - } catch (IOException e) { - throw new RuntimeException("Couldn't read query stream", e); - } - }).collect(Collectors.toList())); } public void setTotalWorkerCount(int workers) { this.totalWorkerCount = workers; } - private QueryList initializeTemplateQueryHandler(QuerySource templateSource) throws IOException { - QuerySource querySource = templateSource; - final var originalPath = templateSource.getPath(); - final var postfix = String.format("_instances_f%s_l%s.txt", - Integer.toUnsignedString(this.config.template.endpoint.hashCode()), Integer.toUnsignedString((int) this.config.template.limit.longValue())); - 
final Path instancePath = Files.isDirectory(originalPath) ? - originalPath.resolveSibling(originalPath.getFileName() + postfix) : // if the source of the query templates is a folder, the instances will be saved in a file with the same name as the folder - originalPath.resolveSibling(originalPath.getFileName().toString().split("\\.")[0] + postfix); // if the source of the query templates is a file, the instances will be saved in a file with the same name as the file - if (Files.exists(instancePath)) { - LOGGER.info("Already existing query template instances have been found and will be reused. Delete the following file to regenerate them: {}", instancePath.toAbsolutePath()); - querySource = createQuerySource(instancePath); // if the instances already exist, use them - } else { - final List instances = instantiateTemplateQueries(querySource, config.template); - if (config.template.save) { - // save the instances to a file - Files.createFile(instancePath); - try (var writer = Files.newBufferedWriter(instancePath)) { - for (String instance : instances) { - writer.write(instance); - writer.newLine(); - } - } - // create a new query source based on the new instance file - querySource = createQuerySource(instancePath); - } else { - // query source isn't necessary, because queries aren't stored in a file, - // directly return a list of the instances instead - return new StringListQueryList(instances); - } - } - return (config.caching()) ? - new FileCachingQueryList(querySource) : // if caching is enabled, cache the instances - new FileReadingQueryList(querySource); // if caching is disabled, read the instances from the file every time - } - /** * Creates a QuerySource based on the given path and the format in the configuration. 
* @@ -265,20 +262,43 @@ public QuerySelector getQuerySelectorInstance() { throw new IllegalStateException("Unknown query selection order: " + config.order()); } + public QuerySelector getQuerySelectorInstance(Config.Order type) { + switch (type) { + case LINEAR -> { return new LinearQuerySelector(queryList.size()); } + case RANDOM -> { return new RandomQuerySelector(queryList.size(), config.seed() + workerCount++); } + } + + throw new IllegalStateException("Unknown query selection order: " + type); + } + public QueryStringWrapper getNextQuery(QuerySelector querySelector) throws IOException { - final var queryIndex = querySelector.getNextIndex(); - return new QueryStringWrapper(queryIndex, queryList.getQuery(queryIndex), queryData.get(queryIndex).update()); + final var queryIndex = getNextQueryIndex(querySelector); + return new QueryStringWrapper(queryData.get(queryIndex[0]).queryId(), queryList.getQuery(queryIndex[0]), queryData.get(queryIndex[0]).update(), queryIndex[1]); } public QueryStreamWrapper getNextQueryStream(QuerySelector querySelector) { - final var queryIndex = querySelector.getNextIndex(); - return new QueryStreamWrapper(queryIndex, config.caching(), () -> { + final var queryIndex = getNextQueryIndex(querySelector); + return new QueryStreamWrapper(queryData.get(queryIndex[0]).queryId(), config.caching(), () -> { try { - return this.queryList.getQueryStream(queryIndex); + return this.queryList.getQueryStream(queryIndex[0]); } catch (IOException e) { throw new RuntimeException(e); } - }, queryData.get(queryIndex).update()); + }, queryData.get(queryIndex[0]).update(), queryIndex[1]); + } + + private Integer[] getNextQueryIndex(QuerySelector querySelector) { + int queryIndex; + do { + queryIndex = querySelector.getNextIndex(); + } while (queryData.get(queryIndex).type() == QueryData.QueryType.TEMPLATE); // query templates can't be executed directly + + // if individual results are disabled, the query instance will represent the template, by using its id + 
Integer resultId = null; + if (queryData.get(queryIndex).type() == QueryData.QueryType.TEMPLATE_INSTANCE && !config.template().individualResults) { + resultId = queryData.get(queryIndex).templateId(); + } + return new Integer[]{ queryIndex, resultId }; } @Override @@ -286,8 +306,12 @@ public int hashCode() { return hashCode; } - public int getQueryCount() { - return this.queryList.size(); + public int getExecutableQueryCount() { + return executableQueryCount; + } + + public int getRepresentedQueryCount() { + return representedQueryCount; } public String getQueryId(int i) { @@ -301,8 +325,8 @@ public String getQueryId(int i) { * @return String[] of query ids */ public String[] getAllQueryIds() { - String[] out = new String[queryList.size()]; - for (int i = 0; i < queryList.size(); i++) { + String[] out = new String[getRepresentedQueryCount()]; + for (int i = 0; i < getRepresentedQueryCount(); i++) { out[i] = getQueryId(i); } return out; @@ -318,83 +342,4 @@ public Config getConfig() { } - /** - * Query templates are queries containing placeholders for some terms. - * Replacement candidates are identified by querying a given endpoint. - * This is done in a way that the resulting queries will yield results against endpoints with the same data. - * The placeholders are written in the form of %%var[0-9]+%%, where [0-9]+ - * represents any number. - *

- * Exemplary template:
- * SELECT * WHERE {?s %%var1%% ?o . ?o <http://exa.com> %%var2%%}
- * This template will then be converted to:
- * SELECT ?var1 ?var2 {?s ?var1 ?o . ?o <http://exa.com> ?var2}
- * and will request query solutions from the given sparql endpoint (e.g DBpedia).
- * The solutions will then be instantiated into the template. - * The result may look like the following:
- * SELECT * WHERE {?s <http://prop/1> ?o . ?o <http://exa.com> "123"}
- * SELECT * WHERE {?s <http://prop/1> ?o . ?o <http://exa.com> "12"}
- * SELECT * WHERE {?s <http://prop/2> ?o . ?o <http://exa.com> "1234"}
- */ - private static List instantiateTemplateQueries(QuerySource querySource, Config.Template config) throws IOException { - // charset for generating random varia ble names - final String charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; - final Random random = new Random(); - - final var templateQueries = new FileCachingQueryList(querySource); - final Pattern template = Pattern.compile("%%[a-zA-Z0-9_]+%%"); - final var instances = new ArrayList(); - for (int i = 0; i < templateQueries.size(); i++) { - // replace all variables in the query template with SPARQL variables - // and store the variable names - var templateQueryString = templateQueries.getQuery(i); - final Matcher matcher = template.matcher(templateQueryString); - final var variables = new LinkedHashMap(); // a set, that preserves insertion order - while (matcher.find()) { - final var match = matcher.group(); - if (variables.containsKey(match)) continue; - String variableName = match.replaceAll("%%", ""); - while (templateQueryString.contains("?" + variableName) || templateQueryString.contains("$" + variableName)) { // generate random variable name with 20 characters until it is unique - variableName = IntStream.range(0, 20).mapToObj(m -> String.valueOf(charset.charAt(random.nextInt(charset.length())))).collect(Collectors.joining()); - } - final var variable = "?" 
+ variableName; - variables.put(match, variable); - templateQueryString = templateQueryString.replaceAll(match, variable); - } - - // if no placeholders are found, the query is already a valid SPARQL query - if (variables.isEmpty()) { - instances.add(templateQueryString); - continue; - } - - // build SELECT query for finding bindings for the variables - final var templateQuery = QueryFactory.create(templateQueryString); - final var whereClause = "WHERE " + templateQuery.getQueryPattern(); - final var selectQueryString = new ParameterizedSparqlString(); - selectQueryString.setCommandText("SELECT DISTINCT " + String.join(" ", variables.values())); - selectQueryString.append(" " + whereClause); - selectQueryString.append(" LIMIT " + config.limit()); - selectQueryString.setNsPrefixes(templateQuery.getPrefixMapping()); - - // send request to SPARQL endpoint and instantiate the template based on results - try (QueryExecution exec = QueryExecutionHTTP.service(config.endpoint().toString(), selectQueryString.asQuery())) { - ResultSet resultSet = exec.execSelect(); - if (!resultSet.hasNext()) { - LOGGER.warn("No results for query template: {}", templateQueryString); - } - int count = 0; - while (resultSet.hasNext() && count++ < config.limit()) { - var instance = new ParameterizedSparqlString(templateQueryString); - QuerySolution solution = resultSet.next(); - for (String var : resultSet.getResultVars()) { - instance.clearParam(var); - instance.setParam(var, solution.get(var)); - } - instances.add(instance.toString()); - } - } - } - return instances; - } } diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryTemplateHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryTemplateHandler.java new file mode 100644 index 000000000..29b2811a6 --- /dev/null +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryTemplateHandler.java @@ -0,0 +1,301 @@ +package org.aksw.iguana.cc.query.handler; + +import org.aksw.iguana.cc.query.QueryData; +import 
org.aksw.iguana.cc.query.list.QueryList; +import org.aksw.iguana.cc.query.list.impl.FileCachingQueryList; +import org.aksw.iguana.cc.query.list.impl.StringListQueryList; +import org.aksw.iguana.cc.query.source.QuerySource; +import org.apache.jena.query.*; +import org.apache.jena.sparql.exec.http.QueryExecutionHTTP; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Random; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +/** + * This class is responsible for handling query templates. + * Query templates are queries containing placeholders for some terms. + * Replacement candidates are identified by querying a given endpoint. + * The placeholders are written in the form of %%var[0-9]+%%, where [0-9]+ + * represents any number. + *

+ * Exemplary template:
+ * SELECT * WHERE {?s %%var1%% ?o . ?o <http://exa.com> %%var2%%}
+ * This template will then be converted to:
+ * SELECT ?var1 ?var2 {?s ?var1 ?o . ?o <http://exa.com> ?var2}
+ * and will request query solutions from the given SPARQL endpoint (e.g., DBpedia).
+ * The solutions will then be instantiated into the template. + * The result may look like the following:
+ * SELECT * WHERE {?s <http://prop/1> ?o . ?o <http://exa.com> "123"}
+ * SELECT * WHERE {?s <http://prop/1> ?o . ?o <http://exa.com> "12"}
+ * SELECT * WHERE {?s <http://prop/2> ?o . ?o <http://exa.com> "1234"}
+ * The template data that this class returns will contain a list of all queries, + * where the first queries are the original queries including the query templates. + * The query instances will be appended to the original queries. + * The queryData is later used to keep track of the queries, their types, ids, and relations. + */ +public class QueryTemplateHandler { + private record TemplateData(List queries, int templates, int[] indices, int[] instanceNumber, int instanceStart) {} + + private static final Logger LOGGER = LoggerFactory.getLogger(QueryTemplateHandler.class); + + private List queryData; + private int executableQueryCount = 0; // stores the number of queries that can be executed + private int representedQueryCount = 0; // stores the number of queries that are represented in the results + private final QueryHandler.Config.Template templateConfig; + + + public QueryTemplateHandler(QueryHandler.Config.Template templateConfig) { + queryData = new ArrayList<>(); + this.templateConfig = templateConfig; + } + + public QueryList initializeTemplateQueryHandler(QuerySource templateSource) throws IOException { + final var originalPath = templateSource.getPath(); + final var postfix = String.format("_instances_f%s_l%s.txt", + Integer.toUnsignedString(templateConfig.endpoint().hashCode()), Integer.toUnsignedString((int) templateConfig.limit().longValue())); + final Path instancePath = Files.isDirectory(originalPath) ? 
+ originalPath.resolveSibling(originalPath.getFileName() + postfix) : // if the source of the query templates is a folder, the instances will be saved in a file with the same name as the folder + originalPath.resolveSibling(originalPath.getFileName().toString().split("\\.")[0] + postfix); // if the source of the query templates is a file, the instances will be saved in a file with the same name as the file + TemplateData templateData; + + if (Files.exists(instancePath)) { + LOGGER.info("Already existing query template instances have been found and will be reused. Delete the following file to regenerate them: {}", instancePath.toAbsolutePath()); + + // read in the template data + // the header contains the number of templates and the index (index doesn't count headers) of the first instance + // afterward for each template the index of the template and the number of instances are stored + String header; + try (var reader = Files.newBufferedReader(instancePath)) { + header = reader.readLine(); + Pattern digitRegex = Pattern.compile("\\d+"); + Matcher matcher = digitRegex.matcher(header); + if (!matcher.find()) throw new IOException("Invalid instance file header"); + int templates = Integer.parseInt(matcher.group()); + if (!matcher.find()) throw new IOException("Invalid instance file header"); + int instanceStart = Integer.parseInt(matcher.group()); + final var indices = new int[templates]; + final var instanceNumber = new int[templates]; + for (int i = 0; i < templates; i++) { + if (!matcher.find()) throw new IOException("Invalid instance file header"); + indices[i] = Integer.parseInt(matcher.group()); + if (!matcher.find()) throw new IOException("Invalid instance file header"); + instanceNumber[i] = Integer.parseInt(matcher.group()); + } + templateData = new TemplateData(reader.lines().toList(), templates, indices, instanceNumber, instanceStart); + } + } else { + templateData = instantiateTemplateQueries(templateSource, templateConfig); + + if 
(templateConfig.save()) { + // save the instances to a file + Files.createFile(instancePath); + + try (var writer = Files.newBufferedWriter(instancePath)) { + // write header line + writer.write(String.format("templates: %d instances_start: %d ", templateData.templates, templateData.instanceStart)); + writer.write(String.format("%s", IntStream.range(0, templateData.templates) + .mapToObj(i -> "index: " + templateData.indices[i] + " instances_count: " + templateData.instanceNumber[i]) + .collect(Collectors.joining(" ")))); + writer.newLine(); + // write queries and instances + for (String instance : templateData.queries) { + writer.write(instance); + writer.newLine(); + } + } + } + } + + // Initialize queryData based on the template data. + // This means that every query is assigned a type (default, update, template, template instance) and + // an id (index), based on their type, position in the query file and the configuration. + // Because of the way the "StresstestResultProcessor" is currently implemented, the ids of the queries + // that are represented in the results need to be continuous and start at 0. + // In the case of "individualResults" turned on, + // every normal query and every template instance should be represented. + // Therefore, the ids of the templates have to be the last ones. + // Otherwise every normal query and every template should be represented. + // Template instances are located at the end of the query list. + // The queryData is later used to keep track of the queries, their types, ids, and relations. + int templateIndex = 0; // index of the next template + int index = 0; // index of the current query + int instanceId = 0; // id of the current instance for the current template + queryData = new ArrayList<>(); + for (var query : templateData.queries) { + // Once the template instances are being iterated, the template index is reset + // and reused to track of which query template the instances are being iterated. 
+ if (index == templateData.instanceStart) templateIndex = 0; + + if (index >= templateData.instanceStart) { + // query is an instance of a template + + // if the instance id is equal to the number of instances for the current template, + // the next instances belong to the next template + if (instanceId++ == templateData.instanceNumber[templateIndex]) { + templateIndex++; + instanceId = 0; + } + + if (templateConfig.individualResults()) { + // In this case, the ids of the instances are shifted by the number of templates, + // because the templates received the last ids. + // This way, there are no gaps in the ids, + // and they can be correctly assigned to the results. + queryData.add(new QueryData(index++ - templateData.templates, QueryData.QueryType.TEMPLATE_INSTANCE, templateData.queries.size() - templateData.templates + templateIndex)); + } + else { + queryData.add(new QueryData(index++, QueryData.QueryType.TEMPLATE_INSTANCE, templateIndex)); + } + } else if (templateIndex < templateData.templates && index == templateData.indices[templateIndex]) { + // query is a template + if (templateConfig.individualResults()) { + // Give the templates the last ids. + index++; + queryData.add(new QueryData(templateData.queries.size() - templateData.templates + templateIndex++, QueryData.QueryType.TEMPLATE, null)); + } else { + templateIndex++; + queryData.add(new QueryData(index++, QueryData.QueryType.TEMPLATE, null)); + } + } else { + // query is neither a template nor an instance + final var update = QueryData.checkIfUpdate(new ByteArrayInputStream(query.getBytes())); + if (templateConfig.individualResults()) { + // Fill the gaps caused by the templates. + queryData.add(new QueryData(index++ - templateIndex, update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null)); + } + else { + queryData.add(new QueryData(index++, update ? 
QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null)); + } + } + + } + + // set the number of queries that can be executed and the number of queries + // that are represented in the results + this.executableQueryCount = templateData.queries.size() - templateData.templates; + this.representedQueryCount = templateConfig.individualResults() ? + templateData.queries.size() - templateData.templates : + templateData.instanceStart; + return new StringListQueryList(templateData.queries); + } + + + /** + * Query templates are queries containing placeholders for some terms. + * Replacement candidates are identified by querying a given endpoint. + * This is done in a way that the resulting queries will yield results against endpoints with the same data. + * The placeholders are written in the form of %%var[0-9]+%%, where [0-9]+ + * represents any number. + *

+ * Exemplary template:
+ * SELECT * WHERE {?s %%var1%% ?o . ?o <http://exa.com> %%var2%%}
+ * This template will then be converted to:
+ * SELECT ?var1 ?var2 {?s ?var1 ?o . ?o <http://exa.com> ?var2}
+ * and will request query solutions from the given SPARQL endpoint (e.g., DBpedia).
+ * The solutions will then be instantiated into the template. + * The result may look like the following:
+ * SELECT * WHERE {?s <http://prop/1> ?o . ?o <http://exa.com> "123"}
+ * SELECT * WHERE {?s <http://prop/1> ?o . ?o <http://exa.com> "12"}
+ * SELECT * WHERE {?s <http://prop/2> ?o . ?o <http://exa.com> "1234"}
+ * + * The template data that this method returns will contain a list of all queries, + * where the first queries are the original queries including the query templates. + * The query instances will be appended to the original queries. + */ + private static TemplateData instantiateTemplateQueries(QuerySource querySource, QueryHandler.Config.Template config) throws IOException { + // charset for generating random variable names + final String charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; + final Random random = new Random(); + + final var templateQueries = new FileCachingQueryList(querySource); + final Pattern template = Pattern.compile("%%[a-zA-Z0-9_]+%%"); + final var oldQueries = new ArrayList(); + final var instances = new ArrayList(); + + int templateNumber = 0; + final var indices = new ArrayList(); + final var instanceNumber = new ArrayList(); + + for (int i = 0; i < templateQueries.size(); i++) { + oldQueries.add(templateQueries.getQuery(i)); + // replace all variables in the query template with SPARQL variables + // and store the variable names + var templateQueryString = templateQueries.getQuery(i); + final Matcher matcher = template.matcher(templateQueryString); + final var variables = new LinkedHashMap(); // a set, that preserves insertion order + while (matcher.find()) { + final var match = matcher.group(); + if (variables.containsKey(match)) continue; + String variableName = match.replaceAll("%%", ""); + while (templateQueryString.contains("?" + variableName) || templateQueryString.contains("$" + variableName)) { // generate random variable name with 20 characters until it is unique + variableName = IntStream.range(0, 20).mapToObj(m -> String.valueOf(charset.charAt(random.nextInt(charset.length())))).collect(Collectors.joining()); + } + final var variable = "?" 
+ variableName; + variables.put(match, variable); + templateQueryString = templateQueryString.replaceAll(match, variable); + } + + // if no placeholders are found, the query is already a valid SPARQL query + if (variables.isEmpty()) { + continue; + } + + // build SELECT query for finding bindings for the variables + final var templateQuery = QueryFactory.create(templateQueryString); + final var whereClause = "WHERE " + templateQuery.getQueryPattern(); + final var selectQueryString = new ParameterizedSparqlString(); + selectQueryString.setCommandText("SELECT DISTINCT " + String.join(" ", variables.values())); + selectQueryString.append(" " + whereClause); + selectQueryString.append(" LIMIT " + config.limit()); + selectQueryString.setNsPrefixes(templateQuery.getPrefixMapping()); + + int count = 0; + // send request to SPARQL endpoint and instantiate the template based on results + try (QueryExecution exec = QueryExecutionHTTP.service(config.endpoint().toString(), selectQueryString.asQuery())) { + ResultSet resultSet = exec.execSelect(); + if (!resultSet.hasNext()) { + LOGGER.warn("No results for query template: {}", templateQueryString); + } + while (resultSet.hasNext() && count++ < config.limit()) { + var instance = new ParameterizedSparqlString(templateQueryString); + QuerySolution solution = resultSet.next(); + for (String var : resultSet.getResultVars()) { + instance.clearParam(var); + instance.setParam(var, solution.get(var)); + } + instances.add(instance.toString()); + } + } + // store the number of instances and the index of the template query + templateNumber++; + indices.add(i); + instanceNumber.add(count); + } + return new TemplateData(Stream.concat(oldQueries.stream(), instances.stream()).toList(), templateNumber, indices.stream().mapToInt(Integer::intValue).toArray(), instanceNumber.stream().mapToInt(Integer::intValue).toArray(), oldQueries.size()); + } + + public int getExecutableQueryCount() { + return executableQueryCount; + } + + public int 
getRepresentedQueryCount() { + return representedQueryCount; + } + + public List getQueryData() { + return queryData; + } +} diff --git a/src/main/java/org/aksw/iguana/cc/query/list/FileBasedQueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/FileBasedQueryList.java index 0256fee53..013093fe7 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/FileBasedQueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/FileBasedQueryList.java @@ -1,9 +1,11 @@ package org.aksw.iguana.cc.query.list; +import org.aksw.iguana.cc.query.QueryData; import org.aksw.iguana.cc.query.source.QuerySource; import java.io.IOException; import java.io.InputStream; +import java.util.List; /** * The abstract class for a QueryList. A query list provides the queries to the QueryHandler. diff --git a/src/main/java/org/aksw/iguana/cc/query/list/impl/FileCachingQueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/impl/FileCachingQueryList.java index 8f6c3a38d..a804702b4 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/impl/FileCachingQueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/impl/FileCachingQueryList.java @@ -1,5 +1,6 @@ package org.aksw.iguana.cc.query.list.impl; +import org.aksw.iguana.cc.query.QueryData; import org.aksw.iguana.cc.query.list.FileBasedQueryList; import org.aksw.iguana.cc.query.source.QuerySource; import org.aksw.iguana.commons.io.ByteArrayListInputStream; diff --git a/src/main/java/org/aksw/iguana/cc/query/list/impl/FileReadingQueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/impl/FileReadingQueryList.java index e3074be04..0999deba5 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/impl/FileReadingQueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/impl/FileReadingQueryList.java @@ -1,5 +1,6 @@ package org.aksw.iguana.cc.query.list.impl; +import org.aksw.iguana.cc.query.QueryData; import org.aksw.iguana.cc.query.list.FileBasedQueryList; import org.aksw.iguana.cc.query.source.QuerySource; diff 
--git a/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java b/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java index c748f3244..b8e4c8f38 100644 --- a/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java +++ b/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java @@ -63,8 +63,8 @@ public StresstestResultProcessor(String suiteID, this.workerQueryExecutions = new ArrayList[workers.size()][]; for (int i = 0; i < workers.size(); i++) { - this.workerQueryExecutions[i] = new ArrayList[workers.get(i).config().queries().getQueryCount()]; - for (int j = 0; j < workers.get(i).config().queries().getQueryCount(); j++) { + this.workerQueryExecutions[i] = new ArrayList[workers.get(i).config().queries().getRepresentedQueryCount()]; + for (int j = 0; j < workers.get(i).config().queries().getRepresentedQueryCount(); j++) { this.workerQueryExecutions[i][j] = new ArrayList<>(); } } @@ -128,7 +128,13 @@ public void calculateAndSaveMetrics(Calendar start, Calendar end) { m.add(workerRes, RDF.type, IONT.worker); m.add(workerRes, IPROP.workerID, toInfinitePrecisionIntegerLiteral(worker.getWorkerID())); m.add(workerRes, IPROP.workerType, ResourceFactory.createTypedLiteral(worker.getClass().getSimpleName())); - m.add(workerRes, IPROP.noOfQueries, toInfinitePrecisionIntegerLiteral(config.queries().getQueryCount())); + if (config.queries().getConfig() == null || config.queries().getConfig().template() == null) + m.add(workerRes, IPROP.noOfQueries, toInfinitePrecisionIntegerLiteral(config.queries().getExecutableQueryCount())); + else + m.add(workerRes, IPROP.noOfQueries, toInfinitePrecisionIntegerLiteral( + config.queries().getConfig().template().individualResults() ? 
+ config.queries().getExecutableQueryCount() : + config.queries().getRepresentedQueryCount())); m.add(workerRes, IPROP.timeOut, TimeUtils.createTypedDurationLiteral(config.timeout())); if (config.completionTarget() instanceof HttpWorker.QueryMixes) m.add(workerRes, IPROP.noOfQueryMixes, toInfinitePrecisionIntegerLiteral(((HttpWorker.QueryMixes) config.completionTarget()).number())); @@ -155,7 +161,7 @@ public void calculateAndSaveMetrics(Calendar start, Calendar end) { for (var worker : workers) { var config = worker.config(); var workerQueryIDs = config.queries().getAllQueryIds(); - for (int i = 0; i < config.queries().getQueryCount(); i++) { + for (int i = 0; i < config.queries().getRepresentedQueryCount(); i++) { Resource workerQueryRes = iresFactory.getWorkerQueryResource(worker, i); Resource queryRes = IRES.getResource(workerQueryIDs[i]); m.add(workerQueryRes, IPROP.queryID, queryRes); @@ -257,7 +263,7 @@ private Model createMetricModel(Metric metric) { if (metric instanceof QueryMetric) { // queries grouped by worker for (var worker : workers) { - for (int i = 0; i < worker.config().queries().getQueryCount(); i++) { + for (int i = 0; i < worker.config().queries().getRepresentedQueryCount(); i++) { Number metricValue = ((QueryMetric) metric).calculateQueryMetric(workerQueryExecutions[(int) worker.getWorkerID()][i]); if (metricValue != null) { Literal lit = ResourceFactory.createTypedLiteral(metricValue); diff --git a/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java b/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java index e0853166e..9d44cb95a 100644 --- a/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java +++ b/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java @@ -4,7 +4,6 @@ import com.fasterxml.jackson.annotation.JsonValue; import org.aksw.iguana.cc.config.elements.ConnectionConfig; import org.aksw.iguana.cc.query.handler.QueryHandler; -import org.aksw.iguana.cc.query.selector.impl.LinearQuerySelector; 
import org.aksw.iguana.cc.worker.HttpWorker; import org.aksw.iguana.cc.worker.impl.SPARQLProtocolWorker; import org.apache.hc.core5.http.HttpHeaders; @@ -173,8 +172,8 @@ public AsyncRequestProducer buildHttpRequest(QueryHandler.QueryStreamWrapper que * @param queryHandler the query handler to preload requests for */ public void preloadRequests(QueryHandler queryHandler) { - final var selector = new LinearQuerySelector(queryHandler.getQueryCount()); - for (int i = 0; i < queryHandler.getQueryCount(); i++) { + final var selector = queryHandler.getQuerySelectorInstance(QueryHandler.Config.Order.LINEAR); + for (int i = 0; i < queryHandler.getExecutableQueryCount(); i++) { try { // build request and discard it buildHttpRequest(queryHandler.getNextQueryStream(selector)); diff --git a/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java b/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java index a4e841039..b39152d60 100644 --- a/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java +++ b/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java @@ -174,7 +174,7 @@ public CompletableFuture start() { List executionStats = new ArrayList<>(); if (config().completionTarget() instanceof QueryMixes queryMixes) { for (int i = 0; i < queryMixes.number(); i++) { - for (int j = 0; j < config().queries().getQueryCount(); j++) { + for (int j = 0; j < config().queries().getExecutableQueryCount(); j++) { ExecutionStats execution = executeQuery(config().timeout(), false); if (execution == null) throw new RuntimeException("Execution returned null at a place, where it should have never been null."); logExecution(execution); @@ -186,7 +186,7 @@ public CompletableFuture start() { final var startNanos = System.nanoTime(); long queryExecutionCount = 0; int queryMixExecutionCount = 0; - int queryMixSize = config().queries().getQueryCount(); + int queryMixSize = config().queries().getExecutableQueryCount(); long now; while ((now 
= System.nanoTime()) - startNanos < ((TimeLimit) config.completionTarget()).duration().toNanos()) { final var timeLeft = ((TimeLimit) config.completionTarget()).duration().toNanos() - (now - startNanos); @@ -265,6 +265,7 @@ private HttpExecutionResult executeHttpRequest(Duration timeout) { // get the next query and request final var queryHandle = config().queries().getNextQueryStream(querySelector); final int queryIndex = queryHandle.index(); + final int resultIndex = queryHandle.resultId() == null ? queryIndex : queryHandle.resultId(); final AsyncRequestProducer request; try { @@ -358,7 +359,7 @@ protected HttpExecutionResult buildResult() { // check for http error if (response.getCode() / 100 != 2) { - return createFailedResultDuringResponse(queryIndex, response, timeStamp, duration, null); + return createFailedResultDuringResponse(resultIndex, response, timeStamp, duration, null); } // check content length @@ -370,18 +371,18 @@ protected HttpExecutionResult buildResult() { if (responseSize != responseBody.size()) LOGGER.error("Error during copying the response data. (expected written data size = {}, actual written data size = {}, Content-Length-Header = {})", responseSize, responseBody.size(), contentLengthHeader.getValue()); final var exception = new HttpException(String.format("Content-Length header value doesn't match actual content length. (Content-Length-Header = %s, written data size = %s)", contentLength, config.parseResults() ? 
responseBody.size() : responseSize)); - return createFailedResultDuringResponse(queryIndex, response, timeStamp, duration, exception); + return createFailedResultDuringResponse(resultIndex, response, timeStamp, duration, exception); } } // check timeout if (duration.compareTo(timeout) > 0) { - return createFailedResultDuringResponse(queryIndex, response, timeStamp, duration, new TimeoutException()); + return createFailedResultDuringResponse(resultIndex, response, timeStamp, duration, new TimeoutException()); } // return successful result return new HttpExecutionResult( - queryIndex, + resultIndex, Optional.of(response), timeStamp, Duration.ofNanos(responseEnd - requestStart), @@ -402,18 +403,18 @@ protected HttpExecutionResult buildResult() { } catch (InterruptedException | ExecutionException e) { // This will close the connection and cancel the request if it's still running. future.cancel(true); - return createFailedResultBeforeRequest(queryIndex, e); + return createFailedResultBeforeRequest(resultIndex, e); } catch (TimeoutException e) { if (future.isDone()) { LOGGER.warn("Request finished immediately after timeout but will still be counted as timed out."); try { return future.get(); } catch (InterruptedException | ExecutionException ex) { - return createFailedResultBeforeRequest(queryIndex, ex); + return createFailedResultBeforeRequest(resultIndex, ex); } } else { future.cancel(true); - return createFailedResultBeforeRequest(queryIndex, e); + return createFailedResultBeforeRequest(resultIndex, e); } } } diff --git a/src/main/resources/iguana-schema.json b/src/main/resources/iguana-schema.json index d92fb8d67..621bc3e10 100644 --- a/src/main/resources/iguana-schema.json +++ b/src/main/resources/iguana-schema.json @@ -351,6 +351,9 @@ }, "save": { "type": "boolean" + }, + "individualResults": { + "type": "boolean" } }, "required": [ diff --git a/src/test/java/org/aksw/iguana/cc/mockup/MockupQueryHandler.java 
b/src/test/java/org/aksw/iguana/cc/mockup/MockupQueryHandler.java index 6988f0ab9..804fb4bb9 100644 --- a/src/test/java/org/aksw/iguana/cc/mockup/MockupQueryHandler.java +++ b/src/test/java/org/aksw/iguana/cc/mockup/MockupQueryHandler.java @@ -30,7 +30,12 @@ public String[] getAllQueryIds() { } @Override - public int getQueryCount() { + public int getExecutableQueryCount() { + return queryNumber; + } + + @Override + public int getRepresentedQueryCount() { return queryNumber; } diff --git a/src/test/java/org/aksw/iguana/cc/mockup/MockupWorker.java b/src/test/java/org/aksw/iguana/cc/mockup/MockupWorker.java index 9950c9f9d..7c6d171b7 100644 --- a/src/test/java/org/aksw/iguana/cc/mockup/MockupWorker.java +++ b/src/test/java/org/aksw/iguana/cc/mockup/MockupWorker.java @@ -68,7 +68,7 @@ public static List createWorkerResults(QueryHandler queries, List testDeserializationData() { QueryHandler.Config.Order.RANDOM, 42L, QueryHandler.Config.Language.SPARQL, - new QueryHandler.Config.Template(URI.create("http://example.org/sparql"), 2000L, true) + new QueryHandler.Config.Template(URI.create("http://example.org/sparql"), 2000L, true, false) ), """ {"path":"some.queries","format":"separator", "separator": "\\n", "caching":true,"order":"random","seed":42,"lang":"SPARQL","template": {"endpoint": "http://example.org/sparql"}} """ + ), + Arguments.of(new QueryHandler.Config("some.queries", + QueryHandler.Config.Format.SEPARATOR, + "\n", + true, + QueryHandler.Config.Order.RANDOM, + 42L, + QueryHandler.Config.Language.SPARQL, + new QueryHandler.Config.Template(URI.create("http://example.org/sparql"), 10L, false, true) + ), + """ + {"path":"some.queries","format":"separator", "separator": "\\n", "caching":true,"order":"random","seed":42,"lang":"SPARQL","template": {"endpoint": "http://example.org/sparql", "limit": 10, "save": false, "individualResults": true }} + """ ) ); } diff --git a/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerTest.java 
b/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerTest.java index aa0832311..ce7b182ba 100644 --- a/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerTest.java +++ b/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerTest.java @@ -9,7 +9,6 @@ import org.aksw.iguana.cc.query.source.impl.FolderQuerySource; import org.aksw.iguana.cc.query.source.impl.FolderQuerySourceTest; import org.junit.jupiter.api.*; -import org.junit.jupiter.api.condition.DisabledInNativeImage; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; @@ -90,9 +89,9 @@ public void testDeserialization(String json, Class sourceType) thro QueryHandler queryHandler = assertDoesNotThrow(() -> mapper.readValue(json, QueryHandler.class)); final var selector = queryHandler.getQuerySelectorInstance(); assertInstanceOf(LinearQuerySelector.class, selector); - assertEquals(queries.size(), queryHandler.getQueryCount()); + assertEquals(queries.size(), queryHandler.getExecutableQueryCount()); assertNotEquals(0, queryHandler.hashCode()); - for (int i = 0; i < queryHandler.getQueryCount(); i++) { + for (int i = 0; i < queryHandler.getExecutableQueryCount(); i++) { final var wrapper = queryHandler.getNextQuery(selector); assertEquals(i, selector.getCurrentIndex()); if (FolderQuerySource.class.isAssignableFrom(sourceType)) @@ -110,9 +109,9 @@ public void testQueryStreamWrapper(String json, Class sourceType) t QueryHandler queryHandler = assertDoesNotThrow(() -> mapper.readValue(json, QueryHandler.class)); final var selector = queryHandler.getQuerySelectorInstance(); assertTrue(selector instanceof LinearQuerySelector); - assertEquals(queries.size(), queryHandler.getQueryCount()); + assertEquals(queries.size(), queryHandler.getExecutableQueryCount()); assertNotEquals(0, queryHandler.hashCode()); - for (int i = 0; i < queryHandler.getQueryCount(); i++) { + for (int i = 0; i < 
queryHandler.getExecutableQueryCount(); i++) { final var wrapper = queryHandler.getNextQueryStream(selector); assertEquals(i, selector.getCurrentIndex()); final var acutalQuery = new String(wrapper.queryInputStreamSupplier().get().readAllBytes(), StandardCharsets.UTF_8); @@ -131,9 +130,9 @@ public void testQueryStringWrapper(String json, Class sourceType) t QueryHandler queryHandler = assertDoesNotThrow(() -> mapper.readValue(json, QueryHandler.class)); final var selector = queryHandler.getQuerySelectorInstance(); assertInstanceOf(LinearQuerySelector.class, selector); - assertEquals(queries.size(), queryHandler.getQueryCount()); + assertEquals(queries.size(), queryHandler.getExecutableQueryCount()); assertNotEquals(0, queryHandler.hashCode()); - for (int i = 0; i < queryHandler.getQueryCount(); i++) { + for (int i = 0; i < queryHandler.getExecutableQueryCount(); i++) { final var wrapper = queryHandler.getNextQuery(selector); assertEquals(i, selector.getCurrentIndex()); if (FolderQuerySource.class.isAssignableFrom(sourceType)) @@ -151,10 +150,10 @@ public void testQueryIDs(String json, Class sourceType) { QueryHandler queryHandler = assertDoesNotThrow(() -> mapper.readValue(json, QueryHandler.class)); final var selector = queryHandler.getQuerySelectorInstance(); assertInstanceOf(LinearQuerySelector.class, selector); - assertEquals(queries.size(), queryHandler.getQueryCount()); + assertEquals(queries.size(), queryHandler.getExecutableQueryCount()); assertNotEquals(0, queryHandler.hashCode()); final var allQueryIDs = queryHandler.getAllQueryIds(); - for (int i = 0; i < queryHandler.getQueryCount(); i++) { + for (int i = 0; i < queryHandler.getExecutableQueryCount(); i++) { assertEquals(queryHandler.hashCode() + ":" + i, allQueryIDs[i]); assertEquals(allQueryIDs[i], queryHandler.getQueryId(i)); } diff --git a/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java b/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java index 
2beb8bf19..0415b0a5b 100644 --- a/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java +++ b/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java @@ -69,7 +69,7 @@ public void testTemplateQueries() throws IOException { QueryHandler.Config.Order.LINEAR, null, QueryHandler.Config.Language.SPARQL, - new QueryHandler.Config.Template(URI.create("http://localhost:" + wm.getPort()), 2000L, false) + new QueryHandler.Config.Template(URI.create("http://localhost:" + wm.getPort()), 2000L, false, true) ); wm.stubFor(get(anyUrl()) .withQueryParam("query", matching("PREFIX\\s+rdf:\\s+\\s+SELECT\\s+DISTINCT\\s+\\?var0\\s+\\?var1\\s+\\?var2\\s+WHERE\\s+\\{\\s*\\?s\\s+rdf:type\\s+\\?var0\\s*;\\s*\\?var1\\s+\\?var2\\s*\\.\\s*\\?var2\\s+\\?p\\s+\\s*}\\s+LIMIT\\s+2000\\s*")) @@ -79,7 +79,7 @@ public void testTemplateQueries() throws IOException { .withBody(RESPONSE_JSON))); final var queryHandler = new QueryHandler(queryHandlerConfig); final var selector = queryHandler.getQuerySelectorInstance(); - Assertions.assertEquals(2, queryHandler.getQueryCount()); + Assertions.assertEquals(2, queryHandler.getExecutableQueryCount()); for (int i = 0; i < 2; i++) { final var query = queryHandler.getNextQuery(selector); Assertions.assertEquals("PREFIX rdf: SELECT * WHERE {?s rdf:type ; . ?p }", query.query()); @@ -99,9 +99,44 @@ public void testMalformedTemplateQuery() throws IOException { QueryHandler.Config.Order.LINEAR, null, QueryHandler.Config.Language.SPARQL, - new QueryHandler.Config.Template(URI.create("http://localhost:" + wm.getPort()), 2000L, false) + new QueryHandler.Config.Template(URI.create("http://localhost:" + wm.getPort()), 2000L, false, true) ); Assertions.assertThrows(QueryParseException.class, () -> new QueryHandler(queryHandlerConfig)); } + @Test + public void testSubsumedQueries() throws IOException { + String templateQuery = "PREFIX rdf: SELECT * WHERE {?s rdf:type %%var0%% ; %%var1%% %%var2%%. 
%%var2%% ?p }"; + tempTemplateFile = Files.createTempFile(parentFolder, "Template", ".txt"); + Files.writeString(tempTemplateFile, templateQuery, StandardCharsets.UTF_8); + final var queryHandlerConfig = new QueryHandler.Config( + tempTemplateFile.toString(), + QueryHandler.Config.Format.ONE_PER_LINE, + null, + true, + QueryHandler.Config.Order.LINEAR, + null, + QueryHandler.Config.Language.SPARQL, + new QueryHandler.Config.Template(URI.create("http://localhost:" + wm.getPort()), 2000L, false, false) + ); + wm.stubFor(get(anyUrl()) + .withQueryParam("query", matching("PREFIX\\s+rdf:\\s+\\s+SELECT\\s+DISTINCT\\s+\\?var0\\s+\\?var1\\s+\\?var2\\s+WHERE\\s+\\{\\s*\\?s\\s+rdf:type\\s+\\?var0\\s*;\\s*\\?var1\\s+\\?var2\\s*\\.\\s*\\?var2\\s+\\?p\\s+\\s*}\\s+LIMIT\\s+2000\\s*")) + .willReturn(aResponse() + .withStatus(200) + .withHeader("Content-Type", "application/sparql-results+json") + .withBody(RESPONSE_JSON))); + final var queryHandler = new QueryHandler(queryHandlerConfig); + final var selector = queryHandler.getQuerySelectorInstance(); + Assertions.assertEquals(2, queryHandler.getExecutableQueryCount()); + Assertions.assertEquals(1, queryHandler.getRepresentedQueryCount()); + var query = queryHandler.getNextQuery(selector); + Assertions.assertEquals(0, query.resultId()); + Assertions.assertEquals(1, query.index()); + Assertions.assertFalse(query.update()); + query = queryHandler.getNextQuery(selector); + Assertions.assertEquals(0, query.resultId()); + Assertions.assertEquals(2, query.index()); + Assertions.assertFalse(query.update()); + } + } diff --git a/src/test/resources/suite-configs/valid/config-full.yaml b/src/test/resources/suite-configs/valid/config-full.yaml index ad228cc29..260add8eb 100644 --- a/src/test/resources/suite-configs/valid/config-full.yaml +++ b/src/test/resources/suite-configs/valid/config-full.yaml @@ -57,6 +57,7 @@ tasks: endpoint: "http://localhost:3030/sp2b" limit: 2000 save: false + individualResults: false timeout: 2S connection: fuseki 
completionTarget: