Make some member functions of GroupBy const or even static (#1510)
This makes static analysis happier and (in the case of the static methods) avoids passing around fully-fledged `GroupBy` objects.
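As a rough illustration of the `const` half of this change, here is a minimal sketch with made-up names (not QLever code): a member function that only reads the operation's state can be marked `const`, so it is callable through a `const` reference and static analysis can verify that it does not mutate the operation.

```cpp
#include <string>
#include <utility>
#include <vector>

// Toy operation with illustrative names (not the real GroupBy class).
class Operation {
  std::vector<std::string> groupByVariables_;

 public:
  explicit Operation(std::vector<std::string> vars)
      : groupByVariables_(std::move(vars)) {}

  // Only reads `groupByVariables_`, so it can be `const`.
  bool groupsBySingleVariable() const { return groupByVariables_.size() == 1; }
};

// Callers holding only a `const Operation&` can use the accessor directly.
bool optimizationMightApply(const Operation& op) {
  return op.groupsBySingleVariable();
}
```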
RobinTF authored Sep 23, 2024
1 parent f5520af commit e3c2841
Showing 5 changed files with 80 additions and 74 deletions.
src/engine/GroupBy.cpp: 64 changes (32 additions & 32 deletions)
@@ -332,7 +332,7 @@ ProtoResult GroupBy::computeResult(bool requestLaziness) {
// Always request child operation to provide a lazy result if the aggregate
// expressions allow to compute the full result in chunks
metadataForUnsequentialData =
- computeUnsequentialProcessingMetadata(aggregates);
+ computeUnsequentialProcessingMetadata(aggregates, _groupByVariables);
subresult = _subtree->getResult(metadataForUnsequentialData.has_value());
}

@@ -519,7 +519,7 @@ cppcoro::generator<IdTable> GroupBy::computeResultLazily(
if (groupSplitAcrossTables) {
lazyGroupBy.processBlock(evaluationContext, blockStart, blockEnd);
lazyGroupBy.commitRow(resultTable, evaluationContext,
- currentGroupBlock, *this);
+ currentGroupBlock);
groupSplitAcrossTables = false;
} else {
// This processes the whole block in batches if possible
@@ -566,13 +566,12 @@ cppcoro::generator<IdTable> GroupBy::computeResultLazily(

sparqlExpression::EvaluationContext evaluationContext =
createEvaluationContext(*localVocab, idTable);
- lazyGroupBy.commitRow(resultTable, evaluationContext, currentGroupBlock,
- *this);
+ lazyGroupBy.commitRow(resultTable, evaluationContext, currentGroupBlock);
co_yield resultTable;
}

// _____________________________________________________________________________
- std::optional<IdTable> GroupBy::computeGroupByForSingleIndexScan() {
+ std::optional<IdTable> GroupBy::computeGroupByForSingleIndexScan() const {
// The child must be an `IndexScan` for this optimization.
auto indexScan =
std::dynamic_pointer_cast<const IndexScan>(_subtree->getRootOperation());
@@ -621,7 +620,7 @@ std::optional<IdTable> GroupBy::computeGroupByForSingleIndexScan() {
}

// ____________________________________________________________________________
- std::optional<IdTable> GroupBy::computeGroupByObjectWithCount() {
+ std::optional<IdTable> GroupBy::computeGroupByObjectWithCount() const {
// The child must be an `IndexScan` with exactly two variables.
auto indexScan =
std::dynamic_pointer_cast<IndexScan>(_subtree->getRootOperation());
@@ -671,7 +670,7 @@ std::optional<IdTable> GroupBy::computeGroupByObjectWithCount() {
}

// _____________________________________________________________________________
- std::optional<IdTable> GroupBy::computeGroupByForFullIndexScan() {
+ std::optional<IdTable> GroupBy::computeGroupByForFullIndexScan() const {
if (_groupByVariables.size() != 1) {
return std::nullopt;
}
@@ -766,7 +765,7 @@ std::optional<Permutation::Enum> GroupBy::getPermutationForThreeVariableTriple(

// ____________________________________________________________________________
std::optional<GroupBy::OptimizedGroupByData> GroupBy::checkIfJoinWithFullScan(
- const Join& join) {
+ const Join& join) const {
if (_groupByVariables.size() != 1) {
return std::nullopt;
}
@@ -809,7 +808,7 @@ std::optional<GroupBy::OptimizedGroupByData> GroupBy::checkIfJoinWithFullScan(
}

// ____________________________________________________________________________
- std::optional<IdTable> GroupBy::computeGroupByForJoinWithFullScan() {
+ std::optional<IdTable> GroupBy::computeGroupByForJoinWithFullScan() const {
auto join = std::dynamic_pointer_cast<Join>(_subtree->getRootOperation());
if (!join) {
return std::nullopt;
@@ -875,7 +874,7 @@ std::optional<IdTable> GroupBy::computeGroupByForJoinWithFullScan() {
}

// _____________________________________________________________________________
- std::optional<IdTable> GroupBy::computeOptimizedGroupByIfPossible() {
+ std::optional<IdTable> GroupBy::computeOptimizedGroupByIfPossible() const {
// TODO<C++23> Use `std::optional::or_else`.
if (!RuntimeParameters().get<"group-by-disable-index-scan-optimizations">()) {
if (auto result = computeGroupByForSingleIndexScan()) {
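The `TODO<C++23>` note above refers to chaining the fallback optimizations with `std::optional::or_else` instead of nested `if` statements. A small self-contained sketch of that style, assuming a C++23 standard library (the functions are made up):

```cpp
#include <optional>

std::optional<int> tryCheapPlan() { return std::nullopt; }  // pretend it failed
std::optional<int> tryFallbackPlan() { return 42; }

// With C++23, the "try one optimization, else the next" cascade collapses
// into a single chained expression.
std::optional<int> pickPlan() {
  return tryCheapPlan().or_else(tryFallbackPlan);
}
```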
@@ -897,7 +896,8 @@ std::optional<IdTable> GroupBy::computeOptimizedGroupByIfPossible() {
// _____________________________________________________________________________
std::optional<GroupBy::HashMapOptimizationData>
GroupBy::computeUnsequentialProcessingMetadata(
- std::vector<Aggregate>& aliases) {
+ std::vector<Aggregate>& aliases,
+ const std::vector<Variable>& groupByVariables) {
// Get pointers to all aggregate expressions and their parents
size_t numAggregates = 0;
std::vector<HashMapAliasInformation> aliasesWithAggregateInfo;
@@ -914,10 +914,10 @@ GroupBy::computeUnsequentialProcessingMetadata(

// Find all grouped variables occurring in the alias expression
std::vector<HashMapGroupedVariableInformation> groupedVariables;
- groupedVariables.reserve(_groupByVariables.size());
+ groupedVariables.reserve(groupByVariables.size());
// TODO<C++23> use views::enumerate
size_t i = 0;
- for (const auto& groupedVariable : _groupByVariables) {
+ for (const auto& groupedVariable : groupByVariables) {
groupedVariables.emplace_back(groupedVariable, i,
findGroupedVariable(expr, groupedVariable));
++i;
@@ -933,15 +933,16 @@

// _____________________________________________________________________________
std::optional<GroupBy::HashMapOptimizationData>
- GroupBy::checkIfHashMapOptimizationPossible(std::vector<Aggregate>& aliases) {
+ GroupBy::checkIfHashMapOptimizationPossible(
+     std::vector<Aggregate>& aliases) const {
if (!RuntimeParameters().get<"group-by-hash-map-enabled">()) {
return std::nullopt;
}

if (!std::dynamic_pointer_cast<const Sort>(_subtree->getRootOperation())) {
return std::nullopt;
}
- return computeUnsequentialProcessingMetadata(aliases);
+ return computeUnsequentialProcessingMetadata(aliases, _groupByVariables);
}

// _____________________________________________________________________________
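The hunks above make `computeUnsequentialProcessingMetadata` independent of `this`: the grouped variables are passed in explicitly instead of being read from `_groupByVariables`, which is what allows the function to become `static`. A minimal, self-contained sketch of that pattern with illustrative names (not the real QLever signatures):

```cpp
#include <cstddef>
#include <optional>
#include <string>
#include <utility>
#include <vector>

struct Metadata {
  std::size_t numGroupedVariables;
};

class Grouper {
  std::vector<std::string> groupByVariables_;

 public:
  explicit Grouper(std::vector<std::string> vars)
      : groupByVariables_(std::move(vars)) {}

  // Every input is a parameter, so the helper is `static` and can be called
  // (e.g. from a unit test) without constructing a full `Grouper` object.
  static std::optional<Metadata> computeMetadata(
      const std::vector<std::string>& groupByVariables) {
    if (groupByVariables.empty()) {
      return std::nullopt;
    }
    return Metadata{groupByVariables.size()};
  }

  // Member code forwards its own state, just like the diff passes
  // `_groupByVariables` at the call sites.
  std::optional<Metadata> computeMetadataForSelf() const {
    return computeMetadata(groupByVariables_);
  }
};
```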
@@ -1096,9 +1097,8 @@ GroupBy::getHashMapAggregationResults(
IdTable* resultTable,
const HashMapAggregationData<NUM_GROUP_COLUMNS>& aggregationData,
size_t dataIndex, size_t beginIndex, size_t endIndex,
- LocalVocab* localVocab) const {
- sparqlExpression::VectorWithMemoryLimit<ValueId> aggregateResults(
-     getExecutionContext()->getAllocator());
+ LocalVocab* localVocab, const Allocator& allocator) {
+ sparqlExpression::VectorWithMemoryLimit<ValueId> aggregateResults(allocator);
aggregateResults.resize(endIndex - beginIndex);

auto& aggregateDataVariant =
@@ -1136,13 +1136,13 @@
// _____________________________________________________________________________
void GroupBy::substituteGroupVariable(
const std::vector<ParentAndChildIndex>& occurrences, IdTable* resultTable,
- size_t beginIndex, size_t count, size_t columnIndex) const {
+ size_t beginIndex, size_t count, size_t columnIndex,
+     const Allocator& allocator) {
decltype(auto) groupValues =
resultTable->getColumn(columnIndex).subspan(beginIndex, count);

for (const auto& occurrence : occurrences) {
- sparqlExpression::VectorWithMemoryLimit<ValueId> values(
-     getExecutionContext()->getAllocator());
+ sparqlExpression::VectorWithMemoryLimit<ValueId> values(allocator);
values.resize(groupValues.size());
std::ranges::copy(groupValues, values.begin());
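The helpers in this part of the file used to fetch their allocator via `getExecutionContext()->getAllocator()`; once they are `static`, the allocator is threaded through as a parameter instead. A small sketch of that dependency-threading pattern, using a standard `std::pmr` allocator in place of QLever's memory-limited allocator (all names are illustrative):

```cpp
#include <cstddef>
#include <memory_resource>
#include <vector>

using Allocator = std::pmr::polymorphic_allocator<int>;

// Static helper: it no longer reaches into a surrounding object for the
// allocator but receives it as an argument.
static std::pmr::vector<int> makeScratchBuffer(std::size_t n,
                                               const Allocator& allocator) {
  std::pmr::vector<int> buffer(allocator);
  buffer.resize(n);
  return buffer;
}

class Engine {
  std::pmr::monotonic_buffer_resource resource_;

 public:
  Allocator allocator() { return Allocator(&resource_); }

  // The object that owns the memory resource hands its allocator down,
  // mirroring how the member functions here pass `allocator` along.
  std::pmr::vector<int> run(std::size_t n) {
    return makeScratchBuffer(n, allocator());
  }
};
```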

@@ -1161,15 +1161,15 @@ GroupBy::substituteAllAggregates(
std::vector<HashMapAggregateInformation>& info, size_t beginIndex,
size_t endIndex,
const HashMapAggregationData<NUM_GROUP_COLUMNS>& aggregationData,
- IdTable* resultTable, LocalVocab* localVocab) const {
+ IdTable* resultTable, LocalVocab* localVocab, const Allocator& allocator) {
std::vector<std::unique_ptr<sparqlExpression::SparqlExpression>>
originalChildren;
originalChildren.reserve(info.size());
// Substitute in the results of all aggregates of `info`.
for (auto& aggregate : info) {
auto aggregateResults = getHashMapAggregationResults(
resultTable, aggregationData, aggregate.aggregateDataIndex_, beginIndex,
- endIndex, localVocab);
+ endIndex, localVocab, allocator);

// Substitute the resulting vector as a literal
auto newExpression = std::make_unique<sparqlExpression::VectorIdExpression>(
@@ -1276,7 +1276,7 @@ void GroupBy::evaluateAlias(
HashMapAliasInformation& alias, IdTable* result,
sparqlExpression::EvaluationContext& evaluationContext,
const HashMapAggregationData<NUM_GROUP_COLUMNS>& aggregationData,
- LocalVocab* localVocab) const {
+ LocalVocab* localVocab, const Allocator& allocator) {
auto& info = alias.aggregateInfo_;

// Either:
@@ -1304,8 +1304,7 @@ void GroupBy::evaluateAlias(
outValues.begin() + evaluationContext._beginIndex);

// We also need to store it for possible future use
- sparqlExpression::VectorWithMemoryLimit<ValueId> values(
-     getExecutionContext()->getAllocator());
+ sparqlExpression::VectorWithMemoryLimit<ValueId> values(allocator);
values.resize(groupValues.size());
std::ranges::copy(groupValues, values.begin());

@@ -1321,7 +1320,8 @@
// Get aggregate results
auto aggregateResults = getHashMapAggregationResults(
result, aggregationData, aggregate.aggregateDataIndex_,
- evaluationContext._beginIndex, evaluationContext._endIndex, localVocab);
+ evaluationContext._beginIndex, evaluationContext._endIndex, localVocab,
+     allocator);

// Copy to result table
decltype(auto) outValues = result->getColumn(alias.outCol_);
@@ -1339,15 +1339,15 @@
// Substitute in the values of the grouped variable
substituteGroupVariable(
occurrences, result, evaluationContext._beginIndex,
- evaluationContext.size(), substitution.resultColumnIndex_);
+ evaluationContext.size(), substitution.resultColumnIndex_, allocator);
}

// Substitute in the results of all aggregates contained in the
// expression of the current alias, if `info` is non-empty.
std::vector<std::unique_ptr<sparqlExpression::SparqlExpression>>
originalChildren = substituteAllAggregates(
info, evaluationContext._beginIndex, evaluationContext._endIndex,
- aggregationData, result, localVocab);
+ aggregationData, result, localVocab, allocator);

// Evaluate top-level alias expression
sparqlExpression::ExpressionResult expressionResult =
@@ -1378,7 +1378,7 @@ template <size_t NUM_GROUP_COLUMNS>
IdTable GroupBy::createResultFromHashMap(
const HashMapAggregationData<NUM_GROUP_COLUMNS>& aggregationData,
std::vector<HashMapAliasInformation>& aggregateAliases,
- LocalVocab* localVocab) {
+ LocalVocab* localVocab) const {
// Create result table, filling in the group values, since they might be
// required in evaluation
ad_utility::Timer sortingTimer{ad_utility::Timer::Started};
@@ -1408,7 +1408,7 @@ IdTable GroupBy::createResultFromHashMap(

for (auto& alias : aggregateAliases) {
evaluateAlias(alias, &result, evaluationContext, aggregationData,
- localVocab);
+ localVocab, allocator());
}
}
runtimeInfo().addDetail("timeEvaluationAndResults",
@@ -1449,7 +1449,7 @@ template <size_t NUM_GROUP_COLUMNS>
IdTable GroupBy::computeGroupByForHashMapOptimization(
std::vector<HashMapAliasInformation>& aggregateAliases,
const IdTable& subresult, const std::vector<size_t>& columnIndices,
- LocalVocab* localVocab) {
+ LocalVocab* localVocab) const {
AD_CONTRACT_CHECK(columnIndices.size() == NUM_GROUP_COLUMNS ||
NUM_GROUP_COLUMNS == 0);

