Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vector agg: skip entire null words of filter bitmap #7569

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tsl/src/nodes/decompress_chunk/compressed_batch.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ typedef struct DecompressBatchState
* row. Indexed same as arrow arrays, w/o accounting for the reverse scan
* direction. Initialized to all ones, i.e. all rows pass.
*/
uint64 *restrict vector_qual_result;
const uint64 *restrict vector_qual_result;

/*
* This follows DecompressContext.compressed_chunk_columns, but does not
Expand Down
2 changes: 1 addition & 1 deletion tsl/src/nodes/decompress_chunk/vector_predicates.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ typedef enum VectorQualSummary
} VectorQualSummary;

static pg_attribute_always_inline VectorQualSummary
get_vector_qual_summary(uint64 *restrict qual_result, size_t n_rows)
get_vector_qual_summary(const uint64 *qual_result, size_t n_rows)
{
bool any_rows_pass = false;
bool all_rows_pass = true;
Expand Down
69 changes: 67 additions & 2 deletions tsl/src/nodes/vector_agg/grouping_policy_hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ gp_hash_reset(GroupingPolicy *obj)

policy->stat_input_valid_rows = 0;
policy->stat_input_total_rows = 0;
policy->stat_bulk_filtered_rows = 0;
policy->stat_consecutive_keys = 0;
}

Expand Down Expand Up @@ -331,7 +332,71 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state)
* Add the batch rows to aggregate function states.
*/
const uint64 *restrict filter = batch_state->vector_qual_result;
add_one_range(policy, batch_state, 0, n);
if (filter == NULL)
{
/*
* We don't have a filter on this batch, so aggregate it entirely in one
* go.
*/
add_one_range(policy, batch_state, 0, n);
}
else
{
/*
* If we have a filter, skip the rows for which the entire words of the
* filter bitmap are zero. This improves performance for highly
* selective filters.
*/
int statistics_range_row = 0;
int start_word = 0;
int end_word = 0;
int past_the_end_word = (n - 1) / 64 + 1;
for (;;)
{
/*
* Skip the bitmap words which are zero.
*/
for (start_word = end_word; start_word < past_the_end_word && filter[start_word] == 0;
start_word++)
;

if (start_word >= past_the_end_word)
{
break;
}

/*
* Collect the consecutive bitmap words which are nonzero.
*/
for (end_word = start_word + 1; end_word < past_the_end_word && filter[end_word] != 0;
end_word++)
;

/*
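* Now we have the half-open [start_word, end_word) range of bitmap
* words that are nonzero; end_word itself is either a zero word or
* past the end of the bitmap.
*
* Determine the first and the past-the-end rows of the range, trimming
* the zero bits that precede the first set bit of the first word and
* follow the last set bit of the last word.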
*/
const int start_row = start_word * 64 + pg_rightmost_one_pos64(filter[start_word]);
Assert(start_row <= n);

/*
* The bits for past-the-end rows must be set to zero, so this
* calculation should yield no more than n.
*/
Assert(end_word > start_word);
const int end_row =
(end_word - 1) * 64 + pg_leftmost_one_pos64(filter[end_word - 1]) + 1;
Assert(end_row <= n);

statistics_range_row += end_row - start_row;

add_one_range(policy, batch_state, start_row, end_row);
}
policy->stat_bulk_filtered_rows += batch_state->total_batch_rows - statistics_range_row;
}

policy->stat_input_total_rows += batch_state->total_batch_rows;
policy->stat_input_valid_rows += arrow_num_valid(filter, batch_state->total_batch_rows);
Expand Down Expand Up @@ -378,7 +443,7 @@ gp_hash_do_emit(GroupingPolicy *gp, TupleTableSlot *aggregated_slot)
"%f ratio, %ld curctx bytes, %ld aggstate bytes",
policy->stat_input_total_rows,
policy->stat_input_valid_rows,
0UL,
policy->stat_bulk_filtered_rows,
policy->stat_consecutive_keys,
keys,
policy->stat_input_valid_rows / keys,
Expand Down
1 change: 1 addition & 0 deletions tsl/src/nodes/vector_agg/grouping_policy_hash.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ typedef struct GroupingPolicyHash
*/
uint64 stat_input_total_rows;
uint64 stat_input_valid_rows;
uint64 stat_bulk_filtered_rows;
uint64 stat_consecutive_keys;
} GroupingPolicyHash;

Expand Down
Loading