Skip to content

Commit

Permalink
Fix expression column aggregate calculation with group_by after `re…
Browse files Browse the repository at this point in the history
…move()`
  • Loading branch information
texodus committed Jul 20, 2023
1 parent abbfff6 commit 203c92b
Show file tree
Hide file tree
Showing 10 changed files with 138 additions and 32 deletions.
18 changes: 13 additions & 5 deletions cpp/perspective/src/cpp/context_grouped_pkey.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -683,8 +683,8 @@ t_ctx_grouped_pkey::get_column_dtype(t_uindex idx) const {
}

void
t_ctx_grouped_pkey::compute_expressions(
std::shared_ptr<t_data_table> flattened_masked,
t_ctx_grouped_pkey::compute_expressions(std::shared_ptr<t_data_table> master,
std::shared_ptr<t_data_table> flattened,
t_expression_vocab& expression_vocab, t_regex_mapping& regex_mapping) {
// Clear the transitional expression tables on the context so they are
// ready for the next update.
Expand All @@ -693,14 +693,22 @@ t_ctx_grouped_pkey::compute_expressions(
std::shared_ptr<t_data_table> master_expression_table
= m_expression_tables->m_master;

t_uindex flattened_num_rows = flattened->size();
m_expression_tables->reserve_transitional_table_size(flattened_num_rows);
m_expression_tables->set_transitional_table_size(flattened_num_rows);

// Set the master table to the right size.
master_expression_table->reserve(flattened_masked->size());
master_expression_table->set_size(flattened_masked->size());
t_uindex num_rows = master->size();
master_expression_table->reserve(num_rows);
master_expression_table->set_size(num_rows);

const auto& expressions = m_config.get_expressions();
for (const auto& expr : expressions) {
// Compute the expressions on the master table.
expr->compute(flattened_masked, master_expression_table,
expr->compute(
master, master_expression_table, expression_vocab, regex_mapping);

expr->compute(flattened, m_expression_tables->m_flattened,
expression_vocab, regex_mapping);
}
}
Expand Down
14 changes: 11 additions & 3 deletions cpp/perspective/src/cpp/context_one.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,8 @@ t_ctx1::get_trav_depth(t_index idx) const {
}

void
t_ctx1::compute_expressions(std::shared_ptr<t_data_table> flattened_masked,
t_ctx1::compute_expressions(std::shared_ptr<t_data_table> master,
std::shared_ptr<t_data_table> flattened,
t_expression_vocab& expression_vocab, t_regex_mapping& regex_mapping) {
// Clear the transitional expression tables on the context so they are
// ready for the next update.
Expand All @@ -620,15 +621,22 @@ t_ctx1::compute_expressions(std::shared_ptr<t_data_table> flattened_masked,
std::shared_ptr<t_data_table> master_expression_table
= m_expression_tables->m_master;

t_uindex flattened_num_rows = flattened->size();
m_expression_tables->reserve_transitional_table_size(flattened_num_rows);
m_expression_tables->set_transitional_table_size(flattened_num_rows);

// Set the master table to the right size.
t_uindex num_rows = flattened_masked->size();
t_uindex num_rows = master->size();
master_expression_table->reserve(num_rows);
master_expression_table->set_size(num_rows);

const auto& expressions = m_config.get_expressions();
for (const auto& expr : expressions) {
// Compute the expressions on the master table.
expr->compute(flattened_masked, master_expression_table,
expr->compute(
master, master_expression_table, expression_vocab, regex_mapping);

expr->compute(flattened, m_expression_tables->m_flattened,
expression_vocab, regex_mapping);
}
}
Expand Down
14 changes: 11 additions & 3 deletions cpp/perspective/src/cpp/context_two.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1053,7 +1053,8 @@ t_ctx2::get_column_dtype(t_uindex idx) const {
}

void
t_ctx2::compute_expressions(std::shared_ptr<t_data_table> flattened_masked,
t_ctx2::compute_expressions(std::shared_ptr<t_data_table> master,
std::shared_ptr<t_data_table> flattened,
t_expression_vocab& expression_vocab, t_regex_mapping& regex_mapping) {
// Clear the transitional expression tables on the context so they are
// ready for the next update.
Expand All @@ -1062,15 +1063,22 @@ t_ctx2::compute_expressions(std::shared_ptr<t_data_table> flattened_masked,
std::shared_ptr<t_data_table> master_expression_table
= m_expression_tables->m_master;

t_uindex flattened_num_rows = flattened->size();
m_expression_tables->reserve_transitional_table_size(flattened_num_rows);
m_expression_tables->set_transitional_table_size(flattened_num_rows);

// Set the master table to the right size.
t_uindex num_rows = flattened_masked->size();
t_uindex num_rows = master->size();
master_expression_table->reserve(num_rows);
master_expression_table->set_size(num_rows);

const auto& expressions = m_config.get_expressions();
for (const auto& expr : expressions) {
// Compute the expressions on the master table.
expr->compute(flattened_masked, master_expression_table,
expr->compute(
master, master_expression_table, expression_vocab, regex_mapping);

expr->compute(flattened, m_expression_tables->m_flattened,
expression_vocab, regex_mapping);
}
}
Expand Down
14 changes: 11 additions & 3 deletions cpp/perspective/src/cpp/context_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,8 @@ t_ctx0::get_step_delta(t_index bidx, t_index eidx) {
}

void
t_ctx0::compute_expressions(std::shared_ptr<t_data_table> flattened_masked,
t_ctx0::compute_expressions(std::shared_ptr<t_data_table> master,
std::shared_ptr<t_data_table> flattened,
t_expression_vocab& expression_vocab, t_regex_mapping& regex_mapping) {
// Clear the transitional expression tables on the context so they are
// ready for the next update.
Expand All @@ -628,15 +629,22 @@ t_ctx0::compute_expressions(std::shared_ptr<t_data_table> flattened_masked,
std::shared_ptr<t_data_table> master_expression_table
= m_expression_tables->m_master;

t_uindex flattened_num_rows = flattened->size();
m_expression_tables->reserve_transitional_table_size(flattened_num_rows);
m_expression_tables->set_transitional_table_size(flattened_num_rows);

// Set the master table to the right size.
t_uindex num_rows = flattened_masked->size();
t_uindex num_rows = master->size();
master_expression_table->reserve(num_rows);
master_expression_table->set_size(num_rows);

const auto& expressions = m_config.get_expressions();
for (const auto& expr : expressions) {
// Compute the expressions on the master table.
expr->compute(flattened_masked, master_expression_table,
expr->compute(
master, master_expression_table, expression_vocab, regex_mapping);

expr->compute(flattened, m_expression_tables->m_flattened,
expression_vocab, regex_mapping);
}
}
Expand Down
5 changes: 5 additions & 0 deletions cpp/perspective/src/cpp/expression_tables.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ t_expression_tables::t_expression_tables(
m_transitions->init();
}

t_data_table*
t_expression_tables::get_table() const {
return m_master.get();
}

void
t_expression_tables::calculate_transitions(
std::shared_ptr<t_data_table> existed) {
Expand Down
31 changes: 17 additions & 14 deletions cpp/perspective/src/cpp/gnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -871,30 +871,29 @@ t_gnode::_register_context(
ctx->reset();

if (should_update) {
ctx->compute_expressions(
pkeyed_table, expression_vocab, expression_regex_mapping);
ctx->compute_expressions(m_gstate->get_table(), pkeyed_table,
expression_vocab, expression_regex_mapping);
update_context_from_state<t_ctx2>(ctx, name, pkeyed_table);
}
} break;
case ONE_SIDED_CONTEXT: {
set_ctx_state<t_ctx1>(ptr_);
t_ctx1* ctx = static_cast<t_ctx1*>(ptr_);
ctx->reset();

if (should_update) {
ctx->compute_expressions(
pkeyed_table, expression_vocab, expression_regex_mapping);
ctx->compute_expressions(m_gstate->get_table(), pkeyed_table,
expression_vocab, expression_regex_mapping);

update_context_from_state<t_ctx1>(ctx, name, pkeyed_table);
}
} break;
case ZERO_SIDED_CONTEXT: {
set_ctx_state<t_ctx0>(ptr_);
t_ctx0* ctx = static_cast<t_ctx0*>(ptr_);
ctx->reset();

if (should_update) {
ctx->compute_expressions(
pkeyed_table, expression_vocab, expression_regex_mapping);
ctx->compute_expressions(m_gstate->get_table(), pkeyed_table,
expression_vocab, expression_regex_mapping);
update_context_from_state<t_ctx0>(ctx, name, pkeyed_table);
}
} break;
Expand All @@ -914,8 +913,8 @@ t_gnode::_register_context(
ctx->reset();

if (should_update) {
ctx->compute_expressions(
pkeyed_table, expression_vocab, expression_regex_mapping);
ctx->compute_expressions(m_gstate->get_table(), pkeyed_table,
expression_vocab, expression_regex_mapping);
update_context_from_state<t_ctx_grouped_pkey>(
ctx, name, pkeyed_table);
}
Expand Down Expand Up @@ -999,23 +998,27 @@ t_gnode::_compute_expressions(std::shared_ptr<t_data_table> flattened_masked) {
switch (ctxh.get_type()) {
case TWO_SIDED_CONTEXT: {
t_ctx2* ctx = static_cast<t_ctx2*>(ctxh.m_ctx);
ctx->compute_expressions(flattened_masked, expression_vocab,
ctx->compute_expressions(m_gstate->get_table(),
flattened_masked, expression_vocab,
expression_regex_mapping);
} break;
case ONE_SIDED_CONTEXT: {
t_ctx1* ctx = static_cast<t_ctx1*>(ctxh.m_ctx);
ctx->compute_expressions(flattened_masked, expression_vocab,
ctx->compute_expressions(m_gstate->get_table(),
flattened_masked, expression_vocab,
expression_regex_mapping);
} break;
case ZERO_SIDED_CONTEXT: {
t_ctx0* ctx = static_cast<t_ctx0*>(ctxh.m_ctx);
ctx->compute_expressions(flattened_masked, expression_vocab,
ctx->compute_expressions(m_gstate->get_table(),
flattened_masked, expression_vocab,
expression_regex_mapping);
} break;
case GROUPED_PKEY_CONTEXT: {
t_ctx_grouped_pkey* ctx
= static_cast<t_ctx_grouped_pkey*>(ctxh.m_ctx);
ctx->compute_expressions(flattened_masked, expression_vocab,
ctx->compute_expressions(m_gstate->get_table(),
flattened_masked, expression_vocab,
expression_regex_mapping);
} break;
case UNIT_CONTEXT:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ std::shared_ptr<t_expression_tables> get_expression_tables() const;

// Given shared pointers to data tables from the gnode, use them to
// compute the results of expression columns.
void compute_expressions(std::shared_ptr<t_data_table> flattened_masked,
void compute_expressions(std::shared_ptr<t_data_table> master,
std::shared_ptr<t_data_table> flattened_masked,
t_expression_vocab& expression_vocab, t_regex_mapping& regex_mapping);

void compute_expressions(std::shared_ptr<t_data_table> master,
Expand Down
2 changes: 2 additions & 0 deletions cpp/perspective/src/include/perspective/expression_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ struct t_expression_tables {

void reset();

t_data_table* get_table() const;

// master table is calculated from t_gstate's master table
std::shared_ptr<t_data_table> m_master;

Expand Down
2 changes: 1 addition & 1 deletion cpp/perspective/src/include/perspective/gnode.h
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ t_gnode::update_context_from_state(CTX_T* ctx, const std::string& name,
std::shared_ptr<t_expression_tables> ctx_expression_tables
= ctx->get_expression_tables();
std::shared_ptr<t_data_table> joined_flattened
= flattened->join(ctx_expression_tables->m_master);
= flattened->join(ctx_expression_tables->m_flattened);
ctx->notify(*joined_flattened);
} else {
// Just use the table from the gnode
Expand Down
67 changes: 65 additions & 2 deletions python/perspective/perspective/tests/table/test_remove.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,17 @@


class TestRemove(object):

def test_remove_all(self):
tbl = Table([{"a": "abc", "b": 123}], index="a")
tbl.remove(["abc"])
assert tbl.view().to_records() == []
# assert tbl.size() == 0

def test_remove_nonsequential(self):
tbl = Table([{"a": "abc", "b": 123}, {"a": "def", "b": 456}, {"a": "efg", "b": 789}], index="a")
tbl = Table(
[{"a": "abc", "b": 123}, {"a": "def", "b": 456}, {"a": "efg", "b": 789}],
index="a",
)
tbl.remove(["abc", "efg"])
assert tbl.view().to_records() == [{"a": "def", "b": 456}]
# assert tbl.size() == 1
Expand All @@ -35,3 +37,64 @@ def test_remove_multiple_single(self):
tbl.remove([i])
assert tbl.view().to_records() == [{"a": 0, "b": "0"}]
# assert tbl.size() == 0

def test_remove_expressions(self):
schema = {"key": str, "delta$": float, "business_line": str}
data = [
{
"key": "A",
"delta$": 46412.3804275,
},
{
"key": "B",
"delta$": 2317615.875,
},
]

table = Table(schema, index="key")
table.update(data)
table.remove(["A"])
view = table.view(
group_by=["business_line"],
columns=["delta$", "alias"],
expressions=[
'// alias\n"delta$"',
],
)

records = view.to_records()
print(records)
assert records == [
{"__ROW_PATH__": [], "delta$": 2317615.875, "alias": 2317615.875},
{"__ROW_PATH__": [None], "delta$": 2317615.875, "alias": 2317615.875},
]

def test_remove_expressions_after_view(self):
schema = {"key": str, "delta$": float, "business_line": str}
data = [
{
"key": "A",
"delta$": 46412.3804275,
},
{
"key": "B",
"delta$": 2317615.875,
},
]

table = Table(schema, index="key")
table.update(data)
view = table.view(
group_by=["business_line"],
columns=["delta$", "alias"],
expressions=[
'// alias\n"delta$"',
],
)

table.remove(["A"])
records = view.to_records()
assert records == [
{"__ROW_PATH__": [], "delta$": 2317615.875, "alias": 2317615.875},
{"__ROW_PATH__": [None], "delta$": 2317615.875, "alias": 2317615.875},
]

0 comments on commit 203c92b

Please sign in to comment.