diff --git a/.gitignore b/.gitignore index 98fdd38f71..ff59d833ad 100644 --- a/.gitignore +++ b/.gitignore @@ -217,3 +217,4 @@ playwright-report/ playwright/.cache/ .pyodide-xbuildenv +benchmark_venv diff --git a/cpp/perspective/src/cpp/scalar.cpp b/cpp/perspective/src/cpp/scalar.cpp index 48f4af7b56..b3f29820f9 100644 --- a/cpp/perspective/src/cpp/scalar.cpp +++ b/cpp/perspective/src/cpp/scalar.cpp @@ -1039,11 +1039,16 @@ t_tscalar::to_string(bool for_expr) const { auto d = get(); ss << "date(" << d.year() << ", " << d.month() << ", " << d.day() << ")"; + + return ss.str(); } else { - ss << get(); + t_date date_val = get(); + tm t = date_val.get_tm(); + time_t epoch_delta = mktime(&t); + std::chrono::milliseconds timestamp(epoch_delta * 1000); + date::sys_time ts(timestamp); + return date::format("%F", ts); } - - return ss.str(); } break; case DTYPE_BOOL: { ss << std::boolalpha << get(); @@ -1063,28 +1068,7 @@ t_tscalar::to_string(bool for_expr) const { // local time and not UTC. std::chrono::milliseconds timestamp(to_int64()); date::sys_time ts(timestamp); - std::time_t temp = std::chrono::system_clock::to_time_t(ts); - std::tm* t = std::localtime(&temp); - - // use a mix of strftime and date::format - std::string buffer; - buffer.resize(64); - - // write y-m-d h:m in local time into buffer, and if successful - // write the rest of the date, otherwise print the date in UTC. - std::size_t len - = strftime(&buffer[0], buffer.size(), "%Y-%m-%d %H:%M:", t); - if (len > 0) { - buffer.resize(len); - ss << buffer; - ss << date::format( - "%S", ts); // represent second and millisecond - } else { - std::cerr << to_int64() << " failed strftime" << std::endl; - ss << date::format("%Y-%m-%d %H:%M:%S UTC", ts); - } - - return ss.str(); + return date::format("%F %T", ts); } break; case DTYPE_STR: { if (for_expr) { @@ -1595,9 +1579,9 @@ t_tscalar::can_store_inplace(const char* s) { bool t_tscalar::is_nan() const { if (m_type == DTYPE_FLOAT64) - return std::isnan(get()); + return std::isnan(get()) || std::isinf(get()); if (m_type == DTYPE_FLOAT32) - return std::isnan(get()); + return std::isnan(get()) || std::isinf(get()); return false; } diff --git a/cpp/perspective/src/cpp/view.cpp b/cpp/perspective/src/cpp/view.cpp index bc86b8f6ae..74b83a524f 100644 --- a/cpp/perspective/src/cpp/view.cpp +++ b/cpp/perspective/src/cpp/view.cpp @@ -173,7 +173,7 @@ View::column_names(bool skip, std::int32_t depth) const { for (t_uindex key = 0, max = m_ctx->unity_get_column_count(); key != max; ++key) { - std::string name = aggregate_names[key % aggregate_names.size()]; + const std::string& name = aggregate_names[key % aggregate_names.size()]; if (name == "psp_okey") { continue; @@ -326,9 +326,9 @@ View::schema() const { template <> std::map View::schema() const { - t_schema schema = m_ctx->get_schema(); - std::vector _types = schema.types(); - std::vector names = schema.columns(); + const t_schema& schema = m_ctx->get_schema(); + const std::vector& _types = schema.types(); + const std::vector& names = schema.columns(); std::map types; for (std::size_t i = 0, max = names.size(); i != max; ++i) { @@ -364,7 +364,7 @@ View::expression_schema() const { } for (const auto& expr : m_expressions) { - std::string expression_alias = expr->get_expression_alias(); + const std::string& expression_alias = expr->get_expression_alias(); new_schema[expression_alias] = dtype_to_str(expr->get_dtype()); if (m_row_pivots.size() > 0 && !is_column_only()) { @@ -385,9 +385,9 @@ View::expression_schema() const { template <> std::map View::expression_schema() const { - t_schema schema = m_ctx->get_schema(); - std::vector _types = schema.types(); - std::vector names = schema.columns(); + const t_schema& schema = m_ctx->get_schema(); + const std::vector& _types = schema.types(); + const std::vector& names = schema.columns(); std::map types; for (std::size_t i = 0, max = names.size(); i != max; ++i) { @@ -397,7 +397,7 @@ View::expression_schema() const { std::map new_schema; for (const auto& expr : m_expressions) { - std::string expression_alias = expr->get_expression_alias(); + const std::string& expression_alias = expr->get_expression_alias(); new_schema[expression_alias] = dtype_to_str(expr->get_dtype()); } @@ -1394,20 +1394,12 @@ View::_map_aggregate_types( template void -View::write_scalar(t_tscalar scalar, +View::write_scalar(t_tscalar scalar, bool is_formatted, rapidjson::Writer& writer) const { - auto str_val = scalar.to_string(); - - if (str_val == "null" || str_val == "nan") { + if (!scalar.is_valid()) { writer.Null(); return; - } else if (str_val == "inf") { - writer.String("Infinity"); - return; - } else if (str_val == "-inf") { - writer.String("-Infinity"); - return; } switch (scalar.get_dtype()) { @@ -1418,38 +1410,55 @@ View::write_scalar(t_tscalar scalar, writer.Bool(scalar.get()); break; case DTYPE_UINT8: - case DTYPE_UINT16: - case DTYPE_UINT32: case DTYPE_INT8: writer.Int(scalar.get()); break; + case DTYPE_UINT16: case DTYPE_INT16: writer.Int(scalar.get()); break; + case DTYPE_UINT32: case DTYPE_INT32: writer.Int(scalar.get()); break; + case DTYPE_UINT64: case DTYPE_INT64: writer.Int64(scalar.get()); break; case DTYPE_FLOAT32: - writer.Double(scalar.get()); + if (scalar.is_nan()) { + writer.Null(); + } else { + writer.Double(scalar.get()); + } break; case DTYPE_FLOAT64: - writer.Double(scalar.get()); + if (scalar.is_nan()) { + writer.Null(); + } else { + writer.Double(scalar.get()); + } break; case DTYPE_STR: writer.String(scalar.get()); break; - case DTYPE_UINT64: case DTYPE_TIME: - writer.Int64(scalar.get()); + if (is_formatted) { + writer.String(scalar.to_string().c_str()); + } else { + writer.Int64(scalar.get()); + } + break; case DTYPE_DATE: { - t_date date_val = scalar.get(); - tm t = date_val.get_tm(); - time_t epoch_delta = mktime(&t); - writer.Double(epoch_delta * 1000); + if (is_formatted) { + writer.String(scalar.to_string().c_str()); + } else { + t_date date_val = scalar.get(); + tm t = date_val.get_tm(); + time_t epoch_delta = mktime(&t); + writer.Double(epoch_delta * 1000); + } break; } @@ -1461,14 +1470,21 @@ View::write_scalar(t_tscalar scalar, template void View::write_row_path(t_uindex start_row, t_uindex end_row, - bool has_row_path, + bool has_row_path, bool leaves_only, bool is_formatted, rapidjson::Writer& writer) const { - writer.Key("__ROW_PATH__"); - writer.StartArray(); - if (has_row_path) { + writer.Key("__ROW_PATH__"); + writer.StartArray(); + t_uindex depth = m_row_pivots.size(); + for (auto r = start_row; r < end_row; ++r) { + if (has_row_path && leaves_only) { + if (m_ctx->unity_get_row_depth(r) < depth) { + continue; + } + } + writer.StartArray(); const auto row_path = get_row_path(r); @@ -1476,21 +1492,21 @@ View::write_row_path(t_uindex start_row, t_uindex end_row, for (auto entry = row_path.size(); entry > 0; entry--) { const t_tscalar& scalar = row_path[entry - 1]; - write_scalar(scalar, writer); + write_scalar(scalar, is_formatted, writer); } writer.EndArray(); } + writer.EndArray(); } - - writer.EndArray(); } template void View::write_column(t_uindex c, t_uindex start_row, t_uindex end_row, + bool has_row_path, bool leaves_only, bool is_formatted, std::shared_ptr> slice, - std::vector> col_names, + const std::vector>& col_names, rapidjson::Writer& writer) const { std::stringstream column_name; @@ -1503,14 +1519,20 @@ View::write_column(t_uindex c, t_uindex start_row, t_uindex end_row, column_name << col_names[c][col_names[c].size() - 1].get(); const std::string& tmp = column_name.str(); - + t_uindex depth = m_row_pivots.size(); writer.Key(tmp.c_str()); writer.StartArray(); for (auto r = start_row; r < end_row; ++r) { + if (has_row_path && leaves_only) { + if (m_ctx->unity_get_row_depth(r) < depth) { + continue; + } + } + auto scalar = slice->get(r, c); - write_scalar(scalar, writer); + write_scalar(scalar, is_formatted, writer); } writer.EndArray(); @@ -1519,20 +1541,27 @@ View::write_column(t_uindex c, t_uindex start_row, t_uindex end_row, template void View::write_index_column(t_uindex start_row, t_uindex end_row, + bool has_row_path, bool leaves_only, bool is_formatted, std::shared_ptr> slice, rapidjson::Writer& writer) const { - + t_uindex depth = m_row_pivots.size(); writer.Key("__INDEX__"); writer.StartArray(); for (auto r = start_row; r < end_row; ++r) { + if (has_row_path && leaves_only) { + if (m_ctx->unity_get_row_depth(r) < depth) { + continue; + } + } + std::vector keys = slice->get_pkeys(r, 0); writer.StartArray(); for (auto i = keys.size(); i > 0; --i) { auto scalar = keys[i - 1]; - write_scalar(scalar, writer); + write_scalar(scalar, is_formatted, writer); } writer.EndArray(); @@ -1548,8 +1577,8 @@ template <> std::string View::to_columns(t_uindex start_row, t_uindex end_row, t_uindex start_col, t_uindex end_col, t_uindex hidden, bool is_formatted, - bool get_pkeys, bool get_ids, bool leaves_only, t_uindex num_sides, - bool has_row_path, std::string nidx, t_uindex columns_length, + bool get_pkeys, bool get_ids, bool _leaves_only, t_uindex num_sides, + bool _has_row_path, std::string nidx, t_uindex columns_length, t_uindex group_by_length) const { auto slice = get_data(start_row, end_row, start_col, end_col); @@ -1561,8 +1590,14 @@ View::to_columns(t_uindex start_row, t_uindex end_row, writer.StartObject(); + if (start_row == end_row || start_col == end_col) { + writer.EndObject(); + return s.GetString(); + } + for (auto c = start_col; c < end_col; ++c) { - write_column(c, start_row, end_row, slice, col_names, writer); + write_column(c, start_row, end_row, false, false, is_formatted, slice, + col_names, writer); } if (get_ids) { @@ -1574,11 +1609,8 @@ View::to_columns(t_uindex start_row, t_uindex end_row, std::vector> vec{pair}; const auto keys = m_ctx->get_pkeys(vec); const t_tscalar& scalar = keys[0]; - writer.StartArray(); - - write_scalar(scalar, writer); - + write_scalar(scalar, is_formatted, writer); writer.EndArray(); } @@ -1593,37 +1625,35 @@ template <> std::string View::to_columns(t_uindex start_row, t_uindex end_row, t_uindex start_col, t_uindex end_col, t_uindex hidden, bool is_formatted, - bool get_pkeys, bool get_ids, bool leaves_only, t_uindex num_sides, - bool has_row_path, std::string nidx, t_uindex columns_length, + bool get_pkeys, bool get_ids, bool _leaves_only, t_uindex num_sides, + bool _has_row_path, std::string nidx, t_uindex columns_length, t_uindex group_by_length) const { - auto slice = get_data(start_row, end_row, start_col, end_col); auto col_names = slice->get_column_names(); auto schema = m_ctx->get_schema(); - rapidjson::StringBuffer s; rapidjson::Writer writer(s); - writer.StartObject(); - for (auto c = start_col; c < end_col; ++c) { - write_column(c, start_row, end_row, slice, col_names, writer); + write_column(c, start_row, end_row, false, false, is_formatted, slice, + col_names, writer); + } + + if (get_pkeys) { + write_index_column( + start_row, end_row, false, false, is_formatted, slice, writer); } if (get_ids) { writer.Key("__ID__"); writer.StartArray(); - for (auto x = start_row; x < end_row; ++x) { std::pair pair{x, 0}; std::vector> vec{pair}; const auto keys = m_ctx->get_pkeys(vec); const t_tscalar& scalar = keys[0]; - writer.StartArray(); - - write_scalar(scalar, writer); - + write_scalar(scalar, is_formatted, writer); writer.EndArray(); } @@ -1641,29 +1671,21 @@ View::to_columns(t_uindex start_row, t_uindex end_row, bool get_pkeys, bool get_ids, bool leaves_only, t_uindex num_sides, bool has_row_path, std::string nidx, t_uindex columns_length, t_uindex group_by_length) const { - auto slice = get_data(start_row, end_row, start_col, end_col); auto col_names = slice->get_column_names(); - rapidjson::StringBuffer s; rapidjson::Writer writer(s); - writer.StartObject(); - - write_row_path(start_row, end_row, true, writer); - + write_row_path(start_row, end_row, true, leaves_only, is_formatted, writer); if (get_ids) { writer.Key("__ID__"); writer.StartArray(); - for (auto r = start_row; r < end_row; ++r) { writer.StartArray(); const auto row_path = m_ctx->get_row_path(r); - for (auto entry = row_path.size(); entry > 0; entry--) { const t_tscalar& scalar = row_path[entry - 1]; - - write_scalar(scalar, writer); + write_scalar(scalar, is_formatted, writer); } writer.EndArray(); @@ -1672,18 +1694,20 @@ View::to_columns(t_uindex start_row, t_uindex end_row, writer.EndArray(); } + // Hidden columns are always at the end of the column names + // list, and we need to skip them from the output. for (auto c = start_col + 1; c < end_col; ++c) { - // Hidden columns are always at the end of the column names - // list, and we need to skip them from the output. if ((c - 1) > columns_length - hidden) { continue; } else { - write_column(c, start_row, end_row, slice, col_names, writer); + write_column(c, start_row, end_row, true, leaves_only, is_formatted, + slice, col_names, writer); } } if (get_pkeys) { - write_index_column(start_row, end_row, slice, writer); + write_index_column( + start_row, end_row, true, leaves_only, is_formatted, slice, writer); } writer.EndObject(); @@ -1697,30 +1721,22 @@ View::to_columns(t_uindex start_row, t_uindex end_row, bool get_pkeys, bool get_ids, bool leaves_only, t_uindex num_sides, bool has_row_path, std::string nidx, t_uindex columns_length, t_uindex group_by_length) const { - auto slice = get_data(start_row, end_row, start_col, end_col); auto col_names = slice->get_column_names(); - rapidjson::StringBuffer s; rapidjson::Writer writer(s); - writer.StartObject(); - - write_row_path(start_row, end_row, has_row_path, writer); - + write_row_path( + start_row, end_row, has_row_path, leaves_only, is_formatted, writer); if (get_ids) { writer.Key("__ID__"); writer.StartArray(); - for (auto r = start_row; r < end_row; ++r) { writer.StartArray(); - const auto row_path = m_ctx->get_row_path(r); - for (auto entry = row_path.size(); entry > 0; entry--) { const t_tscalar& scalar = row_path[entry - 1]; - - write_scalar(scalar, writer); + write_scalar(scalar, is_formatted, writer); } writer.EndArray(); @@ -1735,12 +1751,14 @@ View::to_columns(t_uindex start_row, t_uindex end_row, if (((c - 1) % (columns_length + hidden)) >= columns_length) { continue; } else { - write_column(c, start_row, end_row, slice, col_names, writer); + write_column(c, start_row, end_row, has_row_path, leaves_only, + is_formatted, slice, col_names, writer); } } if (get_pkeys) { - write_index_column(start_row, end_row, slice, writer); + write_index_column(start_row, end_row, has_row_path, leaves_only, + is_formatted, slice, writer); } writer.EndObject(); diff --git a/cpp/perspective/src/include/perspective/view.h b/cpp/perspective/src/include/perspective/view.h index 95644f2a49..3fa28098c0 100644 --- a/cpp/perspective/src/include/perspective/view.h +++ b/cpp/perspective/src/include/perspective/view.h @@ -129,18 +129,21 @@ class PERSPECTIVE_EXPORT View { std::pair get_min_max( const std::string& colname) const; - void write_scalar(t_tscalar scalar, + void write_scalar(t_tscalar scalar, bool is_formatted, rapidjson::Writer& writer) const; void write_row_path(t_uindex start_row, t_uindex end_row, bool has_row_path, + bool leaves_only, bool is_formatted, rapidjson::Writer& writer) const; void write_column(t_uindex c, t_uindex start_row, t_uindex end_row, + bool has_row_path, bool leaves_only, bool is_formatted, std::shared_ptr> slice, - std::vector> col_names, + const std::vector>& col_names, rapidjson::Writer& writer) const; void write_index_column(t_uindex start_row, t_uindex end_row, + bool has_row_path, bool leaves_only, bool is_formatted, std::shared_ptr> slice, rapidjson::Writer& writer) const; diff --git a/packages/perspective-viewer-d3fc/src/js/plugin/plugin.js b/packages/perspective-viewer-d3fc/src/js/plugin/plugin.js index 95d8ed49ae..276ad946d7 100644 --- a/packages/perspective-viewer-d3fc/src/js/plugin/plugin.js +++ b/packages/perspective-viewer-d3fc/src/js/plugin/plugin.js @@ -267,17 +267,23 @@ export function register(...plugins) { let jsonp, metadata; const leaves_only = chart.plugin.name !== "Sunburst"; if (end_col && end_row) { - jsonp = view.to_json({ + jsonp = view.to_columns_string({ end_row, end_col, leaves_only, }); } else if (end_col) { - jsonp = view.to_json({ end_col, leaves_only }); + jsonp = view.to_columns_string({ + end_col, + leaves_only, + }); } else if (end_row) { - jsonp = view.to_json({ end_row, leaves_only }); + jsonp = view.to_columns_string({ + end_row, + leaves_only, + }); } else { - jsonp = view.to_json({ leaves_only }); + jsonp = view.to_columns_string({ leaves_only }); } metadata = await Promise.all([ @@ -295,10 +301,23 @@ export function register(...plugins) { table_schema, expression_schema, view_schema, - json, + json_string, config, ] = metadata; + let json2 = JSON.parse(json_string); + const keys = Object.keys(json2); + let json = { + row(ridx) { + const obj = {}; + for (const name of keys) { + obj[name] = json2[name][ridx]; + } + + return obj; + }, + }; + this.config = real_config; const realValues = this.config.columns; @@ -317,10 +336,12 @@ export function register(...plugins) { }; const { columns, group_by, split_by, filter } = config; + const first_col = json2[Object.keys(json2)[0]] || []; const filtered = group_by.length > 0 - ? json.reduce( - (acc, col) => { + ? first_col.reduce( + (acc, _, idx) => { + const col = json.row(idx); if ( col.__ROW_PATH__ && col.__ROW_PATH__.length == @@ -345,7 +366,12 @@ export function register(...plugins) { }, { rows: [], aggs: [], agg_paths: [] } ) - : { rows: json }; + : { + rows: first_col.map((_, idx) => + json.row(idx) + ), + }; + const dataMap = (col, i) => !group_by.length ? { ...col, __ROW_PATH__: [i] } diff --git a/packages/perspective-viewer-datagrid/package.json b/packages/perspective-viewer-datagrid/package.json index 97327d1018..03673e0879 100644 --- a/packages/perspective-viewer-datagrid/package.json +++ b/packages/perspective-viewer-datagrid/package.json @@ -32,7 +32,7 @@ "@finos/perspective": "^2.3.2", "@finos/perspective-viewer": "^2.3.2", "chroma-js": "^1.3.4", - "regular-table": "=0.5.7" + "regular-table": "=0.5.9" }, "devDependencies": { "@prospective.co/procss": "^0.1.13", diff --git a/packages/perspective-viewer-datagrid/src/js/data_listener/index.js b/packages/perspective-viewer-datagrid/src/js/data_listener/index.js index 37911622c0..a2d983ed96 100644 --- a/packages/perspective-viewer-datagrid/src/js/data_listener/index.js +++ b/packages/perspective-viewer-datagrid/src/js/data_listener/index.js @@ -45,7 +45,9 @@ export function createDataListener() { id: true, }; - columns = await this._view.to_columns(new_window); + columns = JSON.parse( + await this._view.to_columns_string(new_window) + ); this._last_window = new_window; this._ids = columns.__ID__; diff --git a/packages/perspective/src/js/perspective.js b/packages/perspective/src/js/perspective.js index 30449e83bd..8e33692aa4 100644 --- a/packages/perspective/src/js/perspective.js +++ b/packages/perspective/src/js/perspective.js @@ -17,8 +17,6 @@ import { extract_vector, extract_map, fill_vector } from "./emscripten.js"; import { bindall, get_column_type } from "./utils.js"; import { Server } from "./api/server.js"; -import formatters from "./view_formatters"; - if (typeof self !== "undefined" && self.performance === undefined) { self.performance = { now: Date.now }; } @@ -519,167 +517,6 @@ export default function (Module) { } }; - /** - * Generic base function from which `to_json`, `to_columns` etc. derives. - * - * @private - */ - const to_format = function (options, formatter) { - _call_process(this.table.get_id()); - options = _parse_format_options.bind(this)(options); - const start_row = options.start_row; - const end_row = options.end_row; - const start_col = options.start_col; - const end_col = options.end_col; - const hidden = this._num_hidden(); - - const is_formatted = options.formatted; - const get_pkeys = !!options.index; - const get_ids = !!options.id; - const leaves_only = !!options.leaves_only; - const num_sides = this.sides(); - const has_row_path = num_sides !== 0 && !this.column_only; - const nidx = SIDES[num_sides]; - - let get_from_data_slice; - - if (this.is_unit_context) { - get_from_data_slice = __MODULE__.get_from_data_slice_unit; - } else { - get_from_data_slice = __MODULE__[`get_from_data_slice_${nidx}`]; - } - - const slice = this.get_data_slice( - start_row, - end_row, - start_col, - end_col - ); - const ns = slice.get_column_names(); - const col_names = extract_vector_scalar(ns).map((x) => - x.join(defaults.COLUMN_SEPARATOR_STRING) - ); - const schema = this.schema(); - - let data = formatter.initDataValue(); - - for (let cidx = start_col; cidx < end_col; cidx++) { - const col_name = col_names[cidx]; - formatter.initColumnValue(data, col_name); - } - - for (let ridx = start_row; ridx < end_row; ridx++) { - let row_path = has_row_path ? slice.get_row_path(ridx) : undefined; - if ( - has_row_path && - leaves_only && - row_path.size() < this.config.group_by.length - ) { - row_path.delete(); - continue; - } - let row = formatter.initRowValue(); - - if (get_ids) { - formatter.initColumnRowPath(data, row, "__ID__"); - } - - for (let cidx = start_col; cidx < end_col; cidx++) { - const col_name = col_names[cidx]; - const col_type = schema[col_name]; - const type_config = get_type_config(col_type); - - if (cidx === start_col && num_sides !== 0) { - if (!this.column_only) { - formatter.initColumnRowPath(data, row, "__ROW_PATH__"); - for (let i = 0; i < row_path.size(); i++) { - const s = row_path.get(i); - const value = __MODULE__.scalar_to_val( - s, - false, - false - ); - s.delete(); - formatter.addColumnValue( - data, - row, - "__ROW_PATH__", - value - ); - if (get_ids) { - formatter.addColumnValue( - data, - row, - "__ID__", - value - ); - } - } - } - } else if ( - (cidx - (num_sides > 0 ? 1 : 0)) % - (this.config.columns.length + hidden) >= - this.config.columns.length - ) { - // Hidden columns are always at the end of the column names - // list, and we need to skip them from the output. - continue; - } else { - let value = get_from_data_slice(slice, ridx, cidx); - if (is_formatted && value !== null && value !== undefined) { - if (col_type === "datetime" || col_type === "date") { - // TODO Annoyingly, CSV occupies the gray area of - // needing formatting _just_ for Date and Datetime - - // e.g., 10000 will format as CSV `"10,000.00" - // Otherwise, this would not need to be conditional. - value = new Date(value); - value = value.toLocaleString( - [], - type_config.format - ); - } - } - formatter.setColumnValue(data, row, col_name, value); - } - } - - if (get_pkeys) { - const keys = slice.get_pkeys(ridx, 0); - formatter.initColumnRowPath(data, row, "__INDEX__"); - for (let i = 0; i < keys.size(); i++) { - // TODO: if __INDEX__ and set index have the same value, - // don't we need to make sure that it only emits one? - const s = keys.get(i); - const value = __MODULE__.scalar_to_val(s, false, false); - s.delete(); - formatter.addColumnValue(data, row, "__INDEX__", value); - } - keys.delete(); - } - - // we could add an api to just clone the index column if - // it's already calculated - if (get_ids && num_sides === 0) { - const keys = slice.get_pkeys(ridx, 0); - for (let i = 0; i < keys.size(); i++) { - const s = keys.get(i); - const value = __MODULE__.scalar_to_val(s, false, false); - s.delete(); - formatter.addColumnValue(data, row, "__ID__", value); - } - keys.delete(); - } - - if (row_path) { - row_path.delete(); - } - formatter.addRow(data, row); - } - - slice.delete(); - return formatter.formatData(data, options.config); - }; - /** * Generic base function for returning serialized data for a single column. * @@ -751,25 +588,7 @@ export default function (Module) { * comma-separated column paths. */ view.prototype.to_columns = function (options) { - const schema = this.schema(); - - let parsed_json = JSON.parse(this.to_columns_string(options)); - - const corrected_json = Object.entries(parsed_json).map(([key, val]) => { - let col_type = schema[key]; - let v = val; - - // Convert date epoch numbers. - // Also handle Infinity and -Infinity in floats, - // which are returned as strings since JSON doesn't support them. - if (col_type === "date" || col_type === "float") { - v = val.map((x) => (x !== null ? Number(x) : null)); - } - - return [key, v]; - }); - - return Object.fromEntries(corrected_json); + return JSON.parse(this.to_columns_string(options)); }; /** @@ -777,53 +596,39 @@ export default function (Module) { * save additional round trip serialize/deserialize cycles. */ view.prototype.to_columns_string = function (options) { + _call_process(this.table.get_id()); + options = _parse_format_options.bind(this)(options); + const start_row = options.start_row; + const end_row = options.end_row; + const start_col = options.start_col; + const end_col = options.end_col; + const hidden = this._num_hidden(); + const is_formatted = options.formatted; + const get_pkeys = !!options.index; + const get_ids = !!options.id; + const leaves_only = !!options.leaves_only; const num_sides = this.sides(); - - switch (num_sides) { - case 0: - case 1: - - case 2: - _call_process(this.table.get_id()); - options = _parse_format_options.bind(this)(options); - const start_row = options.start_row; - const end_row = options.end_row; - const start_col = options.start_col; - const end_col = options.end_col; - const hidden = this._num_hidden(); - - const is_formatted = options.formatted; - const get_pkeys = !!options.index; - const get_ids = !!options.id; - const leaves_only = !!options.leaves_only; - const num_sides = this.sides(); - const has_row_path = num_sides !== 0 && !this.column_only; - const nidx = SIDES[num_sides]; - - const config = this.get_config(); - const columns_length = config.columns.length; - const group_by_length = config.group_by.length; - - return this._View.to_columns( - start_row, - end_row, - start_col, - end_col, - hidden, - is_formatted, - get_pkeys, - get_ids, - leaves_only, - num_sides, - has_row_path, - nidx, - columns_length, - group_by_length - ); - - default: - throw new Error("Unknown context type"); - } + const has_row_path = num_sides !== 0 && !this.column_only; + const nidx = SIDES[num_sides]; + const config = this.get_config(); + const columns_length = config.columns.length; + const group_by_length = config.group_by.length; + return this._View.to_columns( + start_row, + end_row, + start_col, + end_col, + hidden, + is_formatted, + get_pkeys, + get_ids, + leaves_only, + num_sides, + has_row_path, + nidx, + columns_length, + group_by_length + ); }; /** @@ -851,7 +656,17 @@ export default function (Module) { * comma-separated column paths. */ view.prototype.to_json = function (options) { - return to_format.call(this, options, formatters.jsonFormatter); + const cols = this.to_columns(options); + const colnames = Object.keys(cols); + const first_col = cols[colnames[0]] || []; + return first_col.map((_, idx) => { + const obj = {}; + for (const key of colnames) { + obj[key] = cols[key][idx]; + } + + return obj; + }); }; /** diff --git a/packages/perspective/src/js/view_formatters.js b/packages/perspective/src/js/view_formatters.js deleted file mode 100644 index 1a27b54bba..0000000000 --- a/packages/perspective/src/js/view_formatters.js +++ /dev/null @@ -1,55 +0,0 @@ -// ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ -// ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ -// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ -// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃ -// ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ -// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ -// ┃ Copyright (c) 2017, the Perspective Authors. ┃ -// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ -// ┃ This file is part of the Perspective library, distributed under the terms ┃ -// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃ -// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ - -const jsonFormatter = { - initDataValue: () => [], - initRowValue: () => ({}), - initColumnValue: (data, colName) => {}, - initColumnRowPath: (data, row, colName) => (row[colName] = []), - setColumnValue: (data, row, colName, value) => (row[colName] = value), - addColumnValue: (data, row, colName, value) => row[colName].unshift(value), - addRow: (data, row) => data.push(row), - formatData: (data) => data, - slice: (data, start) => data.slice(start), -}; - -const jsonTableFormatter = { - initDataValue: () => new Object(), - initRowValue: () => {}, - initColumnValue: (data, colName) => { - data[colName] = []; - }, - setColumnValue: (data, row, colName, value) => { - data[colName].push(value); - }, - addColumnValue: (data, row, colName, value) => { - data[colName][data[colName].length - 1].unshift(value); - }, - initColumnRowPath: (data, row, colName) => { - data[colName] = data[colName] || []; - data[colName].push([]); - }, - addRow: () => {}, - formatData: (data) => data, - slice: (data, start) => { - let new_data = {}; - for (let x in data) { - new_data[x] = data[x].slice(start); - } - return new_data; - }, -}; - -export default { - jsonFormatter, - jsonTableFormatter, -}; diff --git a/packages/perspective/test/js/expressions/conversions.spec.js b/packages/perspective/test/js/expressions/conversions.spec.js index bf617e2cac..1ff5654253 100644 --- a/packages/perspective/test/js/expressions/conversions.spec.js +++ b/packages/perspective/test/js/expressions/conversions.spec.js @@ -361,10 +361,10 @@ const perspective = require("@finos/perspective"); expect(result["computed12"]).toEqual([null]); expect(result["computed13"]).toEqual([2147483648.1234566]); expect(result["computed14"]).toEqual([-2147483649]); - expect(result["computed15"]).toEqual([Infinity]); - expect(result["computed16"]).toEqual([-Infinity]); - expect(result["computed17"]).toEqual([Infinity]); - expect(result["computed18"]).toEqual([-Infinity]); + expect(result["computed15"]).toEqual([null]); + expect(result["computed16"]).toEqual([null]); + expect(result["computed17"]).toEqual([null]); + expect(result["computed18"]).toEqual([null]); await view.delete(); await table.delete(); diff --git a/packages/perspective/test/js/expressions/functionality.spec.js b/packages/perspective/test/js/expressions/functionality.spec.js index a59802bb64..44d03c7f83 100644 --- a/packages/perspective/test/js/expressions/functionality.spec.js +++ b/packages/perspective/test/js/expressions/functionality.spec.js @@ -2450,7 +2450,6 @@ const perspective = require("@finos/perspective"); "8.5|y": [null, null, null, "d"], "8.5|z": [null, null, null, false], '8.5|"w" + "x"': [null, null, null, 8.5], - __ROW_PATH__: [], }); view.delete(); table.delete(); diff --git a/packages/perspective/test/js/leaks.spec.js b/packages/perspective/test/js/leaks.spec.js index 2e236d7891..d77659dd89 100644 --- a/packages/perspective/test/js/leaks.spec.js +++ b/packages/perspective/test/js/leaks.spec.js @@ -87,6 +87,17 @@ test.describe("leaks", function () { view.delete(); table.delete(); }); + + test("to_columns_string does not leak", async () => { + const table = await perspective.table(arr.slice()); + const view = await table.view({ group_by: ["State"] }); + await leak_test(async function () { + let json = await view.to_columns_string(); + expect(json.length).toEqual(6722); + }); + view.delete(); + table.delete(); + }); }); }); @@ -111,6 +122,19 @@ test.describe("leaks", function () { view.delete(); table.delete(); }); + + test.skip("csv loading does not leak", async () => { + const table = await perspective.table(arr.slice()); + const view = await table.view(); + const csv = await view.to_csv({ end_row: 10 }); + view.delete(); + table.delete(); + await leak_test(async function () { + const table = await perspective.table(csv); + expect(await table.size()).toEqual(10); + await table.delete(); + }); + }); }); test.describe("expression columns", function () { diff --git a/packages/perspective/test/js/pivots.spec.js b/packages/perspective/test/js/pivots.spec.js index efaeeebe90..f33fb10240 100644 --- a/packages/perspective/test/js/pivots.spec.js +++ b/packages/perspective/test/js/pivots.spec.js @@ -358,7 +358,7 @@ const std = (nums) => { "null, aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "null, aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", - null, + "null", ], }; let result = await view.to_columns(); @@ -2355,7 +2355,7 @@ const std = (nums) => { table.delete(); }); - test("['z'] only, datetime column", async function ({ page }) { + test("['z'] only, datetime column", async function () { var table = await perspective.table(data_8); var view = await table.view({ split_by: ["z"], @@ -2364,7 +2364,6 @@ const std = (nums) => { let result2 = await view.to_columns(); expect(result2).toEqual({ - __ROW_PATH__: [], "2019-04-11 23:40:35.065|x": [null, null, 3, 4], "2019-04-11 23:40:35.065|y": [null, null, "c", "d"], "2019-04-13 03:27:15.065|x": [1, 2, null, null], @@ -2434,7 +2433,6 @@ const std = (nums) => { }); let result2 = await view.to_columns(); expect(result2).toEqual({ - __ROW_PATH__: [], "true|w": [1.5, null, 3.5, null], "true|x": [1, null, 3, null], "true|y": ["a", null, "c", null], diff --git a/packages/perspective/test/js/sort.spec.js b/packages/perspective/test/js/sort.spec.js index 4913efa52a..1d18c5f083 100644 --- a/packages/perspective/test/js/sort.spec.js +++ b/packages/perspective/test/js/sort.spec.js @@ -575,7 +575,6 @@ const data3 = { const paths = await view.column_paths(); expect(paths).toEqual(["d|w", "c|w", "b|w", "a|w"]); const answer = { - __ROW_PATH__: [], "d|w": [null, null, null, 4.5, null, null, null, 8.5], "c|w": [null, null, 3.5, null, null, null, 7.5, null], "b|w": [null, 2.5, null, null, null, 6.5, null, null], @@ -604,7 +603,6 @@ const data3 = { const result = await view.to_columns(); expect(result).toEqual({ - __ROW_PATH__: [], "a|x": [null, 1, 2, 3], "b|x": [4, null, null, null], }); @@ -628,7 +626,6 @@ const data3 = { const result = await view.to_columns(); expect(result).toEqual({ - __ROW_PATH__: [], "b|x": [null, null, null, 4], "a|x": [1, 2, 3, null], }); @@ -659,7 +656,6 @@ const data3 = { const result = await view.to_columns(); expect(result).toEqual({ - __ROW_PATH__: [], "a|x": [null, 1, 2, 3], "b|x": [4, null, null, null], }); @@ -689,7 +685,6 @@ const data3 = { const result = await view.to_columns(); expect(result).toEqual({ - __ROW_PATH__: [], "b|x": [null, null, null, 4], "a|x": [1, 2, 3, null], }); @@ -715,7 +710,6 @@ const data3 = { let result = await view.to_columns(); expect(result).toEqual({ - __ROW_PATH__: [], "b|x": [null, null, null, 4], "a|x": [1, 2, 3, null], }); diff --git a/packages/perspective/test/js/to_format.spec.js b/packages/perspective/test/js/to_format.spec.js index 3bb9e258ff..82916c0500 100644 --- a/packages/perspective/test/js/to_format.spec.js +++ b/packages/perspective/test/js/to_format.spec.js @@ -82,7 +82,7 @@ const pivoted_output = [ let json = await view.to_json({ start_col: 5, }); - expect(json).toEqual([{}, {}, {}, {}]); + expect(json).toEqual([]); view.delete(); table.delete(); }); @@ -126,7 +126,7 @@ const pivoted_output = [ start_col: 2, }); - expect(json).toEqual([{}, {}, {}, {}]); + expect(json).toEqual([]); view.delete(); table.delete(); }); @@ -431,8 +431,8 @@ const pivoted_output = [ let view = await table.view(); let json = await view.to_json({ formatted: true }); expect(json).toEqual([ - { datetime: "6/13/16" }, - { datetime: "6/14/16" }, + { datetime: "2016-06-13" }, + { datetime: "2016-06-14" }, ]); view.delete(); table.delete(); @@ -445,14 +445,9 @@ const pivoted_output = [ ]); let view = await table.view(); let json = await view.to_json({ formatted: true }); - json = json.map((obj) => { - obj.datetime = obj.datetime.replace(/[^:,\/|A-Z0-9 ]/gi, " "); - return obj; - }, {}); - expect(json).toEqual([ - { datetime: "1/1/16, 12:30:00 AM" }, - { datetime: "6/15/16, 7:20:00 PM" }, + { datetime: "2016-01-01 00:30:00.000" }, + { datetime: "2016-06-15 19:20:00.000" }, ]); view.delete(); table.delete(); diff --git a/packages/perspective/test/js/to_format_viewport.spec.js b/packages/perspective/test/js/to_format_viewport.spec.js index d5178b2c60..0fb774bebd 100644 --- a/packages/perspective/test/js/to_format_viewport.spec.js +++ b/packages/perspective/test/js/to_format_viewport.spec.js @@ -188,7 +188,6 @@ test.describe("to_format viewport", function () { }); const cols = await view.to_columns({ start_col: 0, end_col: 1 }); expect(cols).toEqual({ - __ROW_PATH__: [], "false|w": [ null, 2.5, @@ -219,7 +218,6 @@ test.describe("to_format viewport", function () { }); const cols = await view.to_columns({ start_col: 1, end_col: 2 }); expect(cols).toEqual({ - __ROW_PATH__: [], "false|x": [ null, 2, @@ -250,7 +248,6 @@ test.describe("to_format viewport", function () { }); const cols = await view.to_columns({ start_col: 0, end_col: 2 }); expect(cols).toEqual({ - __ROW_PATH__: [], "false|w": [ null, 2.5, diff --git a/python/perspective/bench/runtime/perspective_benchmark.py b/python/perspective/bench/runtime/perspective_benchmark.py index 7c962bf789..994c1d574b 100644 --- a/python/perspective/bench/runtime/perspective_benchmark.py +++ b/python/perspective/bench/runtime/perspective_benchmark.py @@ -184,9 +184,12 @@ def benchmark_to_format_zero(self): for name in ( "arrow", "csv", + "columns", + "records", ): - test_meta = make_meta("to_format", "to_{}".format(name)) - func = Benchmark(lambda: getattr(self._view, "to_{0}".format(name))(), meta=test_meta) + method = "to_{0}".format(name) + test_meta = make_meta("to_format", method) + func = Benchmark(getattr(self._view, method), meta=test_meta) setattr(self, "to_format_{0}".format(name), func) def benchmark_to_format_one(self): @@ -194,13 +197,16 @@ def benchmark_to_format_one(self): for name in ( "arrow", "csv", + "columns", + "records", ): for pivot in PerspectiveBenchmark.group_by_OPTIONS: if len(pivot) == 0: continue test_meta = make_meta("to_format", "to_{0}_r{1}".format(name, len(pivot))) view = self._table.view(group_by=pivot) - func = Benchmark(lambda: getattr(view, "to_{0}".format(name))(), meta=test_meta) + method = "to_{0}".format(name) + func = Benchmark(getattr(view, method), meta=test_meta) setattr(self, "to_format_{0}".format(test_meta["name"]), func) def benchmark_to_format_two(self): @@ -208,6 +214,8 @@ def benchmark_to_format_two(self): for name in ( "arrow", "csv", + "columns", + "records", ): for i in range(len(PerspectiveBenchmark.group_by_OPTIONS)): RP = PerspectiveBenchmark.group_by_OPTIONS[i] @@ -216,7 +224,8 @@ def benchmark_to_format_two(self): continue test_meta = make_meta("to_format", "to_{0}_r{1}_c{2}".format(name, len(RP), len(CP))) view = self._table.view(group_by=RP, split_by=CP) - func = Benchmark(lambda: getattr(view, "to_{0}".format(name))(), meta=test_meta) + method = "to_{0}".format(name) + func = Benchmark(getattr(view, method), meta=test_meta) setattr(self, "to_format_{0}".format(test_meta["name"]), func) diff --git a/python/perspective/bench/runtime/run_perspective_benchmark.py b/python/perspective/bench/runtime/run_perspective_benchmark.py index 4d1d525d62..f924fde7fd 100644 --- a/python/perspective/bench/runtime/run_perspective_benchmark.py +++ b/python/perspective/bench/runtime/run_perspective_benchmark.py @@ -19,6 +19,7 @@ """Benchmark the `perspective-python` runtime locally.""" VERSIONS = [ "master", + "2.3.2", "2.3.1", # "2.3.0", "2.2.1", @@ -28,7 +29,7 @@ # Access the benchmark virtualenv HERE = os.path.abspath(os.path.dirname(__file__)) VIRTUALENV_NAME = "benchmark_venv" - VIRTUALENV_PATH = os.path.join(HERE, VIRTUALENV_NAME) + VIRTUALENV_PATH = os.path.join(HERE, "..", "..", "..", "..", VIRTUALENV_NAME) venv_handler = VirtualEnvHandler(VIRTUALENV_PATH) print("Benchmarking perspective-python==master") diff --git a/python/perspective/perspective/client/view_api.py b/python/perspective/perspective/client/view_api.py index fa4097efec..1f148b782b 100644 --- a/python/perspective/perspective/client/view_api.py +++ b/python/perspective/perspective/client/view_api.py @@ -153,3 +153,6 @@ def to_json(self, **kwargs): def to_columns(self, **kwargs): return self._async_queue("to_columns", "view_method", **kwargs) + + def to_columns_string(self, **kwargs): + return self._async_queue("to_columns_string", "view_method", **kwargs) diff --git a/python/perspective/perspective/include/perspective/python.h b/python/perspective/perspective/include/perspective/python.h index a021acadb2..f8f2b5ba42 100644 --- a/python/perspective/perspective/include/perspective/python.h +++ b/python/perspective/perspective/include/perspective/python.h @@ -112,6 +112,7 @@ PYBIND11_MODULE(libpsppy, m) { .def("get_min_max", &View::get_min_max) .def("get_step_delta", &View::get_step_delta) .def("get_column_dtype", &View::get_column_dtype) + .def("to_columns", &View::to_columns) .def("is_column_only", &View::is_column_only); py::class_, std::shared_ptr>>(m, "View_ctx0") @@ -136,6 +137,7 @@ PYBIND11_MODULE(libpsppy, m) { .def("get_min_max", &View::get_min_max) .def("get_step_delta", &View::get_step_delta) .def("get_column_dtype", &View::get_column_dtype) + .def("to_columns", &View::to_columns) .def("is_column_only", &View::is_column_only); py::class_, std::shared_ptr>>(m, "View_ctx1") @@ -163,6 +165,7 @@ PYBIND11_MODULE(libpsppy, m) { .def("get_min_max", &View::get_min_max) .def("get_step_delta", &View::get_step_delta) .def("get_column_dtype", &View::get_column_dtype) + .def("to_columns", &View::to_columns) .def("is_column_only", &View::is_column_only); py::class_, std::shared_ptr>>(m, "View_ctx2") @@ -191,6 +194,7 @@ PYBIND11_MODULE(libpsppy, m) { .def("get_row_path", &View::get_row_path) .def("get_step_delta", &View::get_step_delta) .def("get_column_dtype", &View::get_column_dtype) + .def("to_columns", &View::to_columns) .def("is_column_only", &View::is_column_only); /****************************************************************************** diff --git a/python/perspective/perspective/table/_data_formatter.py b/python/perspective/perspective/table/_data_formatter.py index e2f7c86909..ae59757152 100644 --- a/python/perspective/perspective/table/_data_formatter.py +++ b/python/perspective/perspective/table/_data_formatter.py @@ -213,8 +213,8 @@ def _parse_format_options(view, options): "end_col": int( ceil( min( - (options.get("end_col", max_cols) + column_only_offset) * (view._num_hidden_cols() + 1), max_cols, + (options.get("end_col") + column_only_offset if "end_col" in options else max_cols) * (view._num_hidden_cols() + 1), ) ) ), diff --git a/python/perspective/perspective/table/view.py b/python/perspective/perspective/table/view.py index 5abdf837c0..3cca41fd33 100644 --- a/python/perspective/perspective/table/view.py +++ b/python/perspective/perspective/table/view.py @@ -11,6 +11,8 @@ # ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ import pandas +import json +import datetime from functools import partial, wraps from random import random @@ -487,9 +489,34 @@ def to_records(self, **kwargs): represents a row of the current state of the :class:`~perspective.View`. """ - return to_format(kwargs, self, "records") + columns = self.to_columns(**kwargs) + colnames = list(columns.keys()) + if len(colnames) > 0: + if colnames[0] in columns: + nrows = len(columns[colnames[0]]) + return [{key: columns[key][i] for key in colnames} for i in range(nrows)] + return [] + + def to_columns_string(self, **kwargs): + options = _parse_format_options(self, kwargs) + return self._view.to_columns( + options["start_row"], + options["end_row"], + options["start_col"], + options["end_col"], + self._num_hidden_cols(), + kwargs.get("formatted", False), + kwargs.get("index", False), + kwargs.get("id", False), + kwargs.get("leaves_only", False), + self._sides, + self._sides != 0 and not self._column_only, + "zero" if self._sides == 0 else "one" if self._sides == 1 else "two", + len(self._config.get_columns()), + len(self._config.get_group_by()), + ) - def to_dict(self, **options): + def to_dict(self, **kwargs): """Serialize the :class:`~perspective.View`'s dataset into a :obj:`dict` of :obj:`str` keys and :obj:`list` values. Each key is a column name, and the associated value is the column's data packed into a :obj:`list`. @@ -514,7 +541,43 @@ def to_dict(self, **options): :obj:`dict`: A dictionary with string keys and list values, where key = column name and value = column values. """ - return to_format(options, self, "dict") + data = json.loads(self.to_columns_string(**kwargs)) + schema = self.schema(True) + table_schema = self._table.schema(True) + out = {} + + for name, col in data.items(): + if schema.get(name.split("|")[-1], "") in ( + "date", + "datetime", + ) or schema.get( + name, "" + ) in ("date", "datetime"): + out[name] = list( + map( + lambda x: datetime.datetime.fromtimestamp(x / 1000) if x is not None else None, + col, + ) + ) + else: + out[name] = col + + for idx, name in enumerate(self._config.get_group_by()): + if table_schema.get(name, "") in ("date", "datetime"): + row_path_col = out["__ROW_PATH__"] + for row in row_path_col: + if idx < len(row): + row[idx] = datetime.datetime.fromtimestamp(row[idx] / 1000) if row[idx] is not None else None + + if kwargs.get("index", False) and table_schema.get(self._table._index, "") in ( + "date", + "datetime", + ): + row_path_col = out["__INDEX__"] + for idx in range(len(row_path_col)): + row_path_col[idx][0] = datetime.datetime.fromtimestamp(row_path_col[idx][0] / 1000) if row_path_col[idx][0] is not None else None + + return out def to_numpy(self, **options): """Serialize the view's dataset into a :obj:`dict` of :obj:`str` keys diff --git a/python/perspective/perspective/tests/manager/test_manager.py b/python/perspective/perspective/tests/manager/test_manager.py index dddccfdd46..bff022bd16 100644 --- a/python/perspective/perspective/tests/manager/test_manager.py +++ b/python/perspective/perspective/tests/manager/test_manager.py @@ -562,10 +562,7 @@ def test_manager_to_dict_with_nan(self, util, sentinel): def handle_to_dict(msg): s.set(True) message = json.loads(msg) - assert message == { - "id": 2, - "error": "JSON serialization error: Cannot serialize `NaN`, `Infinity` or `-Infinity` to JSON.", - } + assert message == {"id": 2, "data": {"a": [1.5, None, 2.5, None]}} message = {"id": 1, "table_name": "table1", "view_name": "view1", "cmd": "view"} manager = PerspectiveManager() diff --git a/python/perspective/perspective/tests/table/test_to_arrow.py b/python/perspective/perspective/tests/table/test_to_arrow.py index d48cab303a..b733b6dd70 100644 --- a/python/perspective/perspective/tests/table/test_to_arrow.py +++ b/python/perspective/perspective/tests/table/test_to_arrow.py @@ -25,7 +25,15 @@ def test_to_arrow_nones_symmetric(self): assert tbl2.view().to_dict() == data def test_to_arrow_big_numbers_symmetric(self): - data = {"a": [1, 2, 3, 4], "b": [1.7976931348623157e308, 1.7976931348623157e308, 1.7976931348623157e308, 1.7976931348623157e308]} + data = { + "a": [1, 2, 3, 4], + "b": [ + 1.7976931348623157e308, + 1.7976931348623157e308, + 1.7976931348623157e308, + 1.7976931348623157e308, + ], + } tbl = Table(data) assert tbl.schema() == {"a": int, "b": float} arr = tbl.view().to_arrow() @@ -185,7 +193,7 @@ def test_to_arrow_start_end_row_equiv(self): assert tbl.schema() == {"a": int, "b": float} arr = tbl.view().to_arrow(start_row=2, end_row=2) tbl2 = Table(arr) - assert tbl2.view().to_dict() == {} + assert tbl2.view().to_dict() == {"a": [], "b": []} def test_to_arrow_start_row_invalid(self): data = {"a": [None, 1, None, 2, 3], "b": [1.5, 2.5, None, 3.5, None]} diff --git a/python/perspective/perspective/tests/table/test_to_format.py b/python/perspective/perspective/tests/table/test_to_format.py index 36d41bd789..be049c40f4 100644 --- a/python/perspective/perspective/tests/table/test_to_format.py +++ b/python/perspective/perspective/tests/table/test_to_format.py @@ -50,7 +50,10 @@ def test_to_records_date(self): data = [{"a": today, "b": "string2"}, {"a": today, "b": "string4"}] tbl = Table(data) view = tbl.view() - assert view.to_records() == [{"a": dt, "b": "string2"}, {"a": dt, "b": "string4"}] + assert view.to_records() == [ + {"a": dt, "b": "string2"}, + {"a": dt, "b": "string4"}, + ] def test_to_records_date_no_dst(self): # make sure that DST does not affect the way we read dates - if tm_dst in `t_date::get_tm()` isn't set to -1, it could reverse 1hr by assuming DST is not in effect. @@ -59,27 +62,45 @@ def test_to_records_date_no_dst(self): data = [{"a": today, "b": "string2"}, {"a": today, "b": "string4"}] tbl = Table(data) view = tbl.view() - assert view.to_records() == [{"a": dt, "b": "string2"}, {"a": dt, "b": "string4"}] + assert view.to_records() == [ + {"a": dt, "b": "string2"}, + {"a": dt, "b": "string4"}, + ] def test_to_records_date_str(self): - data = [{"a": "03/11/2019", "b": "string2"}, {"a": "03/12/2019", "b": "string4"}] + data = [ + {"a": "03/11/2019", "b": "string2"}, + {"a": "03/12/2019", "b": "string4"}, + ] tbl = Table(data) view = tbl.view() - assert view.to_records() == [{"a": datetime(2019, 3, 11), "b": "string2"}, {"a": datetime(2019, 3, 12), "b": "string4"}] + assert view.to_records() == [ + {"a": datetime(2019, 3, 11), "b": "string2"}, + {"a": datetime(2019, 3, 12), "b": "string4"}, + ] def test_to_records_date_str_month_first(self): data = [{"a": "1/2/2019", "b": "string2"}, {"a": "3/4/2019", "b": "string4"}] tbl = Table(data) view = tbl.view() assert view.schema() == {"a": date, "b": str} - assert view.to_records() == [{"a": datetime(2019, 1, 2), "b": "string2"}, {"a": datetime(2019, 3, 4), "b": "string4"}] + assert view.to_records() == [ + {"a": datetime(2019, 1, 2), "b": "string2"}, + {"a": datetime(2019, 3, 4), "b": "string4"}, + ] def test_to_records_date_str_month_ymd(self): - data = [{"a": "2019/01/02", "b": "string2"}, {"a": "2019/03/04", "b": "string4"}] + data = [ + {"a": "2019/01/02", "b": "string2"}, + {"a": "2019/03/04", "b": "string4"}, + ] tbl = Table(data) view = tbl.view() assert view.schema() == {"a": date, "b": str} - assert view.to_records() == [{"a": datetime(2019, 1, 2), "b": "string2"}, {"a": datetime(2019, 3, 4), "b": "string4"}] + assert view.to_records() == [ + {"a": datetime(2019, 1, 2), "b": "string2"}, + {"a": datetime(2019, 3, 4), "b": "string4"}, + ] def test_to_records_datetime(self): dt = datetime(2019, 9, 10, 19, 30, 59, 515000) @@ -89,10 +110,16 @@ def test_to_records_datetime(self): assert view.to_records() == data # should have symmetric input/output def test_to_records_datetime_str(self): - data = [{"a": "03/11/2019 3:15PM", "b": "string2"}, {"a": "3/11/2019 3:20PM", "b": "string4"}] + data = [ + {"a": "03/11/2019 3:15PM", "b": "string2"}, + {"a": "3/11/2019 3:20PM", "b": "string4"}, + ] tbl = Table(data) view = tbl.view() - assert view.to_records() == [{"a": datetime(2019, 3, 11, 15, 15), "b": "string2"}, {"a": datetime(2019, 3, 11, 15, 20), "b": "string4"}] + assert view.to_records() == [ + {"a": datetime(2019, 3, 11, 15, 15), "b": "string2"}, + {"a": datetime(2019, 3, 11, 15, 20), "b": "string4"}, + ] def test_to_records_datetime_str_tz(self): dt = "2019/07/25T15:30:00+00:00" @@ -102,13 +129,19 @@ def test_to_records_datetime_str_tz(self): records = view.to_records() for r in records: r["a"] = r["a"].replace(tzinfo=pytz.utc) - assert records == [{"a": datetime(2019, 7, 25, 15, 30, tzinfo=pytz.utc)}, {"a": datetime(2019, 7, 25, 15, 30, tzinfo=pytz.utc)}] + assert records == [ + {"a": datetime(2019, 7, 25, 15, 30, tzinfo=pytz.utc)}, + {"a": datetime(2019, 7, 25, 15, 30, tzinfo=pytz.utc)}, + ] def test_to_records_datetime_ms_str(self): data = [{"a": "03/11/2019 3:15:15.999PM"}, {"a": "3/11/2019 3:15:16.001PM"}] tbl = Table(data) view = tbl.view() - assert view.to_records() == [{"a": datetime(2019, 3, 11, 15, 15, 15, 999000)}, {"a": datetime(2019, 3, 11, 15, 15, 16, 1000)}] + assert view.to_records() == [ + {"a": datetime(2019, 3, 11, 15, 15, 15, 999000)}, + {"a": datetime(2019, 3, 11, 15, 15, 16, 1000)}, + ] def test_to_records_none(self): data = [{"a": None, "b": 1}, {"a": None, "b": 2}] @@ -120,15 +153,30 @@ def test_to_records_one(self): data = [{"a": 1, "b": "string1"}, {"a": 1, "b": "string2"}] tbl = Table(data) view = tbl.view(group_by=["a"]) - assert view.to_records() == [{"__ROW_PATH__": [], "a": 2, "b": 2}, {"__ROW_PATH__": [1], "a": 2, "b": 2}] + assert view.to_records() == [ + {"__ROW_PATH__": [], "a": 2, "b": 2}, + {"__ROW_PATH__": [1], "a": 2, "b": 2}, + ] def test_to_records_two(self): data = [{"a": 1, "b": "string1"}, {"a": 1, "b": "string2"}] tbl = Table(data) view = tbl.view(group_by=["a"], split_by=["b"]) assert view.to_records() == [ - {"__ROW_PATH__": [], "string1|a": 1, "string1|b": 1, "string2|a": 1, "string2|b": 1}, - {"__ROW_PATH__": [1], "string1|a": 1, "string1|b": 1, "string2|a": 1, "string2|b": 1}, + { + "__ROW_PATH__": [], + "string1|a": 1, + "string1|b": 1, + "string2|a": 1, + "string2|b": 1, + }, + { + "__ROW_PATH__": [1], + "string1|a": 1, + "string1|b": 1, + "string2|a": 1, + "string2|b": 1, + }, ] def test_to_records_column_only(self): @@ -136,8 +184,18 @@ def test_to_records_column_only(self): tbl = Table(data) view = tbl.view(split_by=["b"]) assert view.to_records() == [ - {"string1|a": 1, "string1|b": "string1", "string2|a": None, "string2|b": None}, - {"string1|a": None, "string1|b": None, "string2|a": 1, "string2|b": "string2"}, + { + "string1|a": 1, + "string1|b": "string1", + "string2|a": None, + "string2|b": None, + }, + { + "string1|a": None, + "string1|b": None, + "string2|a": 1, + "string2|b": "string2", + }, ] # to_dict @@ -179,7 +237,10 @@ def test_to_dict_string(self): data = [{"a": "string1", "b": "string2"}, {"a": "string3", "b": "string4"}] tbl = Table(data) view = tbl.view() - assert view.to_dict() == {"a": ["string1", "string3"], "b": ["string2", "string4"]} + assert view.to_dict() == { + "a": ["string1", "string3"], + "b": ["string2", "string4"], + } def test_to_dict_none(self): data = [{"a": None, "b": None}, {"a": None, "b": None}] @@ -197,7 +258,11 @@ def test_to_dict_two(self): data = [{"a": 1, "b": 2}, {"a": 1, "b": 2}] tbl = Table(data) view = tbl.view(group_by=["a"], split_by=["b"]) - assert view.to_dict() == {"__ROW_PATH__": [[], [1]], "2|a": [2, 2], "2|b": [4, 4]} + assert view.to_dict() == { + "__ROW_PATH__": [[], [1]], + "2|a": [2, 2], + "2|b": [4, 4], + } def test_to_dict_column_only(self): data = [{"a": 1, "b": 2}, {"a": 1, "b": 2}] @@ -344,7 +409,11 @@ def test_to_records_one_over_max_row(self): tbl = Table(data) view = tbl.view(group_by=["a"]) records = view.to_records(end_row=1000) - assert records == [{"__ROW_PATH__": [], "a": 5, "b": 7}, {"__ROW_PATH__": [1.5], "a": 1.5, "b": 2.5}, {"__ROW_PATH__": [3.5], "a": 3.5, "b": 4.5}] + assert records == [ + {"__ROW_PATH__": [], "a": 5, "b": 7}, + {"__ROW_PATH__": [1.5], "a": 1.5, "b": 2.5}, + {"__ROW_PATH__": [3.5], "a": 3.5, "b": 4.5}, + ] def test_to_records_two_over_max_row(self): data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}] @@ -427,42 +496,58 @@ def test_to_records_zero_start_gt_end_col(self): tbl = Table(data) view = tbl.view() records = view.to_records(start_col=2, end_col=1) - assert records == [{}, {}] + assert records == [] def test_to_records_zero_start_eq_end_col(self): data = [{"a": 1.5, "b": 2.5}, {"a": 3.5, "b": 4.5}] tbl = Table(data) view = tbl.view() records = view.to_records(start_col=1, end_col=1) - assert records == [{}, {}] + assert records == [] def test_to_records_one_over_max_col(self): data = [{"a": 1.5, "b": 2.5}, {"a": 3.5, "b": 4.5}] tbl = Table(data) view = tbl.view(group_by=["a"]) records = view.to_records(end_col=1000) - assert records == [{"__ROW_PATH__": [], "a": 5, "b": 7}, {"__ROW_PATH__": [1.5], "a": 1.5, "b": 2.5}, {"__ROW_PATH__": [3.5], "a": 3.5, "b": 4.5}] + assert records == [ + {"__ROW_PATH__": [], "a": 5, "b": 7}, + {"__ROW_PATH__": [1.5], "a": 1.5, "b": 2.5}, + {"__ROW_PATH__": [3.5], "a": 3.5, "b": 4.5}, + ] def test_to_records_one_start_gt_end_col(self): data = [{"a": 1.5, "b": 2.5}, {"a": 3.5, "b": 4.5}] tbl = Table(data) view = tbl.view(group_by=["a"]) records = view.to_records(start_col=2, end_col=1) - assert records == [{}, {}, {}] + assert records == [ + {"__ROW_PATH__": []}, + {"__ROW_PATH__": [1.5]}, + {"__ROW_PATH__": [3.5]}, + ] def test_to_records_one_start_gt_end_col_large(self): data = [{"a": 1.5, "b": 2.5}, {"a": 3.5, "b": 4.5}] tbl = Table(data) view = tbl.view(group_by=["a"]) records = view.to_records(start_col=20, end_col=19) - assert records == [{}, {}, {}] + assert records == [ + {"__ROW_PATH__": []}, + {"__ROW_PATH__": [1.5]}, + {"__ROW_PATH__": [3.5]}, + ] def test_to_records_one_start_eq_end_col(self): data = [{"a": 1.5, "b": 2.5}, {"a": 3.5, "b": 4.5}] tbl = Table(data) view = tbl.view(group_by=["a"]) records = view.to_records(start_col=0, end_col=0) - assert records == [{"__ROW_PATH__": []}, {"__ROW_PATH__": [1.5]}, {"__ROW_PATH__": [3.5]}] + assert records == [ + {"__ROW_PATH__": []}, + {"__ROW_PATH__": [1.5]}, + {"__ROW_PATH__": [3.5]}, + ] def test_to_records_two_over_max_col(self): data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}] @@ -505,56 +590,88 @@ def test_to_records_two_start_gt_end_col(self): tbl = Table(data) view = tbl.view(group_by=["a"], split_by=["b"]) records = view.to_records(end_row=12, start_col=5, end_col=4) - assert records == [{}, {}, {}] + assert records == [ + {"__ROW_PATH__": []}, + {"__ROW_PATH__": [1]}, + {"__ROW_PATH__": [3]}, + ] def test_to_records_two_start_gt_end_col_large_overage(self): data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}] tbl = Table(data) view = tbl.view(group_by=["a"], split_by=["b"]) records = view.to_records(end_row=12, start_col=50, end_col=49) - assert records == [{}, {}, {}] + assert records == [ + {"__ROW_PATH__": []}, + {"__ROW_PATH__": [1]}, + {"__ROW_PATH__": [3]}, + ] def test_to_records_two_start_end_col_equiv(self): data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}] tbl = Table(data) view = tbl.view(group_by=["a"], split_by=["b"]) records = view.to_records(end_row=12, start_col=5, end_col=5) - assert records == [{}, {}, {}] + assert records == [ + {"__ROW_PATH__": []}, + {"__ROW_PATH__": [1]}, + {"__ROW_PATH__": [3]}, + ] def test_to_records_two_sorted_start_gt_end_col(self): data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}] tbl = Table(data) view = tbl.view(group_by=["a"], split_by=["b"], sort=[["a", "desc"]]) records = view.to_records(end_row=12, start_col=5, end_col=4) - assert records == [{}, {}, {}] + assert records == [ + {"__ROW_PATH__": []}, + {"__ROW_PATH__": [3]}, + {"__ROW_PATH__": [1]}, + ] def test_to_records_two_sorted_start_gt_end_col_large_overage(self): data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}] tbl = Table(data) view = tbl.view(group_by=["a"], split_by=["b"], sort=[["a", "desc"]]) records = view.to_records(end_row=12, start_col=20, end_col=30) - assert records == [{}, {}, {}] + assert records == [ + {"__ROW_PATH__": []}, + {"__ROW_PATH__": [3]}, + {"__ROW_PATH__": [1]}, + ] def test_to_records_two_sorted_start_gt_end_col_overage(self): data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}] tbl = Table(data) view = tbl.view(columns=[], group_by=["a"], split_by=["b"], sort=[["a", "desc"]]) records = view.to_records(end_row=12, start_col=1, end_col=3) - assert records == [{}, {}, {}] + assert records == [ + {"__ROW_PATH__": []}, + {"__ROW_PATH__": [3]}, + {"__ROW_PATH__": [1]}, + ] def test_to_records_two_sorted_start_end_col(self): data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}] tbl = Table(data) view = tbl.view(group_by=["a"], split_by=["b"], sort=[["a", "desc"]]) records = view.to_records(start_col=1, end_col=2) - assert records == [{"2|b": 2, "__ROW_PATH__": []}, {"2|b": None, "__ROW_PATH__": [3]}, {"2|b": 2, "__ROW_PATH__": [1]}] + assert records == [ + {"2|b": 2, "__ROW_PATH__": []}, + {"2|b": None, "__ROW_PATH__": [3]}, + {"2|b": 2, "__ROW_PATH__": [1]}, + ] def test_to_records_two_sorted_start_end_col_equiv(self): data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}] tbl = Table(data) view = tbl.view(group_by=["a"], split_by=["b"], sort=[["a", "desc"]]) records = view.to_records(end_row=12, start_col=5, end_col=5) - assert records == [{}, {}, {}] + assert records == [ + {"__ROW_PATH__": []}, + {"__ROW_PATH__": [3]}, + {"__ROW_PATH__": [1]}, + ] def test_to_records_start_col_end_col(self): data = [{"a": 1, "b": 2, "c": 3}, {"a": 3, "b": 4, "c": 5}] @@ -569,7 +686,7 @@ def test_to_records_start_col_end_col_equiv(self): tbl = Table(data) view = tbl.view() records = view.to_records(start_col=1, end_col=1) - assert records == [{}, {}] + assert records == [] def test_to_records_floor_start_col(self): data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}] @@ -728,25 +845,39 @@ def test_to_format_implicit_index_records(self): data = [{"a": 1.5, "b": 2.5}, {"a": 3.5, "b": 4.5}] tbl = Table(data) view = tbl.view() - assert view.to_records(index=True) == [{"__INDEX__": [0], "a": 1.5, "b": 2.5}, {"__INDEX__": [1], "a": 3.5, "b": 4.5}] + assert view.to_records(index=True) == [ + {"__INDEX__": [0], "a": 1.5, "b": 2.5}, + {"__INDEX__": [1], "a": 3.5, "b": 4.5}, + ] def test_to_format_implicit_index_dict(self): data = [{"a": 1.5, "b": 2.5}, {"a": 3.5, "b": 4.5}] tbl = Table(data) view = tbl.view() - assert view.to_dict(index=True) == {"__INDEX__": [[0], [1]], "a": [1.5, 3.5], "b": [2.5, 4.5]} + assert view.to_dict(index=True) == { + "__INDEX__": [[0], [1]], + "a": [1.5, 3.5], + "b": [2.5, 4.5], + } def test_to_format_implicit_id_records(self): data = [{"a": 1.5, "b": 2.5}, {"a": 3.5, "b": 4.5}] tbl = Table(data) view = tbl.view() - assert view.to_records(id=True) == [{"__ID__": [0], "a": 1.5, "b": 2.5}, {"__ID__": [1], "a": 3.5, "b": 4.5}] + assert view.to_records(id=True) == [ + {"__ID__": [0], "a": 1.5, "b": 2.5}, + {"__ID__": [1], "a": 3.5, "b": 4.5}, + ] def test_to_format_implicit_id_dict(self): data = [{"a": 1.5, "b": 2.5}, {"a": 3.5, "b": 4.5}] tbl = Table(data) view = tbl.view() - assert view.to_dict(id=True) == {"__ID__": [[0], [1]], "a": [1.5, 3.5], "b": [2.5, 4.5]} + assert view.to_dict(id=True) == { + "__ID__": [[0], [1]], + "a": [1.5, 3.5], + "b": [2.5, 4.5], + } def test_to_format_implicit_index_two_dict(self): data = [{"a": 1.5, "b": 2.5}, {"a": 3.5, "b": 4.5}] @@ -757,7 +888,11 @@ def test_to_format_implicit_index_two_dict(self): "2.5|b": [2.5, 2.5, None], "4.5|a": [3.5, None, 3.5], "4.5|b": [4.5, None, 4.5], - "__INDEX__": [[], [], []], # index needs to be the same length as each column + "__INDEX__": [ + [], + [], + [], + ], # index needs to be the same length as each column "__ROW_PATH__": [[], [1.5], [3.5]], } @@ -770,7 +905,11 @@ def test_to_format_implicit_index_two_dict(self): "2.5|b": [2.5, 2.5, None], "4.5|a": [3.5, None, 3.5], "4.5|b": [4.5, None, 4.5], - "__ID__": [[], [1.5], [3.5]], # index needs to be the same length as each column + "__ID__": [ + [], + [1.5], + [3.5], + ], # index needs to be the same length as each column "__ROW_PATH__": [[], [1.5], [3.5]], } @@ -785,13 +924,20 @@ def test_to_format_explicit_index_records(self): data = [{"a": 1.5, "b": 2.5}, {"a": 3.5, "b": 4.5}] tbl = Table(data, index="a") view = tbl.view() - assert view.to_records(index=True) == [{"__INDEX__": [1.5], "a": 1.5, "b": 2.5}, {"__INDEX__": [3.5], "a": 3.5, "b": 4.5}] + assert view.to_records(index=True) == [ + {"__INDEX__": [1.5], "a": 1.5, "b": 2.5}, + {"__INDEX__": [3.5], "a": 3.5, "b": 4.5}, + ] def test_to_format_explicit_index_dict(self): data = [{"a": 1.5, "b": 2.5}, {"a": 3.5, "b": 4.5}] tbl = Table(data, index="a") view = tbl.view() - assert view.to_dict(index=True) == {"__INDEX__": [[1.5], [3.5]], "a": [1.5, 3.5], "b": [2.5, 4.5]} + assert view.to_dict(index=True) == { + "__INDEX__": [[1.5], [3.5]], + "a": [1.5, 3.5], + "b": [2.5, 4.5], + } def test_to_format_explicit_index_np(self): data = [{"a": 1.5, "b": 2.5}, {"a": 3.5, "b": 4.5}] @@ -804,13 +950,27 @@ def test_to_format_explicit_index_str_records(self): data = [{"a": "a", "b": 2.5}, {"a": "b", "b": 4.5}] tbl = Table(data, index="a") view = tbl.view() - assert view.to_records(index=True) == [{"__INDEX__": ["a"], "a": "a", "b": 2.5}, {"__INDEX__": ["b"], "a": "b", "b": 4.5}] + assert view.to_records(index=True) == [ + {"__INDEX__": ["a"], "a": "a", "b": 2.5}, + {"__INDEX__": ["b"], "a": "b", "b": 4.5}, + ] def test_to_format_explicit_index_datetime_records(self): - data = [{"a": datetime(2019, 7, 11, 9, 0), "b": 2.5}, {"a": datetime(2019, 7, 11, 9, 1), "b": 4.5}] + data = [ + {"a": datetime(2019, 7, 11, 9, 0), "b": 2.5}, + {"a": datetime(2019, 7, 11, 9, 1), "b": 4.5}, + ] tbl = Table(data, index="a") view = tbl.view() assert view.to_records(index=True) == [ - {"__INDEX__": [datetime(2019, 7, 11, 9, 0)], "a": datetime(2019, 7, 11, 9, 0), "b": 2.5}, - {"__INDEX__": [datetime(2019, 7, 11, 9, 1)], "a": datetime(2019, 7, 11, 9, 1), "b": 4.5}, + { + "__INDEX__": [datetime(2019, 7, 11, 9, 0)], + "a": datetime(2019, 7, 11, 9, 0), + "b": 2.5, + }, + { + "__INDEX__": [datetime(2019, 7, 11, 9, 1)], + "a": datetime(2019, 7, 11, 9, 1), + "b": 4.5, + }, ] diff --git a/python/perspective/perspective/tests/table/test_view.py b/python/perspective/perspective/tests/table/test_view.py index bdd2fb2a84..33e661abb3 100644 --- a/python/perspective/perspective/tests/table/test_view.py +++ b/python/perspective/perspective/tests/table/test_view.py @@ -42,7 +42,11 @@ def test_view_one(self): assert view.num_rows() == 3 assert view.num_columns() == 2 assert view.schema() == {"a": int, "b": int} - assert view.to_records() == [{"__ROW_PATH__": [], "a": 4, "b": 6}, {"__ROW_PATH__": [1], "a": 1, "b": 2}, {"__ROW_PATH__": [3], "a": 3, "b": 4}] + assert view.to_records() == [ + {"__ROW_PATH__": [], "a": 4, "b": 6}, + {"__ROW_PATH__": [1], "a": 1, "b": 2}, + {"__ROW_PATH__": [3], "a": 3, "b": 4}, + ] def test_view_two(self): data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}] @@ -64,7 +68,10 @@ def test_view_two_column_only(self): assert view.num_rows() == 2 assert view.num_columns() == 4 assert view.schema() == {"a": int, "b": int} - assert view.to_records() == [{"2|a": 1, "2|b": 2, "4|a": None, "4|b": None}, {"2|a": None, "2|b": None, "4|a": 3, "4|b": 4}] + assert view.to_records() == [ + {"2|a": 1, "2|b": 2, "4|a": None, "4|b": None}, + {"2|a": None, "2|b": None, "4|a": 3, "4|b": 4}, + ] # column path @@ -115,7 +122,15 @@ def test_view_column_path_two(self): tbl = Table(data) view = tbl.view(group_by=["a"], split_by=["b"]) paths = view.column_paths() - assert paths == ["__ROW_PATH__", "1.5|a", "1.5|b", "2.5|a", "2.5|b", "3.5|a", "3.5|b"] + assert paths == [ + "__ROW_PATH__", + "1.5|a", + "1.5|b", + "2.5|a", + "2.5|b", + "3.5|a", + "3.5|b", + ] def test_view_column_path_two_column_only(self): data = {"a": [1, 2, 3], "b": [1.5, 2.5, 3.5]} @@ -178,7 +193,7 @@ def test_view_no_columns(self): tbl = Table(data) view = tbl.view(columns=[]) assert view.num_columns() == 0 - assert view.to_records() == [{}, {}] + assert view.to_records() == [] def test_view_no_columns_pivoted(self): data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}] @@ -201,7 +216,17 @@ def test_view_column_order(self): assert view.to_records() == [{"b": 2, "a": 1}, {"b": 4, "a": 3}] def test_view_dataframe_column_order(self): - table = Table(pd.DataFrame({"0.1": [5, 6, 7, 8], "-0.05": [5, 6, 7, 8], "0.0": [1, 2, 3, 4], "-0.1": [1, 2, 3, 4], "str": ["a", "b", "c", "d"]})) + table = Table( + pd.DataFrame( + { + "0.1": [5, 6, 7, 8], + "-0.05": [5, 6, 7, 8], + "0.0": [1, 2, 3, 4], + "-0.1": [1, 2, 3, 4], + "str": ["a", "b", "c", "d"], + } + ) + ) view = table.view(columns=["-0.1", "-0.05", "0.0", "0.1"], group_by=["str"]) assert view.column_paths() == ["__ROW_PATH__", "-0.1", "-0.05", "0.0", "0.1"] @@ -209,16 +234,26 @@ def test_view_aggregate_order_with_columns(self): """If `columns` is provided, order is always guaranteed.""" data = [{"a": 1, "b": 2, "c": 3, "d": 4}, {"a": 3, "b": 4, "c": 5, "d": 6}] tbl = Table(data) - view = tbl.view(group_by=["a"], columns=["a", "b", "c", "d"], aggregates={"d": "avg", "c": "avg", "b": "last", "a": "last"}) + view = tbl.view( + group_by=["a"], + columns=["a", "b", "c", "d"], + aggregates={"d": "avg", "c": "avg", "b": "last", "a": "last"}, + ) order = ["__ROW_PATH__", "a", "b", "c", "d"] assert view.column_paths() == order def test_view_df_aggregate_order_with_columns(self): """If `columns` is provided, order is always guaranteed.""" - data = pd.DataFrame({"a": [1, 2, 3], "b": [2, 3, 4], "c": [3, 4, 5], "d": [4, 5, 6]}, columns=["d", "a", "c", "b"]) + data = pd.DataFrame( + {"a": [1, 2, 3], "b": [2, 3, 4], "c": [3, 4, 5], "d": [4, 5, 6]}, + columns=["d", "a", "c", "b"], + ) tbl = Table(data) - view = tbl.view(group_by=["a"], aggregates={"d": "avg", "c": "avg", "b": "last", "a": "last"}) + view = tbl.view( + group_by=["a"], + aggregates={"d": "avg", "c": "avg", "b": "last", "a": "last"}, + ) order = ["__ROW_PATH__", "index", "d", "a", "c", "b"] assert view.column_paths() == order @@ -228,7 +263,11 @@ def test_view_aggregates_with_no_columns(self): tbl = Table(data) view = tbl.view(group_by=["a"], aggregates={"c": "avg", "a": "last"}, columns=[]) assert view.column_paths() == ["__ROW_PATH__"] - assert view.to_records() == [{"__ROW_PATH__": []}, {"__ROW_PATH__": [1]}, {"__ROW_PATH__": [3]}] + assert view.to_records() == [ + {"__ROW_PATH__": []}, + {"__ROW_PATH__": [1]}, + {"__ROW_PATH__": [3]}, + ] def test_view_aggregates_default_column_order(self): """Order of columns are entirely determined by the `columns` kwarg. If @@ -300,50 +339,98 @@ def test_view_aggregate_int(self): data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}] tbl = Table(data) view = tbl.view(aggregates={"a": "avg"}, group_by=["a"]) - assert view.to_records() == [{"__ROW_PATH__": [], "a": 2.0, "b": 6}, {"__ROW_PATH__": [1], "a": 1.0, "b": 2}, {"__ROW_PATH__": [3], "a": 3.0, "b": 4}] + assert view.to_records() == [ + {"__ROW_PATH__": [], "a": 2.0, "b": 6}, + {"__ROW_PATH__": [1], "a": 1.0, "b": 2}, + {"__ROW_PATH__": [3], "a": 3.0, "b": 4}, + ] def test_view_aggregate_str(self): data = [{"a": "abc", "b": 2}, {"a": "def", "b": 4}] tbl = Table(data) view = tbl.view(aggregates={"a": "count"}, group_by=["a"]) - assert view.to_records() == [{"__ROW_PATH__": [], "a": 2, "b": 6}, {"__ROW_PATH__": ["abc"], "a": 1, "b": 2}, {"__ROW_PATH__": ["def"], "a": 1, "b": 4}] + assert view.to_records() == [ + {"__ROW_PATH__": [], "a": 2, "b": 6}, + {"__ROW_PATH__": ["abc"], "a": 1, "b": 2}, + {"__ROW_PATH__": ["def"], "a": 1, "b": 4}, + ] def test_view_aggregate_datetime(self): - data = [{"a": datetime(2019, 10, 1, 11, 30)}, {"a": datetime(2019, 10, 1, 11, 30)}] + data = [ + {"a": datetime(2019, 10, 1, 11, 30)}, + {"a": datetime(2019, 10, 1, 11, 30)}, + ] tbl = Table(data) view = tbl.view(aggregates={"a": "distinct count"}, group_by=["a"]) - assert view.to_records() == [{"__ROW_PATH__": [], "a": 1}, {"__ROW_PATH__": [datetime(2019, 10, 1, 11, 30)], "a": 1}] + assert view.to_records() == [ + {"__ROW_PATH__": [], "a": 1}, + {"__ROW_PATH__": [datetime(2019, 10, 1, 11, 30)], "a": 1}, + ] def test_view_aggregate_datetime_leading_zeroes(self): - data = [{"a": datetime(2019, 1, 1, 5, 5, 5)}, {"a": datetime(2019, 1, 1, 5, 5, 5)}] + data = [ + {"a": datetime(2019, 1, 1, 5, 5, 5)}, + {"a": datetime(2019, 1, 1, 5, 5, 5)}, + ] tbl = Table(data) view = tbl.view(aggregates={"a": "distinct count"}, group_by=["a"]) - assert view.to_records() == [{"__ROW_PATH__": [], "a": 1}, {"__ROW_PATH__": [datetime(2019, 1, 1, 5, 5, 5)], "a": 1}] + assert view.to_records() == [ + {"__ROW_PATH__": [], "a": 1}, + {"__ROW_PATH__": [datetime(2019, 1, 1, 5, 5, 5)], "a": 1}, + ] def test_view_aggregate_mean(self): - data = [{"a": "a", "x": 1, "y": 200}, {"a": "a", "x": 2, "y": 100}, {"a": "a", "x": 3, "y": None}] + data = [ + {"a": "a", "x": 1, "y": 200}, + {"a": "a", "x": 2, "y": 100}, + {"a": "a", "x": 3, "y": None}, + ] tbl = Table(data) view = tbl.view(aggregates={"y": "mean"}, group_by=["a"], columns=["y"]) - assert view.to_records() == [{"__ROW_PATH__": [], "y": 300 / 2}, {"__ROW_PATH__": ["a"], "y": 300 / 2}] + assert view.to_records() == [ + {"__ROW_PATH__": [], "y": 300 / 2}, + {"__ROW_PATH__": ["a"], "y": 300 / 2}, + ] def test_view_aggregate_mean_from_schema(self): - data = [{"a": "a", "x": 1, "y": 200}, {"a": "a", "x": 2, "y": 100}, {"a": "a", "x": 3, "y": None}] + data = [ + {"a": "a", "x": 1, "y": 200}, + {"a": "a", "x": 2, "y": 100}, + {"a": "a", "x": 3, "y": None}, + ] tbl = Table({"a": str, "x": int, "y": float}) view = tbl.view(aggregates={"y": "mean"}, group_by=["a"], columns=["y"]) tbl.update(data) - assert view.to_records() == [{"__ROW_PATH__": [], "y": 300 / 2}, {"__ROW_PATH__": ["a"], "y": 300 / 2}] + assert view.to_records() == [ + {"__ROW_PATH__": [], "y": 300 / 2}, + {"__ROW_PATH__": ["a"], "y": 300 / 2}, + ] def test_view_aggregate_weighted_mean(self): - data = [{"a": "a", "x": 1, "y": 200}, {"a": "a", "x": 2, "y": 100}, {"a": "a", "x": 3, "y": None}] + data = [ + {"a": "a", "x": 1, "y": 200}, + {"a": "a", "x": 2, "y": 100}, + {"a": "a", "x": 3, "y": None}, + ] tbl = Table(data) view = tbl.view(aggregates={"y": ["weighted mean", "x"]}, group_by=["a"], columns=["y"]) - assert view.to_records() == [{"__ROW_PATH__": [], "y": (1.0 * 200 + 2 * 100) / (1.0 + 2)}, {"__ROW_PATH__": ["a"], "y": (1.0 * 200 + 2 * 100) / (1.0 + 2)}] + assert view.to_records() == [ + {"__ROW_PATH__": [], "y": (1.0 * 200 + 2 * 100) / (1.0 + 2)}, + {"__ROW_PATH__": ["a"], "y": (1.0 * 200 + 2 * 100) / (1.0 + 2)}, + ] def test_view_aggregate_weighted_mean_with_negative_weights(self): - data = [{"a": "a", "x": 1, "y": 200}, {"a": "a", "x": -2, "y": 100}, {"a": "a", "x": 3, "y": None}] + data = [ + {"a": "a", "x": 1, "y": 200}, + {"a": "a", "x": -2, "y": 100}, + {"a": "a", "x": 3, "y": None}, + ] tbl = Table(data) view = tbl.view(aggregates={"y": ["weighted mean", "x"]}, group_by=["a"], columns=["y"]) - assert view.to_records() == [{"__ROW_PATH__": [], "y": (1 * 200 + (-2) * 100) / (1 - 2)}, {"__ROW_PATH__": ["a"], "y": (1 * 200 + (-2) * 100) / (1 - 2)}] + assert view.to_records() == [ + {"__ROW_PATH__": [], "y": (1 * 200 + (-2) * 100) / (1 - 2)}, + {"__ROW_PATH__": ["a"], "y": (1 * 200 + (-2) * 100) / (1 - 2)}, + ] def test_view_variance(self): data = {"x": list(np.random.rand(10)), "y": ["a" for _ in range(10)]} @@ -357,7 +444,21 @@ def test_view_variance(self): assert result["x"] == approx([expected, expected]) def test_view_variance_multi(self): - data = {"a": [91.96, 258.576, 29.6, 243.16, 36.24, 25.248, 79.99, 206.1, 31.5, 55.6], "b": [1 if i % 2 == 0 else 0 for i in range(10)]} + data = { + "a": [ + 91.96, + 258.576, + 29.6, + 243.16, + 36.24, + 25.248, + 79.99, + 206.1, + 31.5, + 55.6, + ], + "b": [1 if i % 2 == 0 else 0 for i in range(10)], + } table = Table(data) view = table.view(aggregates={"a": "var"}, group_by=["b"]) @@ -390,14 +491,40 @@ def test_view_variance_update_none(self): assert result["a"][2] == approx(np.var([0.5, 0.8])) def test_view_variance_multi_update(self): - data = {"a": [91.96, 258.576, 29.6, 243.16, 36.24, 25.248, 79.99, 206.1, 31.5, 55.6], "b": [1 if i % 2 == 0 else 0 for i in range(10)]} + data = { + "a": [ + 91.96, + 258.576, + 29.6, + 243.16, + 36.24, + 25.248, + 79.99, + 206.1, + 31.5, + 55.6, + ], + "b": [1 if i % 2 == 0 else 0 for i in range(10)], + } table = Table(data) view = table.view(aggregates={"a": "var"}, group_by=["b"]) result = view.to_columns() expected_total = data["a"] - expected_zero = [data["a"][1], data["a"][3], data["a"][5], data["a"][7], data["a"][9]] - expected_one = [data["a"][0], data["a"][2], data["a"][4], data["a"][6], data["a"][8]] + expected_zero = [ + data["a"][1], + data["a"][3], + data["a"][5], + data["a"][7], + data["a"][9], + ] + expected_one = [ + data["a"][0], + data["a"][2], + data["a"][4], + data["a"][6], + data["a"][8], + ] assert result["a"] == approx([np.var(expected_total), np.var(expected_zero), np.var(expected_one)]) @@ -415,14 +542,40 @@ def test_view_variance_multi_update(self): assert result["a"][-1] is None def test_view_variance_multi_update_delta(self): - data = {"a": [91.96, 258.576, 29.6, 243.16, 36.24, 25.248, 79.99, 206.1, 31.5, 55.6], "b": [1 if i % 2 == 0 else 0 for i in range(10)]} + data = { + "a": [ + 91.96, + 258.576, + 29.6, + 243.16, + 36.24, + 25.248, + 79.99, + 206.1, + 31.5, + 55.6, + ], + "b": [1 if i % 2 == 0 else 0 for i in range(10)], + } table = Table(data) view = table.view(aggregates={"a": "var"}, group_by=["b"]) result = view.to_columns() expected_total = data["a"] - expected_zero = [data["a"][1], data["a"][3], data["a"][5], data["a"][7], data["a"][9]] - expected_one = [data["a"][0], data["a"][2], data["a"][4], data["a"][6], data["a"][8]] + expected_zero = [ + data["a"][1], + data["a"][3], + data["a"][5], + data["a"][7], + data["a"][9], + ] + expected_one = [ + data["a"][0], + data["a"][2], + data["a"][4], + data["a"][6], + data["a"][8], + ] assert result["a"] == approx([np.var(expected_total), np.var(expected_zero), np.var(expected_one)]) @@ -458,19 +611,50 @@ def cb1(port_id, delta): table.update(update_data) def test_view_variance_multi_update_indexed(self): - data = {"a": [91.96, 258.576, 29.6, 243.16, 36.24, 25.248, 79.99, 206.1, 31.5, 55.6], "b": [1 if i % 2 == 0 else 0 for i in range(10)], "c": [i for i in range(10)]} + data = { + "a": [ + 91.96, + 258.576, + 29.6, + 243.16, + 36.24, + 25.248, + 79.99, + 206.1, + 31.5, + 55.6, + ], + "b": [1 if i % 2 == 0 else 0 for i in range(10)], + "c": [i for i in range(10)], + } table = Table(data, index="c") view = table.view(aggregates={"a": "var"}, group_by=["b"]) result = view.to_columns() expected_total = data["a"] - expected_zero = [data["a"][1], data["a"][3], data["a"][5], data["a"][7], data["a"][9]] - expected_one = [data["a"][0], data["a"][2], data["a"][4], data["a"][6], data["a"][8]] + expected_zero = [ + data["a"][1], + data["a"][3], + data["a"][5], + data["a"][7], + data["a"][9], + ] + expected_one = [ + data["a"][0], + data["a"][2], + data["a"][4], + data["a"][6], + data["a"][8], + ] assert result["a"] == approx([np.var(expected_total), np.var(expected_zero), np.var(expected_one)]) # "b" = 2 here should result in null var because the group size is 1 - update_data = {"a": [15.12, 9.102, 0.99, 12.8], "b": [1, 0, 1, 2], "c": [1, 5, 2, 7]} + update_data = { + "a": [15.12, 9.102, 0.99, 12.8], + "b": [1, 0, 1, 2], + "c": [1, 5, 2, 7], + } table.update(update_data) @@ -494,19 +678,50 @@ def test_view_variance_multi_update_indexed(self): assert result["a"][-1] is None def test_view_variance_multi_update_indexed_delta(self): - data = {"a": [91.96, 258.576, 29.6, 243.16, 36.24, 25.248, 79.99, 206.1, 31.5, 55.6], "b": [1 if i % 2 == 0 else 0 for i in range(10)], "c": [i for i in range(10)]} + data = { + "a": [ + 91.96, + 258.576, + 29.6, + 243.16, + 36.24, + 25.248, + 79.99, + 206.1, + 31.5, + 55.6, + ], + "b": [1 if i % 2 == 0 else 0 for i in range(10)], + "c": [i for i in range(10)], + } table = Table(data, index="c") view = table.view(aggregates={"a": "var", "b": "last", "c": "last"}, group_by=["b"]) result = view.to_columns() expected_total = data["a"] - expected_zero = [data["a"][1], data["a"][3], data["a"][5], data["a"][7], data["a"][9]] - expected_one = [data["a"][0], data["a"][2], data["a"][4], data["a"][6], data["a"][8]] + expected_zero = [ + data["a"][1], + data["a"][3], + data["a"][5], + data["a"][7], + data["a"][9], + ] + expected_one = [ + data["a"][0], + data["a"][2], + data["a"][4], + data["a"][6], + data["a"][8], + ] assert result["a"] == approx([np.var(expected_total), np.var(expected_zero), np.var(expected_one)]) # 2 here should result in null var because the group size is 1 - update_data = {"a": [15.12, 9.102, 0.99, 12.8], "b": [1, 0, 1, 2], "c": [0, 4, 1, 6]} + update_data = { + "a": [15.12, 9.102, 0.99, 12.8], + "b": [1, 0, 1, 2], + "c": [0, 4, 1, 6], + } def cb1(port_id, delta): table2 = Table(delta) @@ -569,7 +784,21 @@ def test_view_standard_deviation(self): assert result["x"] == approx([expected, expected]) def test_view_standard_deviation_multi(self): - data = {"a": [91.96, 258.576, 29.6, 243.16, 36.24, 25.248, 79.99, 206.1, 31.5, 55.6], "b": [1 if i % 2 == 0 else 0 for i in range(10)]} + data = { + "a": [ + 91.96, + 258.576, + 29.6, + 243.16, + 36.24, + 25.248, + 79.99, + 206.1, + 31.5, + 55.6, + ], + "b": [1 if i % 2 == 0 else 0 for i in range(10)], + } table = Table(data) view = table.view(aggregates={"a": "stddev"}, group_by=["b"]) @@ -602,14 +831,40 @@ def test_view_standard_deviation_update_none(self): assert result["a"][2] == approx(np.std([0.5, 0.8])) def test_view_standard_deviation_multi_update(self): - data = {"a": [91.96, 258.576, 29.6, 243.16, 36.24, 25.248, 79.99, 206.1, 31.5, 55.6], "b": [1 if i % 2 == 0 else 0 for i in range(10)]} + data = { + "a": [ + 91.96, + 258.576, + 29.6, + 243.16, + 36.24, + 25.248, + 79.99, + 206.1, + 31.5, + 55.6, + ], + "b": [1 if i % 2 == 0 else 0 for i in range(10)], + } table = Table(data) view = table.view(aggregates={"a": "stddev"}, group_by=["b"]) result = view.to_columns() expected_total = data["a"] - expected_zero = [data["a"][1], data["a"][3], data["a"][5], data["a"][7], data["a"][9]] - expected_one = [data["a"][0], data["a"][2], data["a"][4], data["a"][6], data["a"][8]] + expected_zero = [ + data["a"][1], + data["a"][3], + data["a"][5], + data["a"][7], + data["a"][9], + ] + expected_one = [ + data["a"][0], + data["a"][2], + data["a"][4], + data["a"][6], + data["a"][8], + ] assert result["a"] == approx([np.std(expected_total), np.std(expected_zero), np.std(expected_one)]) @@ -627,14 +882,40 @@ def test_view_standard_deviation_multi_update(self): assert result["a"][-1] is None def test_view_standard_deviation_multi_update_delta(self): - data = {"a": [91.96, 258.576, 29.6, 243.16, 36.24, 25.248, 79.99, 206.1, 31.5, 55.6], "b": [1 if i % 2 == 0 else 0 for i in range(10)]} + data = { + "a": [ + 91.96, + 258.576, + 29.6, + 243.16, + 36.24, + 25.248, + 79.99, + 206.1, + 31.5, + 55.6, + ], + "b": [1 if i % 2 == 0 else 0 for i in range(10)], + } table = Table(data) view = table.view(aggregates={"a": "stddev"}, group_by=["b"]) result = view.to_columns() expected_total = data["a"] - expected_zero = [data["a"][1], data["a"][3], data["a"][5], data["a"][7], data["a"][9]] - expected_one = [data["a"][0], data["a"][2], data["a"][4], data["a"][6], data["a"][8]] + expected_zero = [ + data["a"][1], + data["a"][3], + data["a"][5], + data["a"][7], + data["a"][9], + ] + expected_one = [ + data["a"][0], + data["a"][2], + data["a"][4], + data["a"][6], + data["a"][8], + ] assert result["a"] == approx([np.std(expected_total), np.std(expected_zero), np.std(expected_one)]) @@ -670,19 +951,50 @@ def cb1(port_id, delta): table.update(update_data) def test_view_standard_deviation_multi_update_indexed(self): - data = {"a": [91.96, 258.576, 29.6, 243.16, 36.24, 25.248, 79.99, 206.1, 31.5, 55.6], "b": [1 if i % 2 == 0 else 0 for i in range(10)], "c": [i for i in range(10)]} + data = { + "a": [ + 91.96, + 258.576, + 29.6, + 243.16, + 36.24, + 25.248, + 79.99, + 206.1, + 31.5, + 55.6, + ], + "b": [1 if i % 2 == 0 else 0 for i in range(10)], + "c": [i for i in range(10)], + } table = Table(data, index="c") view = table.view(aggregates={"a": "stddev"}, group_by=["b"]) result = view.to_columns() expected_total = data["a"] - expected_zero = [data["a"][1], data["a"][3], data["a"][5], data["a"][7], data["a"][9]] - expected_one = [data["a"][0], data["a"][2], data["a"][4], data["a"][6], data["a"][8]] + expected_zero = [ + data["a"][1], + data["a"][3], + data["a"][5], + data["a"][7], + data["a"][9], + ] + expected_one = [ + data["a"][0], + data["a"][2], + data["a"][4], + data["a"][6], + data["a"][8], + ] assert result["a"] == approx([np.std(expected_total), np.std(expected_zero), np.std(expected_one)]) # "b" = 2 here should result in null stddev because the group size is 1 - update_data = {"a": [15.12, 9.102, 0.99, 12.8], "b": [1, 0, 1, 2], "c": [1, 5, 2, 7]} + update_data = { + "a": [15.12, 9.102, 0.99, 12.8], + "b": [1, 0, 1, 2], + "c": [1, 5, 2, 7], + } table.update(update_data) @@ -706,19 +1018,50 @@ def test_view_standard_deviation_multi_update_indexed(self): assert result["a"][-1] is None def test_view_standard_deviation_multi_update_indexed_delta(self): - data = {"a": [91.96, 258.576, 29.6, 243.16, 36.24, 25.248, 79.99, 206.1, 31.5, 55.6], "b": [1 if i % 2 == 0 else 0 for i in range(10)], "c": [i for i in range(10)]} + data = { + "a": [ + 91.96, + 258.576, + 29.6, + 243.16, + 36.24, + 25.248, + 79.99, + 206.1, + 31.5, + 55.6, + ], + "b": [1 if i % 2 == 0 else 0 for i in range(10)], + "c": [i for i in range(10)], + } table = Table(data, index="c") view = table.view(aggregates={"a": "stddev", "b": "last", "c": "last"}, group_by=["b"]) result = view.to_columns() expected_total = data["a"] - expected_zero = [data["a"][1], data["a"][3], data["a"][5], data["a"][7], data["a"][9]] - expected_one = [data["a"][0], data["a"][2], data["a"][4], data["a"][6], data["a"][8]] + expected_zero = [ + data["a"][1], + data["a"][3], + data["a"][5], + data["a"][7], + data["a"][9], + ] + expected_one = [ + data["a"][0], + data["a"][2], + data["a"][4], + data["a"][6], + data["a"][8], + ] assert result["a"] == approx([np.std(expected_total), np.std(expected_zero), np.std(expected_one)]) # 2 here should result in null stddev because the group size is 1 - update_data = {"a": [15.12, 9.102, 0.99, 12.8], "b": [1, 0, 1, 2], "c": [0, 4, 1, 6]} + update_data = { + "a": [15.12, 9.102, 0.99, 12.8], + "b": [1, 0, 1, 2], + "c": [0, 4, 1, 6], + } def cb1(port_id, delta): table2 = Table(delta) @@ -793,13 +1136,22 @@ def test_view_sort_date(self): data = [{"a": date(2019, 7, 11), "b": 2}, {"a": date(2019, 7, 12), "b": 4}] tbl = Table(data) view = tbl.view(sort=[["a", "desc"]]) - assert view.to_records() == [{"a": datetime(2019, 7, 12), "b": 4}, {"a": datetime(2019, 7, 11), "b": 2}] + assert view.to_records() == [ + {"a": datetime(2019, 7, 12), "b": 4}, + {"a": datetime(2019, 7, 11), "b": 2}, + ] def test_view_sort_datetime(self): - data = [{"a": datetime(2019, 7, 11, 8, 15), "b": 2}, {"a": datetime(2019, 7, 11, 8, 16), "b": 4}] + data = [ + {"a": datetime(2019, 7, 11, 8, 15), "b": 2}, + {"a": datetime(2019, 7, 11, 8, 16), "b": 4}, + ] tbl = Table(data) view = tbl.view(sort=[["a", "desc"]]) - assert view.to_records() == [{"a": datetime(2019, 7, 11, 8, 16), "b": 4}, {"a": datetime(2019, 7, 11, 8, 15), "b": 2}] + assert view.to_records() == [ + {"a": datetime(2019, 7, 11, 8, 16), "b": 4}, + {"a": datetime(2019, 7, 11, 8, 15), "b": 2}, + ] def test_view_sort_hidden(self): data = [{"a": 1.1, "b": 2}, {"a": 1.2, "b": 4}] @@ -808,7 +1160,11 @@ def test_view_sort_hidden(self): assert view.to_records() == [{"b": 4}, {"b": 2}] def test_view_sort_avg_nan(self): - data = {"w": [3.5, 4.5, None, None, None, None, 1.5, 2.5], "x": [1, 2, 3, 4, 4, 3, 2, 1], "y": ["a", "b", "c", "d", "e", "f", "g", "h"]} + data = { + "w": [3.5, 4.5, None, None, None, None, 1.5, 2.5], + "x": [1, 2, 3, 4, 4, 3, 2, 1], + "y": ["a", "b", "c", "d", "e", "f", "g", "h"], + } tbl = Table(data) view = tbl.view( columns=["x", "w"], @@ -817,13 +1173,27 @@ def test_view_sort_avg_nan(self): aggregates={"w": "avg", "x": "unique"}, ) assert view.to_dict() == { - "__ROW_PATH__": [[], ["c"], ["d"], ["e"], ["f"], ["g"], ["h"], ["a"], ["b"]], + "__ROW_PATH__": [ + [], + ["c"], + ["d"], + ["e"], + ["f"], + ["g"], + ["h"], + ["a"], + ["b"], + ], "w": [3, None, None, None, None, 1.5, 2.5, 3.5, 4.5], "x": [None, 3, 4, 4, 3, 2, 1, 1, 2], } def test_view_sort_sum_nan(self): - data = {"w": [3.5, 4.5, None, None, None, None, 1.5, 2.5], "x": [1, 2, 3, 4, 4, 3, 2, 1], "y": ["a", "b", "c", "d", "e", "f", "g", "h"]} + data = { + "w": [3.5, 4.5, None, None, None, None, 1.5, 2.5], + "x": [1, 2, 3, 4, 4, 3, 2, 1], + "y": ["a", "b", "c", "d", "e", "f", "g", "h"], + } tbl = Table(data) view = tbl.view( columns=["x", "w"], @@ -831,10 +1201,28 @@ def test_view_sort_sum_nan(self): sort=[["w", "asc"]], aggregates={"w": "sum", "x": "unique"}, ) - assert view.to_dict() == {"__ROW_PATH__": [[], ["c"], ["d"], ["e"], ["f"], ["g"], ["h"], ["a"], ["b"]], "w": [12, 0, 0, 0, 0, 1.5, 2.5, 3.5, 4.5], "x": [None, 3, 4, 4, 3, 2, 1, 1, 2]} + assert view.to_dict() == { + "__ROW_PATH__": [ + [], + ["c"], + ["d"], + ["e"], + ["f"], + ["g"], + ["h"], + ["a"], + ["b"], + ], + "w": [12, 0, 0, 0, 0, 1.5, 2.5, 3.5, 4.5], + "x": [None, 3, 4, 4, 3, 2, 1, 1, 2], + } def test_view_sort_unique_nan(self): - data = {"w": [3.5, 4.5, None, None, None, None, 1.5, 2.5], "x": [1, 2, 3, 4, 4, 3, 2, 1], "y": ["a", "b", "c", "d", "e", "f", "g", "h"]} + data = { + "w": [3.5, 4.5, None, None, None, None, 1.5, 2.5], + "x": [1, 2, 3, 4, 4, 3, 2, 1], + "y": ["a", "b", "c", "d", "e", "f", "g", "h"], + } tbl = Table(data) view = tbl.view( columns=["x", "w"], @@ -843,7 +1231,17 @@ def test_view_sort_unique_nan(self): aggregates={"w": "unique", "x": "unique"}, ) assert view.to_dict() == { - "__ROW_PATH__": [[], ["c"], ["d"], ["e"], ["f"], ["g"], ["h"], ["a"], ["b"]], + "__ROW_PATH__": [ + [], + ["c"], + ["d"], + ["e"], + ["f"], + ["g"], + ["h"], + ["a"], + ["b"], + ], "w": [None, None, None, None, None, 1.5, 2.5, 3.5, 4.5], "x": [None, 3, 4, 4, 3, 2, 1, 1, 2], } @@ -947,37 +1345,55 @@ def test_view_filter_date_str_neq(self): assert view.to_records() == [{"a": datetime(2019, 7, 11), "b": 2}] def test_view_filter_datetime_eq(self): - data = [{"a": datetime(2019, 7, 11, 8, 15), "b": 2}, {"a": datetime(2019, 7, 11, 8, 16), "b": 4}] + data = [ + {"a": datetime(2019, 7, 11, 8, 15), "b": 2}, + {"a": datetime(2019, 7, 11, 8, 16), "b": 4}, + ] tbl = Table(data) view = tbl.view(filter=[["a", "==", datetime(2019, 7, 11, 8, 15)]]) assert view.to_records() == [{"a": datetime(2019, 7, 11, 8, 15), "b": 2}] def test_view_filter_datetime_neq(self): - data = [{"a": datetime(2019, 7, 11, 8, 15), "b": 2}, {"a": datetime(2019, 7, 11, 8, 16), "b": 4}] + data = [ + {"a": datetime(2019, 7, 11, 8, 15), "b": 2}, + {"a": datetime(2019, 7, 11, 8, 16), "b": 4}, + ] tbl = Table(data) view = tbl.view(filter=[["a", "!=", datetime(2019, 7, 11, 8, 15)]]) assert view.to_records() == [{"a": datetime(2019, 7, 11, 8, 16), "b": 4}] def test_view_filter_datetime_np_eq(self): - data = [{"a": datetime(2019, 7, 11, 8, 15), "b": 2}, {"a": datetime(2019, 7, 11, 8, 16), "b": 4}] + data = [ + {"a": datetime(2019, 7, 11, 8, 15), "b": 2}, + {"a": datetime(2019, 7, 11, 8, 16), "b": 4}, + ] tbl = Table(data) view = tbl.view(filter=[["a", "==", np.datetime64(datetime(2019, 7, 11, 8, 15))]]) assert view.to_records() == [{"a": datetime(2019, 7, 11, 8, 15), "b": 2}] def test_view_filter_datetime_np_neq(self): - data = [{"a": datetime(2019, 7, 11, 8, 15), "b": 2}, {"a": datetime(2019, 7, 11, 8, 16), "b": 4}] + data = [ + {"a": datetime(2019, 7, 11, 8, 15), "b": 2}, + {"a": datetime(2019, 7, 11, 8, 16), "b": 4}, + ] tbl = Table(data) view = tbl.view(filter=[["a", "!=", np.datetime64(datetime(2019, 7, 11, 8, 15))]]) assert view.to_records() == [{"a": datetime(2019, 7, 11, 8, 16), "b": 4}] def test_view_filter_datetime_str_eq(self): - data = [{"a": datetime(2019, 7, 11, 8, 15), "b": 2}, {"a": datetime(2019, 7, 11, 8, 16), "b": 4}] + data = [ + {"a": datetime(2019, 7, 11, 8, 15), "b": 2}, + {"a": datetime(2019, 7, 11, 8, 16), "b": 4}, + ] tbl = Table(data) view = tbl.view(filter=[["a", "==", "2019/7/11 8:15"]]) assert view.to_records() == [{"a": datetime(2019, 7, 11, 8, 15), "b": 2}] def test_view_filter_datetime_str_neq(self): - data = [{"a": datetime(2019, 7, 11, 8, 15), "b": 2}, {"a": datetime(2019, 7, 11, 8, 16), "b": 4}] + data = [ + {"a": datetime(2019, 7, 11, 8, 15), "b": 2}, + {"a": datetime(2019, 7, 11, 8, 16), "b": 4}, + ] tbl = Table(data) view = tbl.view(filter=[["a", "!=", "2019/7/11 8:15"]]) assert view.to_records() == [{"a": datetime(2019, 7, 11, 8, 16), "b": 4}] @@ -1223,7 +1639,11 @@ def cb1(port_id, delta): tbl = Table(data) view = tbl.view(group_by=["a"]) - assert view.to_dict() == {"__ROW_PATH__": [[], [1], [3]], "a": [4, 1, 3], "b": [6, 2, 4]} + assert view.to_dict() == { + "__ROW_PATH__": [[], [1], [3]], + "a": [4, 1, 3], + "b": [6, 2, 4], + } view.on_update(cb1, mode="row") tbl.update(update_data) @@ -1315,7 +1735,17 @@ def test_view_row_delta_two(self, util): update_data = {"a": [5], "b": [6]} def cb1(port_id, delta): - compare_delta(delta, {"2|a": [1, None], "2|b": [2, None], "4|a": [3, None], "4|b": [4, None], "6|a": [5, 5], "6|b": [6, 6]}) + compare_delta( + delta, + { + "2|a": [1, None], + "2|b": [2, None], + "4|a": [3, None], + "4|b": [4, None], + "6|a": [5, 5], + "6|b": [6, 6], + }, + ) tbl = Table(data) view = tbl.view(group_by=["a"], split_by=["b"]) @@ -1333,7 +1763,15 @@ def test_view_row_delta_two_from_schema(self, util): data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}] def cb1(port_id, delta): - compare_delta(delta, {"2|a": [1, 1, None], "2|b": [2, 2, None], "4|a": [3, None, 3], "4|b": [4, None, 4]}) + compare_delta( + delta, + { + "2|a": [1, 1, None], + "2|b": [2, 2, None], + "4|a": [3, None, 3], + "4|b": [4, None, 4], + }, + ) tbl = Table({"a": int, "b": int}) view = tbl.view(group_by=["a"], split_by=["b"]) @@ -1344,7 +1782,15 @@ def test_view_row_delta_two_from_schema_indexed(self, util): data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 3, "b": 5}] def cb1(port_id, delta): - compare_delta(delta, {"2|a": [1, 1, None], "2|b": [2, 2, None], "5|a": [3, None, 3], "5|b": [5, None, 5]}) + compare_delta( + delta, + { + "2|a": [1, 1, None], + "2|b": [2, 2, None], + "5|a": [3, None, 3], + "5|b": [5, None, 5], + }, + ) tbl = Table({"a": int, "b": int}, index="a") view = tbl.view(group_by=["a"], split_by=["b"]) @@ -1356,7 +1802,17 @@ def test_view_row_delta_two_column_only(self, util): update_data = {"a": [5], "b": [6]} def cb1(port_id, delta): - compare_delta(delta, {"2|a": [1, None], "2|b": [2, None], "4|a": [3, None], "4|b": [4, None], "6|a": [5, 5], "6|b": [6, 6]}) + compare_delta( + delta, + { + "2|a": [1, None], + "2|b": [2, None], + "4|a": [3, None], + "4|b": [4, None], + "6|a": [5, 5], + "6|b": [6, 6], + }, + ) tbl = Table(data) view = tbl.view(split_by=["b"]) @@ -1374,7 +1830,17 @@ def test_view_row_delta_two_column_only_indexed(self, util): update_data = {"a": [5], "b": [6]} def cb1(port_id, delta): - compare_delta(delta, {"2|a": [1, None], "2|b": [2, None], "5|a": [3, None], "5|b": [5, None], "6|a": [5, 5], "6|b": [6, 6]}) + compare_delta( + delta, + { + "2|a": [1, None], + "2|b": [2, None], + "5|a": [3, None], + "5|b": [5, None], + "6|a": [5, 5], + "6|b": [6, 6], + }, + ) tbl = Table(data, index="a") view = tbl.view(split_by=["b"]) @@ -1391,7 +1857,15 @@ def test_view_row_delta_two_column_only_from_schema(self, util): data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}] def cb1(port_id, delta): - compare_delta(delta, {"2|a": [1, 1, None], "2|b": [2, 2, None], "4|a": [3, None, 3], "4|b": [4, None, 4]}) + compare_delta( + delta, + { + "2|a": [1, 1, None], + "2|b": [2, 2, None], + "4|a": [3, None, 3], + "4|b": [4, None, 4], + }, + ) tbl = Table({"a": int, "b": int}) view = tbl.view(split_by=["b"]) @@ -1402,7 +1876,15 @@ def test_view_row_delta_two_column_only_from_schema_indexed(self, util): data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 3, "b": 5}] def cb1(port_id, delta): - compare_delta(delta, {"2|a": [1, 1, None], "2|b": [2, 2, None], "5|a": [3, None, 3], "5|b": [5, None, 5]}) + compare_delta( + delta, + { + "2|a": [1, 1, None], + "2|b": [2, 2, None], + "5|a": [3, None, 3], + "5|b": [5, None, 5], + }, + ) tbl = Table({"a": int, "b": int}, index="a") view = tbl.view(split_by=["b"]) @@ -1602,7 +2084,13 @@ def test_should_throw_on_first_invalid(self): data = [{"a": 1, "b": 2, "c": "a"}, {"a": 3, "b": 4, "c": "b"}] tbl = Table(data) with raises(PerspectiveCppError) as ex: - tbl.view(group_by=["a"], split_by=["c"], filter=[["a", ">", 1]], aggregates={"a": "avg"}, sort=[["x", "desc"]]) + tbl.view( + group_by=["a"], + split_by=["c"], + filter=[["a", ">", 1]], + aggregates={"a": "avg"}, + sort=[["x", "desc"]], + ) assert str(ex.value) == "Invalid column 'x' found in View sorts.\n" def test_invalid_columns_not_in_expression_should_throw(self): @@ -1622,6 +2110,14 @@ def test_should_not_throw_valid_expression(self): def test_should_not_throw_valid_expression_config(self): data = [{"a": 1, "b": 2, "c": "a"}, {"a": 3, "b": 4, "c": "b"}] tbl = Table(data) - view = tbl.view(aggregates={"abc": "dominant"}, columns=["abc"], sort=[["abc", "desc"]], filter=[["abc", "==", "A"]], group_by=["abc"], split_by=["abc"], expressions=["// abc \n 'hello!'"]) + view = tbl.view( + aggregates={"abc": "dominant"}, + columns=["abc"], + sort=[["abc", "desc"]], + filter=[["abc", "==", "A"]], + group_by=["abc"], + split_by=["abc"], + expressions=["// abc \n 'hello!'"], + ) assert view.schema() == {"abc": str} diff --git a/python/perspective/perspective/tests/table/test_view_expression.py b/python/perspective/perspective/tests/table/test_view_expression.py index bdeb0deb15..ab6a881493 100644 --- a/python/perspective/perspective/tests/table/test_view_expression.py +++ b/python/perspective/perspective/tests/table/test_view_expression.py @@ -637,7 +637,7 @@ def test_view_expression_create_clear(self): } table.clear() assert view.schema() == {"a": int, "b": int, "computed": float} - assert view.to_columns() == {} + assert view.to_columns() == {"a": [], "b": [], "computed": []} def test_view_expression_create_replace(self): table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) @@ -738,9 +738,9 @@ def test_view_expression_multiple_views_should_all_clear(self): assert view2.schema() == {"a": int, "b": int, "computed2": float} - assert view.to_columns() == {} + assert view.to_columns() == {"a": [], "b": [], "computed": []} - assert view2.to_columns() == {} + assert view2.to_columns() == {"a": [], "b": [], "computed2": []} def test_view_expression_multiple_views_should_all_replace(self): table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) diff --git a/python/perspective/setup.cfg b/python/perspective/setup.cfg index 22b5bc6184..72d2e2e435 100644 --- a/python/perspective/setup.cfg +++ b/python/perspective/setup.cfg @@ -10,4 +10,5 @@ ignore=E203, W503 max-line-length=200 per-file-ignores = __init__.py: F401, F403 - libpsp.py: F401, F403 + perspective/libpsp.py: F401, F403 + perspective/tests/*: F401, E712, F811, F841 \ No newline at end of file diff --git a/tools/perspective-bench/src/js/worker.js b/tools/perspective-bench/src/js/worker.js index ab7fd96a6e..b7e37562f4 100644 --- a/tools/perspective-bench/src/js/worker.js +++ b/tools/perspective-bench/src/js/worker.js @@ -244,9 +244,9 @@ async function table_suite() { } async function bench_all() { - await to_data_suite(); - await view_suite(); await table_suite(); + await view_suite(); + await to_data_suite(); process.send({ finished: true }); } diff --git a/tools/perspective-test/results.tar.gz b/tools/perspective-test/results.tar.gz index 8c2881bd0f..6fed608a57 100644 Binary files a/tools/perspective-test/results.tar.gz and b/tools/perspective-test/results.tar.gz differ diff --git a/yarn.lock b/yarn.lock index 6741f4265f..a1bd3fec27 100644 --- a/yarn.lock +++ b/yarn.lock @@ -13866,10 +13866,10 @@ regjsparser@^0.9.1: dependencies: jsesc "~0.5.0" -regular-table@=0.5.7: - version "0.5.7" - resolved "https://registry.yarnpkg.com/regular-table/-/regular-table-0.5.7.tgz#424f5dfa6e1fb95d1fc8e0b3ee299cea6289d540" - integrity sha512-BcneWdPuabfdVxqoEKe6U7qgtXpbNxQnL/pGdMbU4G4ldYifEQm+ihyBiUz4JwVqQZtgY8TU+8xzpgWNg8bgiw== +regular-table@=0.5.9: + version "0.5.9" + resolved "https://registry.yarnpkg.com/regular-table/-/regular-table-0.5.9.tgz#a5bfeeb67e3bcc9ab4e9f11cd9c3a40777267aa9" + integrity sha512-Ck5HYNS7lzsxsDWDBYzrCpwM1wsp5fMY42Ks730Otwq2U+XAlARJMg2tRexy+V8bLy9wiq+SH8EMC/yKqccLCA== relateurl@^0.2.7: version "0.2.7"