From a043903affd29d3885ff39b1ca3be1dc8eb5c238 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Dock=C3=A8s?= Date: Mon, 16 Sep 2024 17:58:22 +0200 Subject: [PATCH] Add sorting of the summary statistics table (#1068) * add sorting of the summary statistics table * _ * buttons display * _ * _ * _ * _ * changelog + formatting * remove shadow below buttons * add tests * better icons --- CHANGES.rst | 4 +- .../_data/templates/icons/arrow-down.svg | 3 + .../_data/templates/icons/arrow-up.svg | 3 + .../templates/icons/sort-alpha-down-alt.svg | 5 + .../_data/templates/icons/sort-alpha-down.svg | 4 + .../templates/icons/sort-numeric-down-alt.svg | 4 + .../templates/icons/sort-numeric-down.svg | 5 + skrub/_reporting/_data/templates/report.css | 1 + skrub/_reporting/_data/templates/report.js | 66 ++++++++++++ .../_data/templates/summary-statistics.css | 58 ++++++++++ .../_data/templates/summary-statistics.html | 101 +++++++++++++----- .../cypress/e2e/summary-statistics.cy.js | 29 +++++ 12 files changed, 252 insertions(+), 31 deletions(-) create mode 100644 skrub/_reporting/_data/templates/icons/arrow-down.svg create mode 100644 skrub/_reporting/_data/templates/icons/arrow-up.svg create mode 100644 skrub/_reporting/_data/templates/icons/sort-alpha-down-alt.svg create mode 100644 skrub/_reporting/_data/templates/icons/sort-alpha-down.svg create mode 100644 skrub/_reporting/_data/templates/icons/sort-numeric-down-alt.svg create mode 100644 skrub/_reporting/_data/templates/icons/sort-numeric-down.svg create mode 100644 skrub/_reporting/_data/templates/summary-statistics.css create mode 100644 skrub/_reporting/js_tests/cypress/e2e/summary-statistics.cy.js diff --git a/CHANGES.rst b/CHANGES.rst index 09833b014..357751736 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -34,8 +34,8 @@ Minor changes * A "stats" panel has been added to the TableReport, showing summary statistics for all columns (number of missing values, mean, etc. 
-- similar to - ``pandas.info()`` ) in a table. - :pr:`1056` by :user:`Jérôme Dockès `. + ``pandas.info()`` ) in a table. It can be sorted by each column. + :pr:`1056` and :pr:`1068` by :user:`Jérôme Dockès `. * Added zero padding for column names in :class:`MinHashEncoder` to improve column ordering consistency. :pr:`1069` by :user:`Shreekant Nandiyawar `. diff --git a/skrub/_reporting/_data/templates/icons/arrow-down.svg b/skrub/_reporting/_data/templates/icons/arrow-down.svg new file mode 100644 index 000000000..afba641b5 --- /dev/null +++ b/skrub/_reporting/_data/templates/icons/arrow-down.svg @@ -0,0 +1,3 @@ + + + diff --git a/skrub/_reporting/_data/templates/icons/arrow-up.svg b/skrub/_reporting/_data/templates/icons/arrow-up.svg new file mode 100644 index 000000000..91c4ccc9c --- /dev/null +++ b/skrub/_reporting/_data/templates/icons/arrow-up.svg @@ -0,0 +1,3 @@ + + + diff --git a/skrub/_reporting/_data/templates/icons/sort-alpha-down-alt.svg b/skrub/_reporting/_data/templates/icons/sort-alpha-down-alt.svg new file mode 100644 index 000000000..85e53eb49 --- /dev/null +++ b/skrub/_reporting/_data/templates/icons/sort-alpha-down-alt.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/skrub/_reporting/_data/templates/icons/sort-alpha-down.svg b/skrub/_reporting/_data/templates/icons/sort-alpha-down.svg new file mode 100644 index 000000000..9fbb5fd87 --- /dev/null +++ b/skrub/_reporting/_data/templates/icons/sort-alpha-down.svg @@ -0,0 +1,4 @@ + + + + diff --git a/skrub/_reporting/_data/templates/icons/sort-numeric-down-alt.svg b/skrub/_reporting/_data/templates/icons/sort-numeric-down-alt.svg new file mode 100644 index 000000000..feb44528f --- /dev/null +++ b/skrub/_reporting/_data/templates/icons/sort-numeric-down-alt.svg @@ -0,0 +1,4 @@ + + + + diff --git a/skrub/_reporting/_data/templates/icons/sort-numeric-down.svg b/skrub/_reporting/_data/templates/icons/sort-numeric-down.svg new file mode 100644 index 000000000..9aecf482f --- /dev/null +++ 
/**
 * Manager that lets the user sort the rows of a table by clicking the sort
 * buttons found in its header cells.
 *
 * Every `button[data-role='sort-button']` inside the managed element gets a
 * click listener. The button's `data-direction` attribute selects the order
 * ("ascending" sorts ascending, anything else sorts descending) and the
 * `<th>` that contains the button selects the column to sort by.
 *
 * NOTE(review): the class name is left in camelCase because the class is
 * handed to `SkrubTableReport.register`; confirm how the registry uses the
 * name before renaming it.
 */
class sortableTable extends Manager {
  /**
   * @param elem - root element of the table; forwarded to `Manager`.
   * @param exchange - forwarded unchanged to `Manager`.
   */
  constructor(elem, exchange) {
    super(elem, exchange);
    this.elem
      .querySelectorAll("button[data-role='sort-button']")
      .forEach((button) =>
        button.addEventListener("click", (event) => this.sort(event))
      );
  }

  /**
   * Return the value used to order `row` for the column at `tableColIdx`.
   *
   * - cell without a `data-value` attribute: its `textContent`;
   * - cell with `data-value`: that string, converted with `Number()` when
   *   the cell also has a `data-numeric` attribute.
   *
   * @param {Element} row - a `<tr>` of the table body.
   * @param {number} tableColIdx - index of the `<td>` within the row.
   * @returns {string|number}
   */
  getVal(row, tableColIdx) {
    const td = row.querySelectorAll("td")[tableColIdx];
    if (!td.hasAttribute("data-value")) {
      return td.textContent;
    }
    const value = td.dataset.value;
    return td.hasAttribute("data-numeric") ? Number(value) : value;
  }

  /**
   * Comparator for two rows, suitable for `Array.prototype.sort`.
   *
   * @param {Element} rowA
   * @param {Element} rowB
   * @param {number} tableColIdx - column whose values are compared.
   * @param {boolean} ascending - sort direction.
   * @returns {number} negative when `rowA` goes first, positive when `rowB`
   *   goes first.
   */
  compare(rowA, rowB, tableColIdx, ascending) {
    const valA = this.getVal(rowA, tableColIdx);
    const valB = this.getVal(rowB, tableColIdx);

    // NaNs go at the bottom regardless of sorting order
    if (typeof valA === "number" && typeof valB === "number") {
      if (Number.isNaN(valA) && !Number.isNaN(valB)) {
        return 1;
      }
      if (Number.isNaN(valB) && !Number.isNaN(valA)) {
        return -1;
      }
    }

    // When neither value is greater than the other (equal values, two NaNs,
    // ...), keep the original dataframe column order. This tie-break is
    // deliberately independent of the sorting direction.
    if (!(valA > valB || valB > valA)) {
      return (
        Number(rowA.dataset.dataframeColumnIdx) -
        Number(rowB.dataset.dataframeColumnIdx)
      );
    }

    // Exactly one of the two values is greater at this point.
    const order = valA > valB ? 1 : -1;
    return ascending ? order : -order;
  }

  /**
   * Click handler: sort the body rows by the column of the clicked button
   * and mark that button as the active one with `data-is-active`.
   *
   * @param {Event} event - click event dispatched on a sort button.
   */
  sort(event) {
    // `currentTarget` is the button the listener is attached to, whereas
    // `target` is the innermost clicked node and may be a child of the
    // button that carries no `data-direction`. Fall back to `target` for
    // callers that pass an event whose `currentTarget` is not set.
    const button = event.currentTarget || event.target;

    const colHeaders = Array.from(this.elem.querySelectorAll("thead tr th"));
    const tableColIdx = colHeaders.indexOf(button.closest("th"));
    if (tableColIdx === -1) {
      // The button is not inside a header cell of this table: there is no
      // column to sort by.
      return;
    }

    const body = this.elem.querySelector("tbody");
    const rows = Array.from(body.querySelectorAll("tr"));
    const ascending = button.dataset.direction === "ascending";

    rows.sort((a, b) => this.compare(a, b, tableColIdx, ascending));

    this.elem
      .querySelectorAll("button")
      .forEach((b) => b.removeAttribute("data-is-active"));
    button.dataset.isActive = "";

    // Empty the body, then re-insert the rows in their sorted order.
    body.innerHTML = "";
    for (const row of rows) {
      body.appendChild(row);
    }
  }
}
SkrubTableReport.register(sortableTable);
+.sort-button-group > .sort-button ~ .sort-button { + margin-left: -1px; +} + +.sort-button:hover { + background: #eeeeee; +} + +.sort-button:active { + background: #cccccc; +} + +.sort-button[data-is-active]{ + background: var(--lightgreen); + color: black; +} diff --git a/skrub/_reporting/_data/templates/summary-statistics.html b/skrub/_reporting/_data/templates/summary-statistics.html index 327447edb..7777b29f8 100644 --- a/skrub/_reporting/_data/templates/summary-statistics.html +++ b/skrub/_reporting/_data/templates/summary-statistics.html @@ -1,57 +1,100 @@ +{% macro th(name, ascending, descending, is_numeric) %} + + {% if name %} + {{ name }} + {% endif %} +
+ + +
+ +{% endmacro %} + +{% macro th1(name, low, high, is_numeric) %} +{{ th(name, "from columns with " + low + " to columns with " + high, "from columns with " + high + " to columns with " + low, is_numeric) }} +{% endmacro %} +
+ data-hide-on="EMPTY_COLUMN_FILTER_SELECTED">
- +
// End-to-end check of the sort buttons of the summary statistics table.
// Relies on a '@report' alias that is created outside of this spec.
describe('test sorting the summary stats columns', () => {
  it('sorts the table when clicking arrows', () => {
    // Small query helpers; each call issues the same Cypress commands again.
    const bodyRows = () => cy.get('@table').find('tbody tr');
    const firstRowShouldBe = (columnName) =>
      bodyRows().first().should('have.attr', 'data-column-name', columnName);
    const lastRowShouldBe = (columnName) =>
      bodyRows().last().should('have.attr', 'data-column-name', columnName);
    const buttonsNextTo = (headerAlias) =>
      cy.get(headerAlias).parent().find('button');

    // Open the tab that holds the table and check the initial first row.
    cy.get('@report').find('[data-test="summary-statistics-tab"]').click();
    cy.get('@report').find('.summary-stats-table').as('table');
    firstRowShouldBe('gender');

    // First button of the "Column name" header.
    cy.get('@report').contains('Column name').as('colName');
    buttonsNextTo('@colName').first().as('colNameButton').click();
    cy.get('@colNameButton').should('have.attr', 'data-is-active');
    firstRowShouldBe('assignment_category');

    // First button of the "Unique values" header; it becomes the only
    // active button.
    cy.get('@report').find('th').contains('Unique values').as('unique');
    buttonsNextTo('@unique').first().as('uniqueButton').click();
    cy.get('@uniqueButton').should('have.attr', 'data-is-active');
    cy.get('@colNameButton').should('not.have.attr', 'data-is-active');
    firstRowShouldBe('gender');
    lastRowShouldBe('year_first_hired');

    // Second button of the same header: the first row changes while the
    // last row is expected to stay the same.
    buttonsNextTo('@unique').first().next().click();
    firstRowShouldBe('date_first_hired');
    lastRowShouldBe('year_first_hired');
  });
});
Column namedtypeNull valuesUnique valuesMeanStdMinMedianMax
{{ loop.index0 }} + {{ column.name }} {{ column.dtype }} + {{ column.null_count }} ({{ column.null_proportion | format_percent }}) {{ column.n_unique }} ({{ column.unique_proportion | format_percent }}) + + {{ column.n_unique }} ({{ column.unique_proportion | format_percent }}) {{ column["mean"] | format_number }}{{ column["standard_deviation"] | format_number }} + {{ column["mean"] | format_number }} + {{ column["standard_deviation"] | format_number }}{{ column.quantiles[0.0] | format_number }}{{ column.quantiles[0.5] | format_number }}{{ column.quantiles[1.0] | format_number }} + {{ column.quantiles[0.0] | format_number }} + {{ column.quantiles[0.5] | format_number }} + {{ column.quantiles[1.0] | format_number }}{{ column.min | format_number }}{{ column.max | format_number }} + {{ column.min | format_number }} + {{ column.max | format_number }}