Skip to content

Commit

Permalink
Add sorting of the summary statistics table (#1068)
Browse files Browse the repository at this point in the history
* add sorting of the summary statistics table

* _

* buttons display

* _

* _

* _

* _

* changelog + formatting

* remove shadow below buttons

* add tests

* better icons
  • Loading branch information
jeromedockes authored Sep 16, 2024
1 parent 37c8c70 commit a043903
Show file tree
Hide file tree
Showing 12 changed files with 252 additions and 31 deletions.
4 changes: 2 additions & 2 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ Minor changes

* A "stats" panel has been added to the TableReport, showing summary statistics
for all columns (number of missing values, mean, etc. -- similar to
``pandas.info()`` ) in a table.
:pr:`1056` by :user:`Jérôme Dockès <jeromedockes>`.
``pandas.info()`` ) in a table. It can be sorted by each column.
:pr:`1056` and :pr:`1068` by :user:`Jérôme Dockès <jeromedockes>`.

* Added zero padding for column names in :class:`MinHashEncoder` to improve column ordering consistency.
:pr:`1069` by :user:`Shreekant Nandiyawar <Shree7676>`.
Expand Down
3 changes: 3 additions & 0 deletions skrub/_reporting/_data/templates/icons/arrow-down.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions skrub/_reporting/_data/templates/icons/arrow-up.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 4 additions & 0 deletions skrub/_reporting/_data/templates/icons/sort-alpha-down.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 5 additions & 0 deletions skrub/_reporting/_data/templates/icons/sort-numeric-down.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions skrub/_reporting/_data/templates/report.css
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
{% include "copybutton.css" %}
{% include "column-summaries.css" %}
{% include "dataframe-sample.css" %}
{% include "summary-statistics.css" %}
{% include "tabs.css" %}
{% include "toggletip.css" %}
{% include "tooltip.css" %}
Expand Down
66 changes: 66 additions & 0 deletions skrub/_reporting/_data/templates/report.js
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,72 @@ if (customElements.get('skrub-table-report') === undefined) {
}
SkrubTableReport.register(TabList);

/**
 * Manager for a table whose body rows can be reordered by clicking the sort
 * buttons found in its column headers.
 *
 * Each `button[data-role='sort-button']` inside the managed element triggers
 * a sort of the `tbody` rows on the column whose `th` contains the button,
 * in the direction given by the button's `data-direction` attribute.
 */
class sortableTable extends Manager {
    /**
     * @param elem - the managed table element.
     * @param exchange - forwarded unchanged to the `Manager` constructor.
     */
    constructor(elem, exchange) {
        super(elem, exchange);
        this.elem.querySelectorAll("button[data-role='sort-button']").forEach(
            b => b.addEventListener("click", e => this.sort(e)));
    }

    /**
     * Return the value used to sort `row` on the column at `tableColIdx`.
     *
     * If the cell has no `data-value` attribute its text content is used.
     * Otherwise `data-value` is used, converted to a number when the cell
     * also has a `data-numeric` attribute (unparseable values become NaN).
     *
     * @param {Element} row - a `tr` of the table body.
     * @param {number} tableColIdx - index of the cell within the row.
     * @returns {string|number} the sort key for that cell.
     */
    getVal(row, tableColIdx) {
        const td = row.querySelectorAll("td")[tableColIdx];
        if (!td.hasAttribute("data-value")) {
            return td.textContent;
        }
        const value = td.dataset.value;
        return td.hasAttribute("data-numeric") ? Number(value) : value;
    }

    /**
     * Comparison function for two rows, suitable for `Array.prototype.sort`.
     *
     * @param {Element} rowA - first row.
     * @param {Element} rowB - second row.
     * @param {number} tableColIdx - index of the column to sort on.
     * @param {boolean} ascending - true for ascending order.
     * @returns {number} negative if `rowA` goes first, positive if `rowB`
     *     goes first.
     */
    compare(rowA, rowB, tableColIdx, ascending) {
        const valA = this.getVal(rowA, tableColIdx);
        const valB = this.getVal(rowB, tableColIdx);
        // NaNs go at the bottom regardless of sorting order
        if (typeof valA === "number" && typeof valB === "number") {
            if (Number.isNaN(valA) && !Number.isNaN(valB)) {
                return 1;
            }
            if (Number.isNaN(valB) && !Number.isNaN(valA)) {
                return -1;
            }
        }
        // When the values are equal (or both NaN, for which every comparison
        // is false), keep the original dataframe column order. This tie-break
        // is deliberately independent of the sorting direction.
        if (!(valA > valB || valB > valA)) {
            return Number(rowA.dataset.dataframeColumnIdx) -
                Number(rowB.dataset.dataframeColumnIdx);
        }
        // Sort
        const aGoesLast = ascending ? valA > valB : valB > valA;
        return aGoesLast ? 1 : -1;
    }

    /**
     * Click handler: sort the body rows on the column of the clicked button
     * and mark that button as the active one.
     *
     * @param {Event} event - click event dispatched on a sort button or on
     *     one of its descendants.
     */
    sort(event) {
        // The click may land on the icon rendered inside the button, in which
        // case `event.target` is that icon and carries neither the
        // `data-direction` nor (later) the `data-is-active` attribute. Always
        // resolve the enclosing sort button first.
        const button = event.target.closest("button[data-role='sort-button']");
        if (button === null) {
            return;
        }
        const colHeaders = Array.from(this.elem.querySelectorAll("thead tr th"));
        const tableColIdx = colHeaders.indexOf(button.closest("th"));
        if (tableColIdx === -1) {
            // The button is not inside one of the header cells: there is no
            // column to sort on.
            return;
        }
        const body = this.elem.querySelector("tbody");
        const rows = Array.from(body.querySelectorAll("tr"));
        const ascending = button.dataset.direction === "ascending";

        rows.sort((a, b) => this.compare(a, b, tableColIdx, ascending));

        this.elem.querySelectorAll("button").forEach(b => b.removeAttribute("data-is-active"));
        button.dataset.isActive = "";

        // Re-insert the same row elements in their new order.
        body.innerHTML = "";
        for (const r of rows) {
            body.appendChild(r);
        }
    }

}
SkrubTableReport.register(sortableTable);

class SelectedColumnsDisplay extends Manager {

constructor(elem, exchange) {
Expand Down
58 changes: 58 additions & 0 deletions skrub/_reporting/_data/templates/summary-statistics.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/* Styles for the summary statistics table and the sort buttons placed in its
   column headers. */
.summary-stats-table {
margin: 2px;
}


/* Header cell that hosts a pair of sort buttons. It is the positioning
   context for the absolutely positioned .sort-button-group below, and its
   right padding reserves room for that group. */
th.sort-button-group-wrapper {
/* Width of one button, and of the group of two buttons. */
--btn-width: 2rem;
--btn-group-width: calc(var(--btn-width) * 2);
position: relative;
padding-top: var(--micro);
padding-bottom: var(--micro);
padding-right: calc(var(--tiny) + var(--btn-group-width));
}

/* Container of the two sort buttons, laid out side by side with no gap. */
.sort-button-group {
position: absolute;
top: -1px;
bottom: 0;
right: calc(-1 * var(--btn-group-width));
left: 100%;
/* The box starts at the right edge of the header cell (left: 100%) and is
   shifted back by the group width, minus 1px. */
transform: translateX(calc(-1 * var(--btn-group-width) + 1px));
display: flex;
gap: 0px;
padding: 0px;
}

/* A single sort button: fills the height of the group and shares its width
   equally with its sibling (flex-grow). */
.sort-button {
margin: 0;
box-sizing: border-box;
height: 100%;
flex-grow: 1;
border-radius: 0;
border: 1px solid #aaa;
background: #e0e0e0;
color: #222;
padding: var(--micro);
}

/* Raise the keyboard-focused button above its sibling
   (NOTE(review): presumably so its focus ring is not covered by the
   overlapping neighbour -- confirm). */
.sort-button-group > .sort-button:focus-visible {
z-index: 2;
}

/* Every button after the first overlaps the previous one by 1px, i.e. by the
   width of one border. */
.sort-button-group > .sort-button ~ .sort-button {
margin-left: -1px;
}

.sort-button:hover {
background: #eeeeee;
}

.sort-button:active {
background: #cccccc;
}

/* Button carrying the data-is-active attribute (see the sort button markup in
   summary-statistics.html). */
.sort-button[data-is-active]{
background: var(--lightgreen);
color: black;
}
101 changes: 72 additions & 29 deletions skrub/_reporting/_data/templates/summary-statistics.html
Original file line number Diff line number Diff line change
@@ -1,57 +1,100 @@
{#
  Render a sortable column header: a <th> holding an optional label and a
  group of two sort buttons (ascending first, then descending).

  name:        label shown in the header; omitted when empty.
  ascending:   text appended to "Sort " in the ascending button's title.
  descending:  text appended to "Sort " in the descending button's title.
  is_numeric:  when true the numeric sort icons are used, otherwise the
               alphabetical ones.

  Each button carries data-role="sort-button" and a data-direction attribute
  ("ascending" or "descending").
#}
{% macro th(name, ascending, descending, is_numeric) %}
<th class="sort-button-group-wrapper" scope="col">
{% if name %}
<span class="margin-r-t">{{ name }}</span>
{% endif %}
<div class="sort-button-group">
<button class="sort-button" type="button" data-role="sort-button"
data-direction="ascending" title="Sort {{ ascending }}">
{% if is_numeric %}
{% include "icons/sort-numeric-down.svg" %}
{% else %}
{% include "icons/sort-alpha-down.svg" %}
{% endif %}
</button>
<button class="sort-button" type="button" data-role="sort-button"
data-direction="descending" title="Sort {{ descending }}">
{% if is_numeric %}
{% include "icons/sort-numeric-down-alt.svg" %}
{% else %}
{% include "icons/sort-alpha-down-alt.svg" %}
{% endif %}
</button>
</div>
</th>
{% endmacro %}

{#
  Shortcut around th() for headers whose button titles follow the pattern
  "from columns with <low> to columns with <high>" (ascending) and the
  reverse (descending). `low` and `high` are the phrases describing the two
  ends of the ordering, e.g. "the lowest mean" / "the highest mean".
#}
{% macro th1(name, low, high, is_numeric) %}
{{ th(name, "from columns with " + low + " to columns with " + high, "from columns with " + high + " to columns with " + low, is_numeric) }}
{% endmacro %}

<article class="wrapper" data-show-on="NON_EMPTY_COLUMN_FILTER_SELECTED"
data-hide-on="EMPTY_COLUMN_FILTER_SELECTED">
data-hide-on="EMPTY_COLUMN_FILTER_SELECTED">
<div class="horizontal-scroll">
<table class="pure-table pure-table-striped">
<table class="pure-table pure-table-bordered summary-stats-table"
data-manager="sortableTable">
<thead>
<tr>
<th scope="col">Column name</th>
<th scope="col">dtype</th>
<th scope="col">Null values</th>
<th scope="col">Unique values</th>
<th scope="col">Mean</th>
<th scope="col">Std</th>
<th scope="col">Min</th>
<th scope="col">Median</th>
<th scope="col">Max</th>
{{ th("Column", "from first column to last column", "from last column to first column", True) }}
{{ th("Column name", "by column name from A to Z", "by column name from Z to A", False) }}
{{ th("dtype", "by dtype from A to Z", "by dtype from Z to A", False) }}
{{ th1("Null values", "the fewest null values", "the most null values", True) }}
{{ th1("Unique values", "the fewest unique values", "the most unique values", True) }}
{{ th1("Mean", "the lowest mean", "the highest mean", True) }}
{{ th1("Std", "the lowest standard deviation", "the highest standard deviation", True) }}
{{ th1("Min", "the lowest minimum value", "the highest minimum value", True) }}
{{ th1("Median", "the lowest median", "the highest median", True) }}
{{ th1("Max", "the lowest maximum value", "the highest maximum value", True) }}
</tr>
</thead>
<tbody>
{% for column in summary.columns %}
<tr data-manager="FilterableColumn"
data-column-name="{{ column.name }}">
<tr data-manager="FilterableColumn" data-column-name="{{ column.name }}"
data-dataframe-column-idx="{{ loop.index0 }}">
<td data-value="{{ loop.index0 }}" data-numeric>{{ loop.index0 }}
</td>
<td>{{ column.name }}</td>
<td>{{ column.dtype }}</td>
<td class="{{ column.nulls_level }}">
<td class="{{ column.nulls_level }}"
data-value="{{ column.null_count }}" data-numeric>
{{ column.null_count }} ({{ column.null_proportion | format_percent }})
</td>

{% if column.n_unique %}
<td>{{ column.n_unique }} ({{ column.unique_proportion | format_percent }})
<td data-value="{{ column.n_unique }}" data-numeric>
{{ column.n_unique }} ({{ column.unique_proportion | format_percent }})
</td>
{% else %}
<td></td>
<td data-value="nan" data-numeric></td>
{% endif %}

{% if "mean" in column %}
<td>{{ column["mean"] | format_number }}</td>
<td>{{ column["standard_deviation"] | format_number }}</td>
<td data-value="{{ column['mean'] }}" data-numeric>
{{ column["mean"] | format_number }}</td>
<td data-value="{{ column['standard_deviation'] }}" data-numeric>
{{ column["standard_deviation"] | format_number }}</td>
{% else %}
<td></td>
<td></td>
<td data-value="nan" data-numeric></td>
<td data-value="nan" data-numeric></td>
{% endif %}

{% if column.quantiles %}
<td>{{ column.quantiles[0.0] | format_number }}</td>
<td>{{ column.quantiles[0.5] | format_number }}</td>
<td>{{ column.quantiles[1.0] | format_number }}</td>
<td data-value="{{ column.quantiles[0.0] }}" data-numeric>
{{ column.quantiles[0.0] | format_number }}</td>
<td data-value="{{ column.quantiles[0.5] }}" data-numeric>
{{ column.quantiles[0.5] | format_number }}</td>
<td data-value="{{ column.quantiles[1.0] }}" data-numeric>
{{ column.quantiles[1.0] | format_number }}</td>
{% elif "min" in column %}
<td>{{ column.min | format_number }}</td>
<td></td>
<td>{{ column.max | format_number }}</td>
<td data-value="{{ column.min }}" data-numeric>
{{ column.min | format_number }}</td>
<td data-value="nan" data-numeric></td>
<td data-value="{{ column.max }}" data-numeric>
{{ column.max | format_number }}</td>
{% else %}
<td></td>
<td></td>
<td></td>
<td data-value="nan" data-numeric></td>
<td data-value="nan" data-numeric></td>
<td data-value="nan" data-numeric></td>
{% endif %}

</tr>
Expand Down
29 changes: 29 additions & 0 deletions skrub/_reporting/js_tests/cypress/e2e/summary-statistics.cy.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
describe('test sorting the summary stats columns', () => {
  // Assert which dataframe column is described by the first body row of the
  // summary statistics table.
  const expectFirstRow = (columnName) => {
    cy.get('@table').find('tbody tr').first()
      .should('have.attr', 'data-column-name', columnName);
  };

  // Same as above, for the last body row.
  const expectLastRow = (columnName) => {
    cy.get('@table').find('tbody tr').last()
      .should('have.attr', 'data-column-name', columnName);
  };

  it('sorts the table when clicking arrows', () => {
    // Open the summary statistics tab; rows start in dataframe order.
    cy.get('@report')
      .find('[data-test="summary-statistics-tab"]')
      .click();
    cy.get('@report')
      .find('.summary-stats-table')
      .as('table');
    expectFirstRow('gender');

    // Click the first sort button of the "Column name" header.
    cy.get('@report')
      .contains('Column name')
      .as('colName');
    cy.get('@colName')
      .parent()
      .find('button')
      .first()
      .as('colNameButton')
      .click();
    cy.get('@colNameButton').should('have.attr', 'data-is-active');
    expectFirstRow('assignment_category');

    // Click the first sort button of the "Unique values" header: it becomes
    // the active button and the previous one is deactivated.
    cy.get('@report')
      .find('th')
      .contains('Unique values')
      .as('unique');
    cy.get('@unique')
      .parent()
      .find('button')
      .first()
      .as('uniqueButton')
      .click();
    cy.get('@uniqueButton').should('have.attr', 'data-is-active');
    cy.get('@colNameButton').should('not.have.attr', 'data-is-active');
    expectFirstRow('gender');
    expectLastRow('year_first_hired');

    // Click the second sort button of the same header; the last row is
    // expected to stay the same.
    cy.get('@unique')
      .parent()
      .find('button')
      .first()
      .next()
      .click();
    expectFirstRow('date_first_hired');
    expectLastRow('year_first_hired');
  });
});

0 comments on commit a043903

Please sign in to comment.