Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sorting of the summary statistics table #1068

Merged
merged 12 commits into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ Minor changes

* A "stats" panel has been added to the TableReport, showing summary statistics
for all columns (number of missing values, mean, etc. -- similar to
``pandas.info()`` ) in a table.
:pr:`1056` by :user:`Jérôme Dockès <jeromedockes>`.
``pandas.info()`` ) in a table. It can be sorted by each column.
:pr:`1056` and :pr:`1068` by :user:`Jérôme Dockès <jeromedockes>`.

* The selection in the TableReport's sample table can now be manipulated with
the keyboard. :pr:`1065` by :user:`Jérôme Dockès <jeromedockes>`.
Expand Down
1 change: 0 additions & 1 deletion examples/00_getting_started.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
Getting Started
===============


This guide showcases the features of ``skrub``, an open-source package that aims at
bridging the gap between tabular data sources and machine-learning models.

Expand Down
3 changes: 3 additions & 0 deletions skrub/_reporting/_data/templates/icons/arrow-down.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions skrub/_reporting/_data/templates/icons/arrow-up.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 4 additions & 0 deletions skrub/_reporting/_data/templates/icons/sort-alpha-down.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 5 additions & 0 deletions skrub/_reporting/_data/templates/icons/sort-numeric-down.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions skrub/_reporting/_data/templates/report.css
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
{% include "copybutton.css" %}
{% include "column-summaries.css" %}
{% include "dataframe-sample.css" %}
{% include "summary-statistics.css" %}
{% include "tabs.css" %}
{% include "tooltip.css" %}
{% include "column-filter.css" %}
Expand Down
66 changes: 66 additions & 0 deletions skrub/_reporting/_data/templates/report.js
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,72 @@ if (customElements.get('skrub-table-report') === undefined) {
}
SkrubTableReport.register(TabList);

/**
 * Manager for a table whose body rows can be re-ordered by clicking the sort
 * buttons (`button[data-role='sort-button']`) found inside the table.
 *
 * Each button carries a `data-direction` attribute ("ascending" or anything
 * else, treated as descending). The column to sort by is the position of the
 * button's enclosing `<th>` among the header cells.
 */
class sortableTable extends Manager {
    /**
     * @param elem - the table element managed by this instance.
     * @param exchange - forwarded unchanged to the parent Manager.
     */
    constructor(elem, exchange) {
        super(elem, exchange);
        this.elem.querySelectorAll("button[data-role='sort-button']").forEach(
            b => b.addEventListener("click", e => this.sort(e)));
    }

    /**
     * Return the sort key of the cell at position `tableColIdx` in `row`.
     *
     * The key is the cell's `data-value` when present (converted with
     * `Number` if the cell also has `data-numeric`), otherwise the cell's
     * text content.
     *
     * @param row - a `<tr>` element.
     * @param {number} tableColIdx - index of the cell within the row.
     * @returns {string|number} the value used for comparisons.
     */
    getVal(row, tableColIdx) {
        const td = row.querySelectorAll("td")[tableColIdx];
        if (!td.hasAttribute("data-value")) {
            return td.textContent;
        }
        const value = td.dataset.value;
        return td.hasAttribute("data-numeric") ? Number(value) : value;
    }

    /**
     * Comparison function for two rows, suitable for `Array.prototype.sort`.
     *
     * @param rowA - first `<tr>`.
     * @param rowB - second `<tr>`.
     * @param {number} tableColIdx - index of the cell to compare.
     * @param {boolean} ascending - sort direction.
     * @returns {number} negative if rowA goes first, positive if rowB does.
     */
    compare(rowA, rowB, tableColIdx, ascending) {
        let valA = this.getVal(rowA, tableColIdx);
        let valB = this.getVal(rowB, tableColIdx);
        // NaNs go at the bottom regardless of sorting order
        if (typeof valA === "number" && typeof valB === "number") {
            if (Number.isNaN(valA) && !Number.isNaN(valB)) {
                return 1;
            }
            if (Number.isNaN(valB) && !Number.isNaN(valA)) {
                return -1;
            }
        }
        // When the values are equal (this includes two NaNs, for which both
        // `>` tests are false), keep the original dataframe column order
        // whatever the requested direction.
        if (!(valA > valB || valB > valA)) {
            return Number(rowA.dataset.dataframeColumnIdx) -
                Number(rowB.dataset.dataframeColumnIdx);
        }
        // Sort: a descending sort is an ascending sort with swapped operands.
        if (!ascending) {
            [valA, valB] = [valB, valA];
        }
        return valA > valB ? 1 : -1;
    }

    /**
     * Click handler: sort the body rows by the column of the clicked button
     * and mark that button as the active one.
     *
     * @param event - the click event dispatched on a sort button.
     */
    sort(event) {
        // The click can land on an element nested inside the button (the
        // buttons contain an icon), in which case `event.target` is that
        // nested element and has no `data-direction`. Always work with the
        // enclosing sort button instead.
        const button = event.target.closest("button[data-role='sort-button']");
        if (!button) {
            return;
        }
        const colHeaders = Array.from(this.elem.querySelectorAll("thead tr th"));
        const tableColIdx = colHeaders.indexOf(button.closest("th"));
        const body = this.elem.querySelector("tbody");
        const rows = Array.from(body.querySelectorAll("tr"));
        const ascending = button.dataset.direction === "ascending";

        rows.sort((a, b) => this.compare(a, b, tableColIdx, ascending));

        // Only one button in the table is highlighted at a time.
        this.elem.querySelectorAll("button").forEach(b => b.removeAttribute("data-is-active"));
        button.dataset.isActive = "";

        // Empty the body, then re-attach the same row elements in sorted order.
        body.innerHTML = "";
        for (const r of rows) {
            body.appendChild(r);
        }
    }

}
// Hand the class to SkrubTableReport.register. Presumably this is what lets
// elements marked with data-manager="sortableTable" get an instance of it;
// confirm against the implementation of register().
SkrubTableReport.register(sortableTable);

class SelectedColumnsDisplay extends Manager {

constructor(elem, exchange) {
Expand Down
58 changes: 58 additions & 0 deletions skrub/_reporting/_data/templates/summary-statistics.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/* Styles for the summary statistics table and for the pair of sort buttons
   shown in each of its header cells. The --micro, --tiny and --lightgreen
   custom properties are not defined in this stylesheet; they are expected to
   come from elsewhere in the report's CSS. */

/* Small margin around the whole table. */
.summary-stats-table {
margin: 2px;
}


/* Header cell hosting a sort button group. It is the positioning context
   (position: relative) for the absolutely positioned group, and its right
   padding reserves room for two buttons of width --btn-width. */
th.sort-button-group-wrapper {
--btn-width: 2rem;
--btn-group-width: calc(var(--btn-width) * 2);
position: relative;
padding-top: var(--micro);
padding-bottom: var(--micro);
padding-right: calc(var(--tiny) + var(--btn-group-width));
}

/* The button group: a flex row stretched over the height of the header cell
   (starting 1px above its top). left: 100% puts its left edge at the cell's
   right edge and right: -width gives it the group width; the translateX then
   shifts it back by that width minus 1px, so it sits over the padding
   reserved by the wrapper. */
.sort-button-group {
position: absolute;
top: -1px;
bottom: 0;
right: calc(-1 * var(--btn-group-width));
left: 100%;
transform: translateX(calc(-1 * var(--btn-group-width) + 1px));
display: flex;
gap: 0px;
padding: 0px;
}

/* One sort button: square corners, fills the group's height and shares its
   width equally with its sibling (flex-grow: 1). */
.sort-button {
margin: 0;
box-sizing: border-box;
height: 100%;
flex-grow: 1;
border-radius: 0;
border: 1px solid #aaa;
background: #e0e0e0;
color: #222;
padding: var(--micro);
}

/* Raise the keyboard-focused button above its sibling; presumably so that its
   focus indicator is not covered by the overlapping neighbour. */
.sort-button-group > .sort-button:focus-visible {
z-index: 2;
}

/* Pull every button after the first 1px to the left so adjacent 1px borders
   overlap instead of adding up. */
.sort-button-group > .sort-button ~ .sort-button {
margin-left: -1px;
}

/* Pointer feedback: lighter on hover, darker while pressed. */
.sort-button:hover {
background: #eeeeee;
}

.sort-button:active {
background: #cccccc;
}

/* Button whose sort is currently applied (marked with the data-is-active
   attribute). Declared last so that, at equal specificity, it wins over the
   :hover and :active backgrounds above. */
.sort-button[data-is-active]{
background: var(--lightgreen);
color: black;
}
101 changes: 72 additions & 29 deletions skrub/_reporting/_data/templates/summary-statistics.html
Original file line number Diff line number Diff line change
@@ -1,57 +1,100 @@
{#
  Render one sortable header cell: an optional label followed by two sort
  buttons (ascending first, then descending).

  name        label shown in the cell; the <span> is omitted when it is empty.
  ascending   text placed after "Sort " in the ascending button's title.
  descending  text placed after "Sort " in the descending button's title.
  is_numeric  when true use the "sort-numeric-*" icons, otherwise the
              "sort-alpha-*" ones.

  Each button carries data-role="sort-button" and a data-direction attribute
  ("ascending" or "descending").
#}
{% macro th(name, ascending, descending, is_numeric) %}
<th class="sort-button-group-wrapper" scope="col">
{% if name %}
<span class="margin-r-t">{{ name }}</span>
{% endif %}
<div class="sort-button-group">
<button class="sort-button" type="button" data-role="sort-button"
data-direction="ascending" title="Sort {{ ascending }}">
{% if is_numeric %}
{% include "icons/sort-numeric-down.svg" %}
{% else %}
{% include "icons/sort-alpha-down.svg" %}
{% endif %}
</button>
<button class="sort-button" type="button" data-role="sort-button"
data-direction="descending" title="Sort {{ descending }}">
{% if is_numeric %}
{% include "icons/sort-numeric-down-alt.svg" %}
{% else %}
{% include "icons/sort-alpha-down-alt.svg" %}
{% endif %}
</button>
</div>
</th>
{% endmacro %}

{#
  Shorthand for th() for columns ranked by a statistic. Builds both titles
  from two phrases: the ascending one reads
  "from columns with <low> to columns with <high>" and the descending one
  swaps <low> and <high>.
#}
{% macro th1(name, low, high, is_numeric) %}
{{ th(name, "from columns with " + low + " to columns with " + high, "from columns with " + high + " to columns with " + low, is_numeric) }}
{% endmacro %}

<article class="wrapper" data-show-on="NON_EMPTY_COLUMN_FILTER_SELECTED"
data-hide-on="EMPTY_COLUMN_FILTER_SELECTED">
data-hide-on="EMPTY_COLUMN_FILTER_SELECTED">
<div class="horizontal-scroll">
<table class="pure-table pure-table-striped">
<table class="pure-table pure-table-bordered summary-stats-table"
data-manager="sortableTable">
<thead>
<tr>
<th scope="col">Column name</th>
<th scope="col">dtype</th>
<th scope="col">Null values</th>
<th scope="col">Unique values</th>
<th scope="col">Mean</th>
<th scope="col">Std</th>
<th scope="col">Min</th>
<th scope="col">Median</th>
<th scope="col">Max</th>
{{ th("Column", "from first column to last column", "from last column to first column", True) }}
{{ th("Column name", "by column name from A to Z", "by column name from Z to A", False) }}
{{ th("dtype", "by dtype from A to Z", "by dtype from Z to A", False) }}
{{ th1("Null values", "the fewest null values", "the most null values", True) }}
{{ th1("Unique values", "the fewest unique values", "the most unique values", True) }}
{{ th1("Mean", "the lowest mean", "the highest mean", True) }}
{{ th1("Std", "the lowest standard deviation", "the highest standard deviation", True) }}
{{ th1("Min", "the lowest minimum value", "the highest minimum value", True) }}
{{ th1("Median", "the lowest median", "the highest median", True) }}
{{ th1("Max", "the lowest maximum value", "the highest maximum value", True) }}
</tr>
</thead>
<tbody>
{% for column in summary.columns %}
<tr data-manager="FilterableColumn"
data-column-name="{{ column.name }}">
<tr data-manager="FilterableColumn" data-column-name="{{ column.name }}"
data-dataframe-column-idx="{{ loop.index0 }}">
<td data-value="{{ loop.index0 }}" data-numeric>{{ loop.index0 }}
</td>
<td>{{ column.name }}</td>
<td>{{ column.dtype }}</td>
<td class="{{ column.nulls_level }}">
<td class="{{ column.nulls_level }}"
data-value="{{ column.null_count }}" data-numeric>
{{ column.null_count }} ({{ column.null_proportion | format_percent }})
</td>

{% if column.n_unique %}
<td>{{ column.n_unique }} ({{ column.unique_proportion | format_percent }})
<td data-value="{{ column.n_unique }}" data-numeric>
{{ column.n_unique }} ({{ column.unique_proportion | format_percent }})
</td>
{% else %}
<td></td>
<td data-value="nan" data-numeric></td>
{% endif %}

{% if "mean" in column %}
<td>{{ column["mean"] | format_number }}</td>
<td>{{ column["standard_deviation"] | format_number }}</td>
<td data-value="{{ column['mean'] }}" data-numeric>
{{ column["mean"] | format_number }}</td>
<td data-value="{{ column['standard_deviation'] }}" data-numeric>
{{ column["standard_deviation"] | format_number }}</td>
{% else %}
<td></td>
<td></td>
<td data-value="nan" data-numeric></td>
<td data-value="nan" data-numeric></td>
{% endif %}

{% if column.quantiles %}
<td>{{ column.quantiles[0.0] | format_number }}</td>
<td>{{ column.quantiles[0.5] | format_number }}</td>
<td>{{ column.quantiles[1.0] | format_number }}</td>
<td data-value="{{ column.quantiles[0.0] }}" data-numeric>
{{ column.quantiles[0.0] | format_number }}</td>
<td data-value="{{ column.quantiles[0.5] }}" data-numeric>
{{ column.quantiles[0.5] | format_number }}</td>
<td data-value="{{ column.quantiles[1.0] }}" data-numeric>
{{ column.quantiles[1.0] | format_number }}</td>
{% elif "min" in column %}
<td>{{ column.min | format_number }}</td>
<td></td>
<td>{{ column.max | format_number }}</td>
<td data-value="{{ column.min }}" data-numeric>
{{ column.min | format_number }}</td>
<td data-value="nan" data-numeric></td>
<td data-value="{{ column.max }}" data-numeric>
{{ column.max | format_number }}</td>
{% else %}
<td></td>
<td></td>
<td></td>
<td data-value="nan" data-numeric></td>
<td data-value="nan" data-numeric></td>
<td data-value="nan" data-numeric></td>
{% endif %}

</tr>
Expand Down
29 changes: 29 additions & 0 deletions skrub/_reporting/js_tests/cypress/e2e/summary-statistics.cy.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
describe('test sorting the summary stats columns', () => {
  // All rows in the body of the table aliased as '@table'.
  const bodyRows = () => cy.get('@table').find('tbody tr');

  // Assert which dataframe column the given row describes.
  const expectColumn = (row, columnName) =>
    row.should('have.attr', 'data-column-name', columnName);

  // Buttons that share a parent with the aliased header label.
  const sortButtonsOf = (alias) => cy.get(alias).parent().find('button');

  it('sorts the table when clicking arrows', () => {
    // The '@report' alias is not created here; it is expected to be set up
    // before the test runs.
    cy.get('@report').find('[data-test="summary-statistics-tab"]').click();
    cy.get('@report').find('.summary-stats-table').as('table');

    // Before any sort, 'gender' is the first row.
    expectColumn(bodyRows().first(), 'gender');

    // First button of "Column name": the clicked button becomes active.
    cy.get('@report').contains('Column name').as('colName');
    sortButtonsOf('@colName').first().as('colNameButton').click();
    cy.get('@colNameButton').should('have.attr', 'data-is-active');
    expectColumn(bodyRows().first(), 'assignment_category');

    // First button of "Unique values": the active marker moves to it.
    cy.get('@report').find('th').contains('Unique values').as('unique');
    sortButtonsOf('@unique').first().as('uniqueButton').click();
    cy.get('@uniqueButton').should('have.attr', 'data-is-active');
    cy.get('@colNameButton').should('not.have.attr', 'data-is-active');
    expectColumn(bodyRows().first(), 'gender');
    expectColumn(bodyRows().last(), 'year_first_hired');

    // Second button of "Unique values": the last row stays the same.
    sortButtonsOf('@unique').first().next().click();
    expectColumn(bodyRows().first(), 'date_first_hired');
    expectColumn(bodyRows().last(), 'year_first_hired');
  });
});