Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

show the pandas index in tablereport #1074

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ Minor changes
* The selection in the TableReport's sample table can now be manipulated with
the keyboard. :pr:`1065` by :user:`Jérôme Dockès <jeromedockes>`.

* The :class:`TableReport` now shows the index for pandas dataframes.
:pr:`1074` by :user:`Jérôme Dockès <jeromedockes>`.

Release 0.3.0
=============

Expand Down
1 change: 0 additions & 1 deletion examples/00_getting_started.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
Getting Started
===============


This guide showcases the features of ``skrub``, an open-source package that aims at
bridging the gap between tabular data sources and machine-learning models.

Expand Down
26 changes: 20 additions & 6 deletions skrub/_reporting/_data/templates/dataframe-sample.css
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,28 @@
/* The column containing the selected cell has green stripes. */
/* The rest of the table has gray stripes added by pure.css */

tr:nth-child(2n -1) .table-cell[data-is-in-active-column] {
background-color: var(--mediumgreen);
.table-cell[data-is-in-active-column] {
/* !important to override pure.css */
background-color: var(--active-column-color) !important;
}

.table-cell[data-is-in-active-column] {
background-color: var(--lightgreen);
tbody tr {
--active-column-color: var(--lightgreen);
}

tbody tr:nth-child(2n -1) {
--active-column-color: var(--mediumgreen);
}

tr:nth-child(2n -1) th.table-cell[data-is-in-active-column] {
background-color: var(--darkgreen);
thead {
--active-column-color: var(--darkgreen);
}


th.dataframe-index {
--active-column-color: white;
background: white;
border: 1px solid #cbcbcb; /* the pure-table border color */
}

.table-cell[data-is-active] {
Expand All @@ -30,6 +42,8 @@ tr:nth-child(2n -1) th.table-cell[data-is-in-active-column] {
}

.table-cell[data-is-active][data-just-copied]:focus {
/* !important to temporarily override the default focus ring
(data-just-copied is removed after 200ms) */
outline: 2px dashed Highlight !important;
outline: 2px dashed -webkit-focus-ring-color !important;
}
Expand Down
61 changes: 38 additions & 23 deletions skrub/_reporting/_data/templates/dataframe-sample.html
Original file line number Diff line number Diff line change
@@ -1,32 +1,46 @@
{% macro kbd(text) %}
<span class="keyboard-key">{{ text }}</span>
{% endmacro %}
{% set has_index = summary['head']['index'] is not none %}

<article class="wrapper" data-show-on="NON_EMPTY_COLUMN_FILTER_SELECTED"
data-hide-on="EMPTY_COLUMN_FILTER_SELECTED">
{% include "table-bar.html" %}

<div class="horizontal-scroll">
<table class="pure-table pure-table-striped table-with-selectable-cells"
data-manager="SampleTable"
data-n-head-rows="{{ summary['head']['data'].__len__() }}"
data-n-tail-rows="{{ summary['tail']['data'].__len__() }}"
data-n-cols="{{ summary.head.header.__len__() }}">
data-manager="SampleTable" {% if has_index %} data-has-index {% endif %}
data-n-head-rows="{{ summary['head']['data'].__len__() }}"
data-n-tail-rows="{{ summary['tail']['data'].__len__() }}"
data-n-cols="{{ summary.head.header.__len__() }}">
<thead>
<tr>
{% if has_index %}
{# Header cell for the dataframe index. It is only emitted when the summary
   carries an index. The cell is addressed as column -1 and its id ends in
   "-index" rather than a number. It is not a FilterableColumn, so it only
   gets the SampleTableCell manager. #}
{% set index_name = summary.head.index_name %}
<th class="dataframe-index table-cell clickable ellided-short"
    scope="col"
    id="sample-table-cell-head-header-index"
    data-manager="SampleTableCell" data-role="clickable-table-cell"
    data-column-idx="-1" data-row-idx-in-table-part="-1"
    data-table-part="head"
    data-value-str="{{ index_name.__str__() }}"
    data-value-repr="{{ index_name.__repr__() }}">
{# An unnamed index leaves the header cell empty instead of printing "None". #}
{% if not index_name | is_null %}
{{ index_name }}
{% endif %}
</th>
{% endif %}
{% for idx in range(summary.head.header.__len__()) %}
{% set column_name = summary.head.header[idx] %}

<th scope="col" class="clickable ellided table-cell"
id="sample-table-cell-head-header-{{ idx }}"
data-manager="SampleTableCell FilterableColumn"
data-column-name="{{ column_name }}"
data-column-idx="{{ idx }}"
data-row-idx-in-table-part="{{ -1 }}"
data-table-part="head"
data-column-name="{{ column_name }}" data-column-idx="{{ idx }}"
data-row-idx-in-table-part="-1" data-table-part="head"
data-value-str="{{ column_name.__str__() }}"
data-value-repr="{{ column_name.__repr__() }}"
>{{ column_name }}</th>
data-value-repr="{{ column_name.__repr__() }}">{{ column_name }}
</th>
{% endfor %}
</tr>
</thead>
Expand All @@ -50,21 +64,22 @@
</div>

<div class="table-footer">
<div>
<strong>{{ summary.n_rows | format_number }}</strong> rows ✕
<strong data-manager="ColumnFilterMatchCount"
data-test="n-columns-display">{{ summary.n_columns | format_number }}</strong> columns.
</div>
<div class="keyboard-hints">
<div>
{{ kbd("Ctrl-C") }}
{{ kbd("←") }}
{{ kbd("↑") }}
{{ kbd("↓") }}
{{ kbd("→") }}
{{ kbd("Esc") }}
<strong>{{ summary.n_rows | format_number }}</strong> rows ✕
<strong data-manager="ColumnFilterMatchCount"
data-test="n-columns-display">{{ summary.n_columns | format_number }}</strong>
columns.
</div>
<div class="keyboard-hints">
<div>
{{ kbd("Ctrl-C") }}
{{ kbd("←") }}
{{ kbd("↑") }}
{{ kbd("↓") }}
{{ kbd("→") }}
{{ kbd("Esc") }}
</div>
</div>
</div>
</div>


Expand Down
14 changes: 10 additions & 4 deletions skrub/_reporting/_data/templates/report.js
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,8 @@ if (customElements.get('skrub-table-report') === undefined) {
constructor(elem, exchange) {
super(elem, exchange);
this.root = this.elem.getRootNode();
this.hasIndex = this.elem.dataset.hasIndex;
this.minColIdx = this.elem.hasAttribute("data-has-index") ? -1 : 0;
this.nHeadRows = this.elem.dataset.nHeadRows;
this.nTailRows = this.elem.dataset.nTailRows;
this.nCols = this.elem.dataset.nCols;
Expand Down Expand Up @@ -371,12 +373,16 @@ if (customElements.get('skrub-table-report') === undefined) {
return row === -1 ? "header" : String(row);
}

colName(col) {
return col === -1 ? "index" : String(col);
}

findCellLeft(tablePart, row, col) {
let newCol = col;
while (newCol > 0) {
while (newCol > this.minColIdx) {
newCol -= 1;
let newCellId =
`sample-table-cell-${tablePart}-${this.rowName(row)}-${newCol}`;
`sample-table-cell-${tablePart}-${this.rowName(row)}-${this.colName(newCol)}`;
let newCell = this.root.getElementById(newCellId);
if (newCell === null) {
return null;
Expand Down Expand Up @@ -421,7 +427,7 @@ if (customElements.get('skrub-table-report') === undefined) {
newRow += 1;
}
let newCellId =
`sample-table-cell-${newTablePart}-${this.rowName(newRow)}-${col}`;
`sample-table-cell-${newTablePart}-${this.rowName(newRow)}-${this.colName(col)}`;
let newCell = this.root.getElementById(newCellId);
if (newCell === null) {
return null;
Expand All @@ -442,7 +448,7 @@ if (customElements.get('skrub-table-report') === undefined) {
newRow -= 1;
}
let newCellId =
`sample-table-cell-${newTablePart}-${this.rowName(newRow)}-${col}`;
`sample-table-cell-${newTablePart}-${this.rowName(newRow)}-${this.colName(col)}`;
let newCell = this.root.getElementById(newCellId);
if (newCell === null) {
return null;
Expand Down
15 changes: 15 additions & 0 deletions skrub/_reporting/_data/templates/table-part.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,21 @@
{% for row_idx in range(summary[table_part]['data'].__len__()) %}
{% set row = summary[table_part]['data'][row_idx] %}
<tr>
{% if has_index %}
{# Row-header cell holding the index value of the current row. It is rendered
   before the data cells and only when has_index is truthy (presumably set by
   the including template; confirm). The id uses the "-index" suffix and
   data-column-idx is -1, so the cell sits to the left of column 0. #}
{% set index_value = summary[table_part]['index'][row_idx] %}
<th scope="row"
class="table-cell clickable ellided-short dataframe-index"
id="sample-table-cell-{{ table_part }}-{{ row_idx }}-index"
data-manager="SampleTableCell"
data-role="clickable-table-cell"
data-column-idx="-1"
data-row-idx-in-table-part="{{ row_idx }}"
data-table-part="{{ table_part }}"
data-value-str="{{ index_value.__str__() }}"
data-value-repr="{{ index_value.__repr__() }}">
{{ index_value }}
</th>
{% endif %}
{% for idx in range(row.__len__()) %}
{% set column_name = summary.head.header[idx] %}
<td class="table-cell clickable ellided-short"
Expand Down
13 changes: 12 additions & 1 deletion skrub/_reporting/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,18 @@ def to_row_list(dataframe):
rows = []
for row_idx in range(sbd.shape(dataframe)[0]):
rows.append([col[row_idx] for col in columns.values()])
return {"header": list(columns.keys()), "data": rows}
index = sbd.index(dataframe)
if index is not None:
index_name = index.name
index = index.to_list()
else:
index_name = None
return {
"header": list(columns.keys()),
"data": rows,
"index": index,
"index_name": index_name,
}


def top_k_value_counts(column, k):
Expand Down
Loading