-
Notifications
You must be signed in to change notification settings - Fork 2.3k
/
index.js
201 lines (187 loc) · 8.06 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
/**
* @license
* Copyright 2019 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/
import * as tf from '@tensorflow/tfjs';
import * as ui from './ui';
// Boston Housing CSV
const BOSTON_HOUSING_CSV_URL =
'https://storage.googleapis.com/tfjs-examples/multivariate-linear-regression/data/boston-housing-train.csv';
// Jena Climate CSV
const JENA_CLIMATE_CSV_URL =
'https://storage.googleapis.com/learnjs-data/jena_climate/jena_climate_2009_2016.csv';
// Dresses Sales data
// Originally from https://www.openml.org/d/23381
const DRESSES_SALES_CSV_URL =
'https://storage.googleapis.com/learnjs-data/csv-datasets/dresses-sales-openml.csv';
// State University of New York Campus Data from NYS.gov
// Originally from
// https://data.ny.gov/Education/State-University-of-New-York-SUNY-Campus-Locations/3cij-nwhw
const SUNY_CSV_URL =
'https://storage.googleapis.com/learnjs-data/csv-datasets/State_University_of_New_York__SUNY__Campus_Locations_with_Websites__Enrollment_and_Select_Program_Offerings.csv';
/**
* Builds a CSV Dataset object using the URL specified in the UI. Then iterates
* over all elements in that dataset to count them. Updates the UI accordingly.
*/
async function countRowsHandler() {
const url = ui.getQueryElement().value;
ui.updateStatus(`Building data object to connect to ${url}`);
const myData = tf.data.csv(url);
let i = 0;
ui.updateRowCountOutput(`counting...`);
const updateFn = x => {
i += 1;
if (i % 1000 === 0) {
ui.updateStatus(`Counting ... ${i} rows of data in the CSV so far...`);
}
};
try {
ui.updateStatus('Attempting to count records in CSV.');
// Note that `tf.data.Dataset.forEachAsync()` is an async function. Without
// the `await` here, there is no control over when the updataFn's will
// execute, thus, they will likely execute *after* we update the status with
// the final count, resulting in a display of "Counted 0 rows.".
await myData.forEachAsync(x => updateFn(x));
} catch (e) {
const errorMsg = `Caught an error iterating over ${url}. ` +
`This URL might not be valid or might not support CORS requests.` +
` Check the developer console for CORS errors.` + e;
ui.updateRowCountOutput(errorMsg);
return;
}
ui.updateStatus(`Done counting rows.`);
ui.updateRowCountOutput(`Counted ${i} rows of data in the CSV.`);
};
/**
* Builds a CSV Dataset object using the URL specified in the UI. Then connects
* with the dataset object to retrieve the column names. Updates the UI
* accordingly.
*/
async function getColumnNamesHandler() {
ui.updateColumnNamesOutput([]);
const url = ui.getQueryElement().value;
ui.updateStatus(`Attempting to connect to CSV resource at ${url}`);
const myData = tf.data.csv(url);
ui.updateStatus('Got the data connection ... determining the column names');
ui.updateColumnNamesMessage('Determining column names.');
try {
const columnNames = await myData.columnNames();
ui.updateStatus('Done getting column names.');
ui.updateColumnNamesMessage('');
ui.updateColumnNamesOutput(columnNames);
} catch (e) {
const errorMsg = `Caught an error retrieving column names from ${url}. ` +
`This URL might not be valid or might not support CORS requests.` +
` Check the developer console for CORS errors.` + e;
ui.updateColumnNamesMessage(errorMsg);
return;
}
};
/**
* Accesses the CSV to collect a single specified row. The row index
* is specified by the UI element managed in the ui library.
*/
async function getSampleRowHandler() {
const url = ui.getQueryElement().value;
ui.updateStatus(`Attempting to connect to CSV resource at ${url}`);
const myData = tf.data.csv(url);
ui.updateStatus('Got the data connection ... getting requested sample');
const sampleIndex = ui.getSampleIndex();
if (sampleIndex < 0 || isNaN(sampleIndex)) {
const msg = `Can not get samples with negative or NaN index. (Requested ${
sampleIndex}).`;
ui.updateStatus(msg);
ui.updateSampleRowMessage(msg);
ui.updateSampleRowOutput([]);
return;
}
let sample;
try {
sample = await myData.skip(sampleIndex).take(1).toArray();
} catch (e) {
let errorMsg = `Caught an error retrieving sample from ${url}. `;
errorMsg +=
'This URL might not be valid or might not support CORS requests.';
errorMsg += ' Check the developer console for CORS errors.';
errorMsg += e;
ui.updateSampleRowMessage(errorMsg);
return;
}
if (sample.length === 0) {
// When samples are requested beyond the end of the CSV, the data will
// return empty.
const msg = `Can not get sample index ${
sampleIndex}. This may be beyond the end of the dataset.`;
ui.updateStatus(msg);
ui.updateSampleRowMessage(msg);
ui.updateSampleRowOutput([]);
return;
}
ui.updateStatus(`Done getting sample ${sampleIndex}.`);
ui.updateSampleRowMessage(`Done getting sample ${sampleIndex}.`);
ui.updateSampleRowOutput(sample[0]);
};
/** Clears output messages and tables. */
const resetOutputMessages = () => {
ui.updateRowCountOutput('click "Count rows"');
ui.updateColumnNamesMessage('click "Get column names"');
ui.updateColumnNamesOutput([]);
ui.updateSampleRowMessage('select an index and click "Get a sample row"');
ui.updateSampleRowOutput([]);
};
/** Sets up handlers for the user affordences, including all buttons. */
document.addEventListener('DOMContentLoaded', async () => {
resetOutputMessages();
// Helper to connect preset URL buttons.
const connectURLButton = (buttonId, url, statusMessage) => {
document.getElementById(buttonId).addEventListener('click', async () => {
ui.getQueryElement().value = url;
resetOutputMessages();
ui.updateStatus(statusMessage);
}, false);
};
connectURLButton(
'jena-climate-button', JENA_CLIMATE_CSV_URL,
`Jena climate data is a record of atmospheric conditions taken over a ` +
`period of time. In this dataset, 14 different quantities (such ` +
`as air temperature, atmospheric pressure, humidity, wind ` +
`direction, and so on) were recorded every 10 minutes, over ` +
`several years. Note that counting all the rows of this dataset` +
`might take a while`);
connectURLButton(
'boston-button', BOSTON_HOUSING_CSV_URL,
`"Boston Housing" is a commonly used dataset in introductory ML problems.`);
connectURLButton(
'dresses-button', DRESSES_SALES_CSV_URL,
`This dataset contains attributes of dresses and their recommendations ` +
`according to their sales. Provided courtesy of OpenML. Find more ` +
`curated ML datasets at https://www.openml.org/d/23381`);
connectURLButton(
'suny-button', SUNY_CSV_URL,
`Campuses which comprise the State University of New York (SUNY) System. ` +
`Highlights information on Undergraduate and Graduate enrollment ` +
`as well as some program area offerings. Find more datasets at ` +
`https://data.ny.gov/`);
// Connect action buttons.
document.getElementById('count-rows')
.addEventListener('click', countRowsHandler, false);
document.getElementById('get-column-names')
.addEventListener('click', getColumnNamesHandler, false);
document.getElementById('get-sample-row')
.addEventListener('click', getSampleRowHandler, false);
// Connect sample index to fetch on change.
document.getElementById('which-sample-input')
.addEventListener('change', getSampleRowHandler, false);
}, false);