From 0837d3abfe2c7fa0c62263ff5ab73c193b185cc5 Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 26 Jan 2018 02:10:05 -0500 Subject: [PATCH 01/36] fix more lint issues, including isort update dependencies in setup.py --- atm/__init__.py | 3 ++- atm/config.py | 25 +++++++++++++------------ atm/constants.py | 25 ++++++++++++++++--------- atm/database.py | 25 +++++++++---------------- atm/encoder.py | 21 ++++++++++++--------- atm/enter_data.py | 8 +++----- atm/method.py | 8 ++++---- atm/metrics.py | 17 ++++++----------- atm/model.py | 29 ++++++++++++++--------------- atm/utilities.py | 15 ++++++++------- atm/worker.py | 29 ++++++++++++----------------- dev-requirements.txt | 1 + requirements.txt | 1 - setup.cfg | 11 +++++++---- setup.py | 25 ++++++++++++++++++++++--- test-requirements.txt | 4 +++- tox.ini | 3 +-- 17 files changed, 133 insertions(+), 117 deletions(-) diff --git a/atm/__init__.py b/atm/__init__.py index 132d885..7d9f7a4 100644 --- a/atm/__init__.py +++ b/atm/__init__.py @@ -8,4 +8,5 @@ # reference files relative to there. PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..')) -from . 
import config, constants, database, enter_data, method, metrics, model, utilities, worker +__all__ = ['config', 'constants', 'database', 'enter_data', 'method', 'metrics', + 'model', 'utilities', 'worker'] diff --git a/atm/config.py b/atm/config.py index 0103e5e..dcf71b0 100644 --- a/atm/config.py +++ b/atm/config.py @@ -1,8 +1,9 @@ -import re import os -import yaml +import re from argparse import ArgumentError, ArgumentTypeError, RawTextHelpFormatter +import yaml + from atm.constants import * @@ -192,14 +193,14 @@ def add_arguments_aws_ec2(parser): pass # AWS EC2 configurations - parser.add-argument('--num-instances', help='Number of EC2 instances to start') - parser.add-argument('--num-workers-per-instance', help='Number of ATM workers per instances') - parser.add-argument('--ec2-region', help='Region to start instances in') - parser.add-argument('--ec2-ami', help='Name of ATM AMI') - parser.add-argument('--ec2-key-pair', help='AWS key pair to use for EC2 instances') - parser.add-argument('--ec2-keyfile', help='Local path to key file (must match ec2-key-pair)') - parser.add-argument('--ec2-instance-type', help='Type of EC2 instance to start') - parser.add-argument('--ec2-username', help='Username to log into EC2 instance') + parser.add_argument('--num-instances', help='Number of EC2 instances to start') + parser.add_argument('--num-workers-per-instance', help='Number of ATM workers per instances') + parser.add_argument('--ec2-region', help='Region to start instances in') + parser.add_argument('--ec2-ami', help='Name of ATM AMI') + parser.add_argument('--ec2-key-pair', help='AWS key pair to use for EC2 instances') + parser.add_argument('--ec2-keyfile', help='Local path to key file (must match ec2-key-pair)') + parser.add_argument('--ec2-instance-type', help='Type of EC2 instance to start') + parser.add_argument('--ec2-username', help='Username to log into EC2 instance') return parser @@ -327,7 +328,7 @@ def add_arguments_datarun(parser): 'performance on a test 
dataset, and "mu_sigma" will use ' 'the lower confidence bound on the CV performance.') - ## AutoML Arguments ####################################################### + ## AutoML Arguments ###################################################### ############################################################################ # hyperparameter selection strategy # How should ATM sample hyperparameters from a given hyperpartition? @@ -370,7 +371,7 @@ def add_arguments_datarun(parser): # # train using sample criteria # else # # train using uniform (baseline) - parser.add_argument('--r-min', type=int, + parser.add_argument('--r-min', type=int, help='number of random runs to perform before tuning can occur') # k is number that xxx-k methods use. It is similar to r_min, except it is diff --git a/atm/constants.py b/atm/constants.py index a7d23a5..c1aced9 100644 --- a/atm/constants.py +++ b/atm/constants.py @@ -1,12 +1,13 @@ import os + from atm import PROJECT_ROOT -# sample tuners -from btb.tuning import Uniform as UniformTuner, GP, GPEi, GPEiVelocity -# hyperpartition selectors -from btb.selection import Uniform as UniformSelector, UCB1,\ - BestKReward, BestKVelocity, RecentKReward,\ - RecentKVelocity, HierarchicalByAlgorithm,\ - PureBestKVelocity + +from btb.selection import Uniform as UniformSelector +from btb.selection import (UCB1, BestKReward, BestKVelocity, + HierarchicalByAlgorithm, PureBestKVelocity, + RecentKReward, RecentKVelocity) +from btb.tuning import Uniform as UniformTuner +from btb.tuning import GP, GPEi, GPEiVelocity # A bunch of constants which are used throughout the project, mostly for config. 
# TODO: convert these lists and classes to something more elegant, like enums @@ -33,6 +34,8 @@ CUSTOM_CLASS_REGEX = '(.*\.py):(\w+)$' JSON_REGEX = '(.*\.json)$' +N_FOLDS_DEFAULT = 10 + TUNERS_MAP = { 'uniform': UniformTuner, 'gp': GP, @@ -68,26 +71,31 @@ 'ada': 'adaboost.json' } + class ClassifierStatus: RUNNING = 'running' ERRORED = 'errored' COMPLETE = 'complete' + class RunStatus: PENDING = 'pending' RUNNING = 'running' COMPLETE = 'complete' + class PartitionStatus: INCOMPLETE = 'incomplete' GRIDDING_DONE = 'gridding_done' ERRORED = 'errored' + class FileType: LOCAL = 'local' S3 = 's3' HTTP = 'http' + # these are the strings that are used to index into results dictionaries class Metrics: ACCURACY = 'accuracy' @@ -104,6 +112,7 @@ class Metrics: PR_CURVE = 'pr_curve' ROC_CURVE = 'roc_curve' + METRICS_BINARY = [ Metrics.ACCURACY, Metrics.COHEN_KAPPA, @@ -124,5 +133,3 @@ class Metrics: ] METRICS = list(set(METRICS_BINARY + METRICS_MULTICLASS)) - -N_FOLDS_DEFAULT = 10 diff --git a/atm/database.py b/atm/database.py index eea0ab4..50e97a0 100644 --- a/atm/database.py +++ b/atm/database.py @@ -1,23 +1,17 @@ from __future__ import print_function -from sqlalchemy import (create_engine, Column, String, ForeignKey, Integer, - Boolean, DateTime, Enum, MetaData, Numeric, Table, Text) -from sqlalchemy.orm import sessionmaker, relationship -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.engine.url import URL -from sqlalchemy import func, and_ -import traceback -import random, sys -import os -import warnings -import pdb from datetime import datetime from operator import attrgetter +from sqlalchemy import (Column, DateTime, Enum, ForeignKey, Integer, MetaData, + Numeric, String, Text, and_, create_engine, func) +from sqlalchemy.engine.url import URL +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import relationship, sessionmaker + from atm.constants import * from atm.utilities import * - MAX_HYPERPARTITION_ERRORS = 3 @@ 
-90,9 +84,8 @@ def _define_tables(self): exist, it will not be updated with new schema -- after schema changes, the database must be destroyed and reinialized. """ - metadata = MetaData(bind=self.engine) - Base = declarative_base() + Base = declarative_base(metadata=metadata) class Dataset(Base): __tablename__ = 'datasets' @@ -541,8 +534,8 @@ def mark_classifier_errored(self, classifier_id, error_msg): classifier.error_msg = error_msg classifier.status = ClassifierStatus.ERRORED classifier.completed = datetime.now() - if (self.get_number_of_hyperpartition_errors(classifier.hyperpartition_id) - > MAX_HYPERPARTITION_ERRORS): + if (self.get_number_of_hyperpartition_errors(classifier.hyperpartition_id) > + MAX_HYPERPARTITION_ERRORS): self.mark_hyperpartition_errored(classifier.hyperpartition_id) @try_with_session(commit=True) diff --git a/atm/encoder.py b/atm/encoder.py index c1c3809..80968d0 100644 --- a/atm/encoder.py +++ b/atm/encoder.py @@ -1,13 +1,6 @@ -import pandas as pd import numpy as np -import os - -from sklearn.feature_extraction import DictVectorizer +import pandas as pd from sklearn.preprocessing import LabelEncoder, OneHotEncoder -from sklearn.pipeline import Pipeline -from sklearn.model_selection import train_test_split -from sklearn_pandas import DataFrameMapper -from atm.utilities import ensure_directory class MetaData(object): @@ -108,8 +101,18 @@ def transform(self, data): return X, y - def inverse_transform(self, data): + def inverse_transform(self, X, y): + """ + Convert an encoded feature matrix and label array to the original, + human-readable data format. + """ data = pd.DataFrame(columns=self.feature_columns) + features = self.feature_encoder.inverse_transform(X) + for i, (column, encoder) in self.column_encoders.items(): + data[column] = encoder.transform(features[i]) + + data[self.label_column] = self.label_encoder.inverse_transform(y) + return data def fit_transform(self, data): """ Process data into a form that ATM can use. 
""" diff --git a/atm/enter_data.py b/atm/enter_data.py index 9c4328c..c67b38e 100755 --- a/atm/enter_data.py +++ b/atm/enter_data.py @@ -1,18 +1,16 @@ from __future__ import print_function + import argparse import os import warnings -import yaml - from datetime import datetime, timedelta -from boto.s3.connection import S3Connection, Key as S3Key from atm.config import * from atm.constants import * from atm.database import Database from atm.encoder import MetaData from atm.method import Method -from atm.utilities import ensure_directory, hash_nested_tuple, download_data +from atm.utilities import download_data warnings.filterwarnings("ignore") @@ -61,7 +59,7 @@ def create_datarun(db, dataset, run_config): run_config: RunConfig object describing the datarun to create """ # describe the datarun by its tuner and selector - run_description = '__'.join([run_config.tuner, run_config.selector]) + run_description = '__'.join([run_config.tuner, run_config.selector]) # set the deadline, if applicable deadline = run_config.deadline diff --git a/atm/method.py b/atm/method.py index f53ad3b..0acb935 100644 --- a/atm/method.py +++ b/atm/method.py @@ -1,10 +1,11 @@ -from builtins import object, str as newstr - import json +from builtins import str as newstr +from builtins import object from os.path import join +from atm.constants import METHOD_PATH, METHODS_MAP + import btb -from atm.constants import METHODS_MAP, METHOD_PATH class HyperParameter(object): @@ -155,7 +156,6 @@ def __init__(self, method): # create hyperparameters from the parameter config self.parameters = {} - lists = [] for k, v in config['hyperparameters'].items(): param_type = HYPERPARAMETER_TYPES[v['type']] self.parameters[k] = param_type(name=k, **v) diff --git a/atm/metrics.py b/atm/metrics.py index 57af7c2..3b6c134 100644 --- a/atm/metrics.py +++ b/atm/metrics.py @@ -1,13 +1,9 @@ -from sklearn.model_selection import StratifiedKFold -from sklearn.preprocessing import LabelEncoder, OneHotEncoder -from 
sklearn.metrics import f1_score, precision_recall_curve, auc, roc_curve,\ - accuracy_score, cohen_kappa_score, roc_auc_score,\ - average_precision_score, matthews_corrcoef - import numpy as np import pandas as pd -import itertools -import pdb +from sklearn.metrics import (accuracy_score, average_precision_score, + cohen_kappa_score, f1_score, matthews_corrcoef, + precision_recall_curve, roc_auc_score, roc_curve) +from sklearn.model_selection import StratifiedKFold from atm.constants import * @@ -25,7 +21,8 @@ def rank_n_accuracy(y_true, y_prob_mat, n=0.33): # round to nearest int before casting n = int(round(n_classes * n)) - rankings = np.argsort(-y_prob_mat) # negative because we want highest value first + # sort the rankings in descending order, then take the top n + rankings = np.argsort(-y_prob_mat) rankings = rankings[:, :n] num_samples = len(y_true) @@ -126,7 +123,6 @@ def get_metrics_multiclass(y_true, y_pred, y_pred_probs, results[Metrics.RANK_ACCURACY] = rank_n_accuracy(y_true=y_true, y_prob_mat=y_pred_probs) - # if possible, compute multi-label AUC metrics present_classes = np.unique(y_true) all_labels_same = len(present_classes) == 1 @@ -231,4 +227,3 @@ def cross_validate_pipeline(pipeline, X, y, binary=True, results.append(split_results) return df, results - diff --git a/atm/model.py b/atm/model.py index 849085d..2c09b13 100644 --- a/atm/model.py +++ b/atm/model.py @@ -4,23 +4,24 @@ """ from __future__ import print_function -import numpy as np -import pandas as pd -import time -import pdb + import re -from importlib import import_module +import time from collections import defaultdict +from importlib import import_module -from sklearn.pipeline import Pipeline -from sklearn.preprocessing import StandardScaler, MinMaxScaler -from sklearn.model_selection import train_test_split +import numpy as np +import pandas as pd from sklearn import decomposition -from sklearn.gaussian_process.kernels import ConstantKernel, RBF, Matern, \ - ExpSineSquared, 
RationalQuadratic +from sklearn.gaussian_process.kernels import (RBF, ConstantKernel, + ExpSineSquared, Matern, + RationalQuadratic) +from sklearn.model_selection import train_test_split +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import MinMaxScaler, StandardScaler from atm.constants import * -from atm.encoder import MetaData, DataEncoder +from atm.encoder import DataEncoder, MetaData from atm.method import Method from atm.metrics import cross_validate_pipeline, test_pipeline @@ -105,8 +106,7 @@ def make_pipeline(self): self.dimensions = self.num_features if Model.PCA in atm_params and atm_params[Model.PCA]: - whiten = (Model.WHITEN in atm_params and - atm_params[Model.WHITEN]) + whiten = (Model.WHITEN in atm_params and atm_params[Model.WHITEN]) pca_dims = atm_params[Model.PCA_DIMS] # PCA dimension in atm_params is a float reprsenting percentages of # features to use @@ -156,7 +156,6 @@ def test_final_model(self, X, y): """ # time the prediction start_time = time.time() - y_preds = self.pipeline.predict(X) total = time.time() - start_time self.avg_predict_time = total / float(len(y)) @@ -229,7 +228,7 @@ def predict(self, data): """ X, _ = self.encoder.transform(data) predictions = self.pipeline.predict(X) - return self.encoder + return self.encoder.inverse_transform(X, predictions) def special_conversions(self, params): """ diff --git a/atm/utilities.py b/atm/utilities.py index 3376479..386702e 100644 --- a/atm/utilities.py +++ b/atm/utilities.py @@ -1,17 +1,19 @@ from __future__ import print_function + +import base64 +import hashlib import json +import os import pickle +import re import urllib2 -import hashlib + import numpy as np -import os -import base64 -import re +from boto.s3.connection import Key, S3Connection -from boto.s3.connection import S3Connection, Key +from atm.constants import * from btb import ParamTypes -from atm.constants import * # global variable storing this machine's public IP address # (so we only have to fetch it 
once) @@ -239,7 +241,6 @@ def get_local_data_path(data_path): m = re.match(S3_PREFIX, data_path) if m: path = data_path[len(m.group()):].split('/') - bucket = path.pop(0) return os.path.join(DATA_DL_PATH, path[-1]), FileType.S3 m = re.match(HTTP_PREFIX, data_path) diff --git a/atm/worker.py b/atm/worker.py index 2b53a61..53d2be9 100755 --- a/atm/worker.py +++ b/atm/worker.py @@ -1,32 +1,27 @@ #!/usr/bin/python2.7 from __future__ import print_function -from atm.config import * -from atm.constants import * -from atm.utilities import * -from atm.model import Model -from atm.database import Database, ClassifierStatus, db_session -from btb.tuning.constants import Tuners import argparse -import ast import datetime import imp import os -import pdb import random import socket -import sys import time import traceback import warnings -import joblib from collections import defaultdict -from decimal import Decimal from operator import attrgetter import numpy as np -import pandas as pd -from boto.s3.connection import S3Connection, Key as S3Key +from boto.s3.connection import Key as S3Key +from boto.s3.connection import S3Connection + +from atm.config import * +from atm.constants import * +from atm.database import ClassifierStatus, Database, db_session +from atm.model import Model +from atm.utilities import * # shhh warnings.filterwarnings('ignore') @@ -236,7 +231,7 @@ def select_hyperpartition(self): # that haven't been scored yet. 
hyperpartition_scores = {fs.id: [] for fs in hyperpartitions} classifiers = self.db.get_classifiers(datarun_id=self.datarun.id) - #status=ClassifierStatus.COMPLETE) + for c in classifiers: # ignore hyperpartitions for which gridding is done if c.hyperpartition_id not in hyperpartition_scores: @@ -388,7 +383,7 @@ def run_classifier(self, hyperpartition_id=None): # use tuner to choose a set of parameters for the hyperpartition params = self.tune_parameters(hyperpartition) - except Exception as e: + except Exception: _log('Error choosing hyperparameters: datarun=%s' % str(self.datarun)) _log(traceback.format_exc()) raise ClassifierError() @@ -413,7 +408,7 @@ def run_classifier(self, hyperpartition_id=None): model, metrics = self.test_classifier(hyperpartition.method, params) _log('Saving classifier...') self.save_classifier(classifier.id, model, metrics) - except Exception as e: + except Exception: msg = traceback.format_exc() _log('Error testing classifier: datarun=%s' % str(self.datarun)) _log(msg) @@ -484,7 +479,7 @@ def work(db, datarun_ids=None, save_files=False, choose_randomly=True, verbose_metrics=verbose_metrics) try: worker.run_classifier() - except ClassifierError as e: + except ClassifierError: # the exception has already been handled; just wait a sec so we # don't go out of control reporting errors _log('Something went wrong. Sleeping %d seconds.' % LOOP_WAIT) diff --git a/dev-requirements.txt b/dev-requirements.txt index 7dcc9ff..350f0a1 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,4 +1,5 @@ -r test-requirements.txt + Sphinx==1.6.5 sphinx-rtd-theme==0.2.4 sphinxcontrib-websupport==1.0.1 diff --git a/requirements.txt b/requirements.txt index ceb49a6..f03d282 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,4 +9,3 @@ mysql-python==1.2.5 pyyaml==3.12 joblib==0.11 -e git+https://github.com/hdi-project/btb.git#egg=btb --e . 
diff --git a/setup.cfg b/setup.cfg index aa60c18..4d4f13b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,9 +7,12 @@ addopts = -m "not requires_training_data and not requires_credentials" python_files = test/tests/* [flake8] exclude = docs/* -ignore = E501,F999 # line too long error and star import error +# E501: line too long error +# E266: comment beginning with ## error +# F403 and F405: 'from module import *' error +ignore = E501,E266,F403,F405 # automatically sort and indent imports [isort] -forced_separate=atm -# vertical hanging indent -multi_line_output=3 +forced_separate=atm,btb +# Grid multiline output +multi_line_output=0 diff --git a/setup.py b/setup.py index 67c1ec1..3ea6fc7 100644 --- a/setup.py +++ b/setup.py @@ -63,13 +63,32 @@ # your project is installed. For an analysis of "install_requires" vs pip's # requirements files see: # https://packaging.python.org/en/latest/requirements.html - # TODO: don't pull in requirements! this is bad practice - #install_requires=open('requirements.txt').readlines(), + install_requires=[ + 'sqlalchemy>=1.1', + 'numpy>=1.13', + 'boto>=2.48', + 'pandas>=0.20', + 'scikit-learn>=0.18', + 'scipy>=0.19', + 'sklearn-pandas>=1.5', + 'mysql-python>=1.2', + 'pyyaml>=3.12', + 'joblib>=0.11', + 'btb', + ], + dependency_links=[ + 'git+https://github.com/hdi-project/btb.git#egg=btb', + ], # This variable is used to specify requirements for *this file* to run. 
setup_requires=[], test_suite='test/tests', - tests_require=open('test-requirements.txt').readlines(), + tests_require=[ + 'pytest>=3.2', + 'mock>=2', + 'pytest-xdist>=1.20', + 'pytest-runner>=3', + ] ) diff --git a/test-requirements.txt b/test-requirements.txt index 9fd4912..1114532 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,4 +1,6 @@ +-r requirements.txt + pytest==3.2.3 mock==2.0.0 pytest-xdist==1.20.1 -pytest-cov===2.5.1 +pytest-runner==3.0 diff --git a/tox.ini b/tox.ini index dd9b1be..8fe9833 100644 --- a/tox.ini +++ b/tox.ini @@ -4,8 +4,7 @@ envlist = clean,py27 [testenv] commands= py.test --cov=atm -deps= - -rtest-requirements.txt +deps= -rtest-requirements.txt [testenv:clean] commands= From e6c9c0f49b2cd944996d5b5d57aa1728a6fbdac9 Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 26 Jan 2018 12:10:28 -0500 Subject: [PATCH 02/36] move some requirements around. tox should work now. --- dev-requirements.txt | 1 - requirements.txt | 1 + setup.py | 4 ++-- test-requirements.txt | 1 + tox.ini | 4 ++-- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 350f0a1..030a23d 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -3,7 +3,6 @@ Sphinx==1.6.5 sphinx-rtd-theme==0.2.4 sphinxcontrib-websupport==1.0.1 -pytest-cov===2.5.1 codecov==2.0.9 flake8==3.4.1 isort==4.2.15 diff --git a/requirements.txt b/requirements.txt index f03d282..ceb49a6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,4 @@ mysql-python==1.2.5 pyyaml==3.12 joblib==0.11 -e git+https://github.com/hdi-project/btb.git#egg=btb +-e . 
diff --git a/setup.py b/setup.py index 3ea6fc7..037fbaa 100644 --- a/setup.py +++ b/setup.py @@ -74,10 +74,10 @@ 'mysql-python>=1.2', 'pyyaml>=3.12', 'joblib>=0.11', - 'btb', + 'future>=0.16', ], dependency_links=[ - 'git+https://github.com/hdi-project/btb.git#egg=btb', + 'git+ssh://git@github.com/hdi-project/btb.git#egg=btb', ], # This variable is used to specify requirements for *this file* to run. diff --git a/test-requirements.txt b/test-requirements.txt index 1114532..b779c99 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -4,3 +4,4 @@ pytest==3.2.3 mock==2.0.0 pytest-xdist==1.20.1 pytest-runner==3.0 +pytest-cov===2.5.1 diff --git a/tox.ini b/tox.ini index 8fe9833..e12007e 100644 --- a/tox.ini +++ b/tox.ini @@ -1,9 +1,9 @@ [tox] skipsdist = {env:TOXBUILD:false} -envlist = clean,py27 +envlist = clean,py27 [testenv] -commands= py.test --cov=atm +commands= pytest --cov=atm deps= -rtest-requirements.txt [testenv:clean] From ffe0e4a15c4138704787ae8288a7514db12a78e9 Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 26 Jan 2018 13:11:53 -0500 Subject: [PATCH 03/36] add apt requirements to Makefile --- Makefile | 1 + setup.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/Makefile b/Makefile index b747586..ade8884 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,7 @@ test: lint python $(TEST_CMD) installdeps: + apt install sqlite3 mysql-server mysql-client libmysqlclient-dev pip install --upgrade pip pip install -e . pip install -r dev-requirements.txt diff --git a/setup.py b/setup.py index 037fbaa..3c01dc8 100644 --- a/setup.py +++ b/setup.py @@ -76,6 +76,9 @@ 'joblib>=0.11', 'future>=0.16', ], + # TODO: this is deprecated. Figure out how to accomplish the same thing with + # the proper tools. 
+ # https://www.python.org/dev/peps/pep-0440/#direct-references dependency_links=[ 'git+ssh://git@github.com/hdi-project/btb.git#egg=btb', ], From d2bf368b7cc9d7a2b8444bcfe8ab2ac35219595a Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 26 Jan 2018 13:13:21 -0500 Subject: [PATCH 04/36] apt -> apt-get --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ade8884..c5c5244 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ test: lint python $(TEST_CMD) installdeps: - apt install sqlite3 mysql-server mysql-client libmysqlclient-dev + apt-get install sqlite3 mysql-server mysql-client libmysqlclient-dev pip install --upgrade pip pip install -e . pip install -r dev-requirements.txt From f68354716c0b5bc184b1cbcb9e45d8a5c0857a0a Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 26 Jan 2018 13:14:14 -0500 Subject: [PATCH 05/36] try with sudo --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c5c5244..c55ddb4 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ test: lint python $(TEST_CMD) installdeps: - apt-get install sqlite3 mysql-server mysql-client libmysqlclient-dev + sudo apt-get install sqlite3 mysql-server mysql-client libmysqlclient-dev pip install --upgrade pip pip install -e . 
pip install -r dev-requirements.txt From 8b2ee82452c0b96c3483b367c7e5ff27a80c9999 Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 26 Jan 2018 13:16:14 -0500 Subject: [PATCH 06/36] move from makefile to circleci yaml --- .circleci/config.yml | 1 + Makefile | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c3e7e70..3b89acd 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -11,6 +11,7 @@ jobs: - image: themattrix/tox steps: - checkout + - run: apt-get install sqlite3 mysql-server mysql-client libmysqlclient-dev - run: pyenv local 2.7.13 # 3.5.2 3.6.0 - run: make installdeps - run: make lint && tox && codecov diff --git a/Makefile b/Makefile index c55ddb4..b747586 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,6 @@ test: lint python $(TEST_CMD) installdeps: - sudo apt-get install sqlite3 mysql-server mysql-client libmysqlclient-dev pip install --upgrade pip pip install -e . pip install -r dev-requirements.txt From 3996423af60712124d137364c8ad15dba47f6ad4 Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 26 Jan 2018 13:18:47 -0500 Subject: [PATCH 07/36] try updating first --- .circleci/config.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 3b89acd..8d7212e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -11,7 +11,8 @@ jobs: - image: themattrix/tox steps: - checkout - - run: apt-get install sqlite3 mysql-server mysql-client libmysqlclient-dev + - run: sudo apt-get update + - run: sudo apt-get install sqlite3 mysql-server mysql-client libmysqlclient-dev - run: pyenv local 2.7.13 # 3.5.2 3.6.0 - run: make installdeps - run: make lint && tox && codecov From 4c6712216b7a8b49e4bb36a0c487061d7bd00f80 Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 26 Jan 2018 13:54:45 -0500 Subject: [PATCH 08/36] still fiddling with circleci setup --- .circleci/config.yml | 12 ++++---- 
config/test/btb/run.yaml | 40 -------------------------- config/test/btb/sql.yaml | 14 --------- config/test/end_to_end/run.yaml | 50 --------------------------------- config/test/end_to_end/sql.yaml | 14 --------- config/test/method/run.yaml | 37 ------------------------ config/test/method/sql.yaml | 14 --------- test/scripts/end_to_end_test.py | 6 ++-- test/scripts/evaluate_btb.py | 6 ++-- test/scripts/method_test.py | 6 ++-- 10 files changed, 16 insertions(+), 183 deletions(-) delete mode 100644 config/test/btb/run.yaml delete mode 100644 config/test/btb/sql.yaml delete mode 100644 config/test/end_to_end/run.yaml delete mode 100644 config/test/end_to_end/sql.yaml delete mode 100644 config/test/method/run.yaml delete mode 100644 config/test/method/sql.yaml diff --git a/.circleci/config.yml b/.circleci/config.yml index 8d7212e..8c0c975 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,5 +1,5 @@ -# This is heavily based on the featuretools test setup. Most of the logic is -# pushed to pytest and tox. +# This is heavily based on the featuretools test setup. Most of the setup logic +# is punted to pytest and tox. 
# # See https://github.com/Featuretools/featuretools # @@ -8,11 +8,13 @@ jobs: build: working_directory: ~/atm docker: - - image: themattrix/tox + - image: themattrix/tox + - image: mysql:5.7 + environment: + MYSQL_USER: ubuntu + MYSQL_DATABASE: atm steps: - checkout - - run: sudo apt-get update - - run: sudo apt-get install sqlite3 mysql-server mysql-client libmysqlclient-dev - run: pyenv local 2.7.13 # 3.5.2 3.6.0 - run: make installdeps - run: make lint && tox && codecov diff --git a/config/test/btb/run.yaml b/config/test/btb/run.yaml deleted file mode 100644 index dddf5b6..0000000 --- a/config/test/btb/run.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# this will be overridden by the test script -train_path: -test_path: -data_description: -label_column: class - -# use every algorithm we have -methods: - - logreg - - dt - - knn -# directory to store trained models; will be created if it doesn't exist -models_dir: models/ -# priority (higher number is more important) -priority: 1 -# Should there be a classifier or walltime budget? -budget_type: classifier -# If budget_type is classifier, how many classifiers to try? -budget: 100 -# How should ATM sample hyperparameters from a given frozen set? -tuner: gp -# r_min is the number of random runs performed in each hyperpartition before -# allowing bayesian opt to select parameters. -r_min: 2 -# gridding determines whether or not sample selection will happen on a grid. -gridding: 0 -# How should ATM select a particular hyperpartition (frozen set) from the -# set of all hyperpartitions? -selector: bestk -# k is number that xxx_k methods use. It is similar to r_min, except it is -# called k_window and determines how much "history" ATM considers for certain -# frozen selection logics. 
-k_window: 5 -# Which field to use for judgment of performance -# options: f1, roc_auc, accuracy -metric: f1 -# Which data to use for computing judgment score -# cv = Cross_Validated performance on training data -# test = Performance on test data -score_target: cv diff --git a/config/test/btb/sql.yaml b/config/test/btb/sql.yaml deleted file mode 100644 index 501b213..0000000 --- a/config/test/btb/sql.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# SQL dialect -dialect: sqlite -# Name of the database -database: test/atm.db -# Username to gain access to the database -username: -# Password to gain access to the database -password: -# Host name of the device hosting the database -host: -# Port on host listening for database connections -port: -# Optional field for specifying login details -query: diff --git a/config/test/end_to_end/run.yaml b/config/test/end_to_end/run.yaml deleted file mode 100644 index 309194d..0000000 --- a/config/test/end_to_end/run.yaml +++ /dev/null @@ -1,50 +0,0 @@ -# this will be overridden by the test script -train_path: -test_path: -data_description: -label_column: class - -# use every method we have -methods: - - logreg - - svm - - sgd - - dt - - et - - rf - - gnb - - mnb - - bnb - - gp - - pa - - knn - - mlp -# directory to store trained models; will be created if it doesn't exist -models_dir: models/ -# priority (higher number is more important) -priority: 1 -# Should there be a classifier or walltime budget? -budget_type: classifier -# If budget_type is classifier, how many classifiers to try? -budget: 100 -# How should ATM sample hyperparameters from a given frozen set? -tuner: gp -# r_min is the number of random runs performed in each hyperpartition before -# allowing bayesian opt to select parameters. -r_min: 2 -# gridding determines whether or not sample selection will happen on a grid. -gridding: 0 -# How should ATM select a particular hyperpartition (frozen set) from the -# set of all hyperpartitions? 
-selector: bestk -# k is number that xxx_k methods use. It is similar to r_min, except it is -# called k_window and determines how much "history" ATM considers for certain -# frozen selection logics. -k_window: 5 -# Which field to use for judgment of performance -# options: f1, roc_auc, accuracy -metric: f1 -# Which data to use for computing judgment score -# cv = Cross_Validated performance on training data -# test = Performance on test data -score_target: cv diff --git a/config/test/end_to_end/sql.yaml b/config/test/end_to_end/sql.yaml deleted file mode 100644 index 501b213..0000000 --- a/config/test/end_to_end/sql.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# SQL dialect -dialect: sqlite -# Name of the database -database: test/atm.db -# Username to gain access to the database -username: -# Password to gain access to the database -password: -# Host name of the device hosting the database -host: -# Port on host listening for database connections -port: -# Optional field for specifying login details -query: diff --git a/config/test/method/run.yaml b/config/test/method/run.yaml deleted file mode 100644 index e803c97..0000000 --- a/config/test/method/run.yaml +++ /dev/null @@ -1,37 +0,0 @@ -# this will be overridden by the test script -train_path: -test_path: -data_description: -label_column: class - -# methods have to be specified by the test script -methods: -# directory to store trained models; will be created if it doesn't exist -models_dir: models/ -# priority (higher number is more important) -priority: 1 -# Should there be a classifier or walltime budget? -budget_type: classifier -# If budget_type is classifier, how many classifiers to try? -budget: 1 -# How should ATM sample hyperparameters from a given frozen set? -tuner: gp -# r_min is the number of random runs performed in each hyperpartition before -# allowing bayesian opt to select parameters. -r_min: 2 -# gridding determines whether or not sample selection will happen on a grid. 
-gridding: 0 -# How should ATM select a particular hyperpartition (frozen set) from the -# set of all hyperpartitions? -selector: bestk -# k is number that xxx_k tuners use. It is similar to r_min, except it is -# called k_window and determines how much "history" ATM considers for certain -# frozen selection logics. -k_window: 5 -# Which field to use for judgment of performance -# options: f1, roc_auc, accuracy -metric: f1 -# Which data to use for computing judgment score -# cv = Cross_Validated performance on training data -# test = Performance on test data -score_target: cv diff --git a/config/test/method/sql.yaml b/config/test/method/sql.yaml deleted file mode 100644 index 501b213..0000000 --- a/config/test/method/sql.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# SQL dialect -dialect: sqlite -# Name of the database -database: test/atm.db -# Username to gain access to the database -username: -# Password to gain access to the database -password: -# Host name of the device hosting the database -host: -# Port on host listening for database connections -port: -# Optional field for specifying login details -query: diff --git a/test/scripts/end_to_end_test.py b/test/scripts/end_to_end_test.py index 5fe3949..9063b71 100644 --- a/test/scripts/end_to_end_test.py +++ b/test/scripts/end_to_end_test.py @@ -15,10 +15,10 @@ from utilities import * -CONF_DIR = os.path.join(PROJECT_ROOT, 'config/test/end_to_end/') +CONF_DIR = os.path.join(PROJECT_ROOT, 'config/test/') DATA_DIR = os.path.join(PROJECT_ROOT, 'data/test/') -RUN_CONFIG = join(CONF_DIR, 'run.yaml') -SQL_CONFIG = join(CONF_DIR, 'sql.yaml') +RUN_CONFIG = join(CONF_DIR, 'run-all.yaml') +SQL_CONFIG = join(CONF_DIR, 'sql-sqlite.yaml') DATASETS_MAX_MIN = [ 'wholesale-customers_1.csv', diff --git a/test/scripts/evaluate_btb.py b/test/scripts/evaluate_btb.py index 8ac3c7f..97f78a7 100644 --- a/test/scripts/evaluate_btb.py +++ b/test/scripts/evaluate_btb.py @@ -12,9 +12,9 @@ from utilities import * -CONF_DIR = 
os.path.join(PROJECT_ROOT, 'config/test/btb/') -RUN_CONFIG = join(CONF_DIR, 'run.yaml') -SQL_CONFIG = join(CONF_DIR, 'sql.yaml') +CONF_DIR = os.path.join(PROJECT_ROOT, 'config/test/') +RUN_CONFIG = join(CONF_DIR, 'run-default.yaml') +SQL_CONFIG = join(CONF_DIR, 'sql-sqlite.yaml') DATASETS_MAX_FIRST = [ 'collins_1.csv', diff --git a/test/scripts/method_test.py b/test/scripts/method_test.py index b924d08..eca2fab 100644 --- a/test/scripts/method_test.py +++ b/test/scripts/method_test.py @@ -15,10 +15,10 @@ from utilities import * -CONF_DIR = os.path.join(PROJECT_ROOT, 'config/test/method/') +CONF_DIR = os.path.join(PROJECT_ROOT, 'config/test/') DATA_DIR = os.path.join(PROJECT_ROOT, 'data/test/') -RUN_CONFIG = join(CONF_DIR, 'run.yaml') -SQL_CONFIG = join(CONF_DIR, 'sql.yaml') +RUN_CONFIG = join(CONF_DIR, 'run-default.yaml') +SQL_CONFIG = join(CONF_DIR, 'sql-sqlite.yaml') DATASETS = [ 'iris.data.csv', 'pollution_1.csv', From 11a02675f0847f9d6eef5aeda860f7dd4210d478 Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 26 Jan 2018 13:55:24 -0500 Subject: [PATCH 09/36] move test config files around --- config/test/run-all.yaml | 50 ++++++++++++++++++++++++++++++++++++ config/test/run-default.yaml | 40 +++++++++++++++++++++++++++++ config/test/sql-mysql.yaml | 14 ++++++++++ config/test/sql-sqlite.yaml | 14 ++++++++++ 4 files changed, 118 insertions(+) create mode 100644 config/test/run-all.yaml create mode 100644 config/test/run-default.yaml create mode 100644 config/test/sql-mysql.yaml create mode 100644 config/test/sql-sqlite.yaml diff --git a/config/test/run-all.yaml b/config/test/run-all.yaml new file mode 100644 index 0000000..309194d --- /dev/null +++ b/config/test/run-all.yaml @@ -0,0 +1,50 @@ +# this will be overridden by the test script +train_path: +test_path: +data_description: +label_column: class + +# use every method we have +methods: + - logreg + - svm + - sgd + - dt + - et + - rf + - gnb + - mnb + - bnb + - gp + - pa + - knn + - mlp +# directory to 
store trained models; will be created if it doesn't exist +models_dir: models/ +# priority (higher number is more important) +priority: 1 +# Should there be a classifier or walltime budget? +budget_type: classifier +# If budget_type is classifier, how many classifiers to try? +budget: 100 +# How should ATM sample hyperparameters from a given frozen set? +tuner: gp +# r_min is the number of random runs performed in each hyperpartition before +# allowing bayesian opt to select parameters. +r_min: 2 +# gridding determines whether or not sample selection will happen on a grid. +gridding: 0 +# How should ATM select a particular hyperpartition (frozen set) from the +# set of all hyperpartitions? +selector: bestk +# k is number that xxx_k methods use. It is similar to r_min, except it is +# called k_window and determines how much "history" ATM considers for certain +# frozen selection logics. +k_window: 5 +# Which field to use for judgment of performance +# options: f1, roc_auc, accuracy +metric: f1 +# Which data to use for computing judgment score +# cv = Cross_Validated performance on training data +# test = Performance on test data +score_target: cv diff --git a/config/test/run-default.yaml b/config/test/run-default.yaml new file mode 100644 index 0000000..dddf5b6 --- /dev/null +++ b/config/test/run-default.yaml @@ -0,0 +1,40 @@ +# this will be overridden by the test script +train_path: +test_path: +data_description: +label_column: class + +# use a small, basic subset of the available methods +methods: + - logreg + - dt + - knn +# directory to store trained models; will be created if it doesn't exist +models_dir: models/ +# priority (higher number is more important) +priority: 1 +# Should there be a classifier or walltime budget? +budget_type: classifier +# If budget_type is classifier, how many classifiers to try? +budget: 100 +# How should ATM sample hyperparameters from a given frozen set? 
+tuner: gp +# r_min is the number of random runs performed in each hyperpartition before +# allowing bayesian opt to select parameters. +r_min: 2 +# gridding determines whether or not sample selection will happen on a grid. +gridding: 0 +# How should ATM select a particular hyperpartition (frozen set) from the +# set of all hyperpartitions? +selector: bestk +# k is number that xxx_k methods use. It is similar to r_min, except it is +# called k_window and determines how much "history" ATM considers for certain +# frozen selection logics. +k_window: 5 +# Which field to use for judgment of performance +# options: f1, roc_auc, accuracy +metric: f1 +# Which data to use for computing judgment score +# cv = Cross_Validated performance on training data +# test = Performance on test data +score_target: cv diff --git a/config/test/sql-mysql.yaml b/config/test/sql-mysql.yaml new file mode 100644 index 0000000..ded7a92 --- /dev/null +++ b/config/test/sql-mysql.yaml @@ -0,0 +1,14 @@ +# SQL dialect +dialect: mysql +# Name of the database +database: atm +# Username to gain access to the database +username: ubuntu +# Password to gain access to the database +password: ubuntu +# Host name of the device hosting the database +host: 127.0.0.1 +# Port on host listening for database connections +port: 3306 +# Optional field for specifying login details +query: diff --git a/config/test/sql-sqlite.yaml b/config/test/sql-sqlite.yaml new file mode 100644 index 0000000..501b213 --- /dev/null +++ b/config/test/sql-sqlite.yaml @@ -0,0 +1,14 @@ +# SQL dialect +dialect: sqlite +# Name of the database +database: test/atm.db +# Username to gain access to the database +username: +# Password to gain access to the database +password: +# Host name of the device hosting the database +host: +# Port on host listening for database connections +port: +# Optional field for specifying login details +query: From 25f345cc18ac3bba80f83b99e7f2b8434b9266ea Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 
26 Jan 2018 14:10:54 -0500 Subject: [PATCH 10/36] add root password --- .circleci/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 8c0c975..5f40ec5 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -12,6 +12,7 @@ jobs: - image: mysql:5.7 environment: MYSQL_USER: ubuntu + MYSQL_ROOT_PASSWORD: ubuntu MYSQL_DATABASE: atm steps: - checkout From ca7e6214555c9899b0b2c93b937bab3c472be679 Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 26 Jan 2018 17:16:19 -0500 Subject: [PATCH 11/36] change file names to work with tox dockerfile; add install-prereqs.sh --- install-prereqs.sh | 2 ++ dev-requirements.txt => requirements-dev.txt | 2 +- test-requirements.txt => requirements-test.txt | 0 requirements.txt | 2 +- tox.ini | 2 +- 5 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 install-prereqs.sh rename dev-requirements.txt => requirements-dev.txt (85%) rename test-requirements.txt => requirements-test.txt (100%) diff --git a/install-prereqs.sh b/install-prereqs.sh new file mode 100644 index 0000000..316609b --- /dev/null +++ b/install-prereqs.sh @@ -0,0 +1,2 @@ +# This will be copied into the tox docker build and run during setup. 
+sudo apt-get install mysql-client libmysqlclient-dev diff --git a/dev-requirements.txt b/requirements-dev.txt similarity index 85% rename from dev-requirements.txt rename to requirements-dev.txt index 030a23d..089fd10 100644 --- a/dev-requirements.txt +++ b/requirements-dev.txt @@ -1,4 +1,4 @@ --r test-requirements.txt +-r requirements-test.txt Sphinx==1.6.5 sphinx-rtd-theme==0.2.4 diff --git a/test-requirements.txt b/requirements-test.txt similarity index 100% rename from test-requirements.txt rename to requirements-test.txt diff --git a/requirements.txt b/requirements.txt index ceb49a6..1a64bc3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ pandas==0.20.3 scikit-learn==0.18.2 scipy==0.19.1 sklearn-pandas==1.5.0 -mysql-python==1.2.5 +mysqlclient pyyaml==3.12 joblib==0.11 -e git+https://github.com/hdi-project/btb.git#egg=btb diff --git a/tox.ini b/tox.ini index e12007e..24d1e1c 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ envlist = clean,py27 [testenv] commands= pytest --cov=atm -deps= -rtest-requirements.txt +deps= -rrequirements-test.txt [testenv:clean] commands= From daaafdef1fe4ff0641a7fc5409d3b701f4465950 Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Mon, 29 Jan 2018 15:02:23 -0500 Subject: [PATCH 12/36] move to relative imports; fix requirements path in Makefile --- Makefile | 2 +- atm/config.py | 3 ++- atm/constants.py | 3 ++- atm/database.py | 6 +++--- atm/enter_data.py | 14 +++++++------- atm/method.py | 3 ++- atm/metrics.py | 3 ++- atm/model.py | 10 +++++----- atm/utilities.py | 4 ++-- atm/worker.py | 12 ++++++------ install-prereqs.sh | 3 ++- requirements.txt | 3 +-- setup.py | 3 ++- 13 files changed, 37 insertions(+), 32 deletions(-) diff --git a/Makefile b/Makefile index b747586..793b7da 100644 --- a/Makefile +++ b/Makefile @@ -14,5 +14,5 @@ test: lint installdeps: pip install --upgrade pip pip install -e . 
- pip install -r dev-requirements.txt + pip install -r requirements-dev.txt diff --git a/atm/config.py b/atm/config.py index dcf71b0..1366417 100644 --- a/atm/config.py +++ b/atm/config.py @@ -1,10 +1,11 @@ +from __future__ import absolute_import import os import re from argparse import ArgumentError, ArgumentTypeError, RawTextHelpFormatter import yaml -from atm.constants import * +from .constants import * class Config(object): diff --git a/atm/constants.py b/atm/constants.py index c1aced9..f0c8ecb 100644 --- a/atm/constants.py +++ b/atm/constants.py @@ -1,6 +1,7 @@ +from __future__ import absolute_import import os -from atm import PROJECT_ROOT +from . import PROJECT_ROOT from btb.selection import Uniform as UniformSelector from btb.selection import (UCB1, BestKReward, BestKVelocity, diff --git a/atm/database.py b/atm/database.py index 50e97a0..7d225e2 100644 --- a/atm/database.py +++ b/atm/database.py @@ -1,4 +1,4 @@ -from __future__ import print_function +from __future__ import absolute_import, print_function from datetime import datetime from operator import attrgetter @@ -9,8 +9,8 @@ from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import relationship, sessionmaker -from atm.constants import * -from atm.utilities import * +from .constants import * +from .utilities import * MAX_HYPERPARTITION_ERRORS = 3 diff --git a/atm/enter_data.py b/atm/enter_data.py index c67b38e..7410399 100755 --- a/atm/enter_data.py +++ b/atm/enter_data.py @@ -1,16 +1,16 @@ -from __future__ import print_function +from __future__ import absolute_import, print_function import argparse import os import warnings from datetime import datetime, timedelta -from atm.config import * -from atm.constants import * -from atm.database import Database -from atm.encoder import MetaData -from atm.method import Method -from atm.utilities import download_data +from .config import * +from .constants import * +from .database import Database +from .encoder import MetaData +from 
.method import Method +from .utilities import download_data warnings.filterwarnings("ignore") diff --git a/atm/method.py b/atm/method.py index 0acb935..e703429 100644 --- a/atm/method.py +++ b/atm/method.py @@ -1,9 +1,10 @@ +from __future__ import absolute_import import json from builtins import str as newstr from builtins import object from os.path import join -from atm.constants import METHOD_PATH, METHODS_MAP +from .constants import METHOD_PATH, METHODS_MAP import btb diff --git a/atm/metrics.py b/atm/metrics.py index 3b6c134..b0b3d7e 100644 --- a/atm/metrics.py +++ b/atm/metrics.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import numpy as np import pandas as pd from sklearn.metrics import (accuracy_score, average_precision_score, @@ -5,7 +6,7 @@ precision_recall_curve, roc_auc_score, roc_curve) from sklearn.model_selection import StratifiedKFold -from atm.constants import * +from .constants import * def rank_n_accuracy(y_true, y_prob_mat, n=0.33): diff --git a/atm/model.py b/atm/model.py index 2c09b13..37e1729 100644 --- a/atm/model.py +++ b/atm/model.py @@ -3,7 +3,7 @@ :synopsis: Model around classification method. 
""" -from __future__ import print_function +from __future__ import absolute_import, print_function import re import time @@ -20,10 +20,10 @@ from sklearn.pipeline import Pipeline from sklearn.preprocessing import MinMaxScaler, StandardScaler -from atm.constants import * -from atm.encoder import DataEncoder, MetaData -from atm.method import Method -from atm.metrics import cross_validate_pipeline, test_pipeline +from .constants import * +from .encoder import DataEncoder, MetaData +from .method import Method +from .metrics import cross_validate_pipeline, test_pipeline class Model(object): diff --git a/atm/utilities.py b/atm/utilities.py index 386702e..ee1f3f7 100644 --- a/atm/utilities.py +++ b/atm/utilities.py @@ -1,4 +1,4 @@ -from __future__ import print_function +from __future__ import absolute_import, print_function import base64 import hashlib @@ -11,7 +11,7 @@ import numpy as np from boto.s3.connection import Key, S3Connection -from atm.constants import * +from .constants import * from btb import ParamTypes diff --git a/atm/worker.py b/atm/worker.py index 53d2be9..d2ed205 100755 --- a/atm/worker.py +++ b/atm/worker.py @@ -1,5 +1,5 @@ #!/usr/bin/python2.7 -from __future__ import print_function +from __future__ import absolute_import, print_function import argparse import datetime @@ -17,11 +17,11 @@ from boto.s3.connection import Key as S3Key from boto.s3.connection import S3Connection -from atm.config import * -from atm.constants import * -from atm.database import ClassifierStatus, Database, db_session -from atm.model import Model -from atm.utilities import * +from .config import * +from .constants import * +from .database import ClassifierStatus, Database, db_session +from .model import Model +from .utilities import * # shhh warnings.filterwarnings('ignore') diff --git a/install-prereqs.sh b/install-prereqs.sh index 316609b..01b2793 100644 --- a/install-prereqs.sh +++ b/install-prereqs.sh @@ -1,2 +1,3 @@ # This will be copied into the tox docker build and run 
during setup. -sudo apt-get install mysql-client libmysqlclient-dev +apt-get -qq update +apt-get -qq -y install mysql-client libmysqlclient-dev diff --git a/requirements.txt b/requirements.txt index 1a64bc3..779f0fe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,8 +5,7 @@ pandas==0.20.3 scikit-learn==0.18.2 scipy==0.19.1 sklearn-pandas==1.5.0 -mysqlclient +mysqlclient>=1.2 pyyaml==3.12 joblib==0.11 -e git+https://github.com/hdi-project/btb.git#egg=btb --e . diff --git a/setup.py b/setup.py index 3c01dc8..07ab0a3 100644 --- a/setup.py +++ b/setup.py @@ -71,7 +71,7 @@ 'scikit-learn>=0.18', 'scipy>=0.19', 'sklearn-pandas>=1.5', - 'mysql-python>=1.2', + 'mysqlclient>=1.2', 'pyyaml>=3.12', 'joblib>=0.11', 'future>=0.16', @@ -92,6 +92,7 @@ 'mock>=2', 'pytest-xdist>=1.20', 'pytest-runner>=3', + 'pytest-cov>=2.5', ] ) From 936b22cecf2c2db5cdc2d2a08bbbf8700b7043e3 Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Mon, 29 Jan 2018 15:11:41 -0500 Subject: [PATCH 13/36] add apt-get install to config.yml --- .circleci/config.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 5f40ec5..8ee5670 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -16,6 +16,8 @@ jobs: MYSQL_DATABASE: atm steps: - checkout + - run: apt-get -qq update + - run: apt-get -qq -y install mysql-client libmysqlclient-dev - run: pyenv local 2.7.13 # 3.5.2 3.6.0 - run: make installdeps - run: make lint && tox && codecov From ea3751e31f53642e6e3885f85333e314f4298ede Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Mon, 29 Jan 2018 15:14:39 -0500 Subject: [PATCH 14/36] install git in config.yml --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 8ee5670..9568fd9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -17,7 +17,7 @@ jobs: steps: - checkout - run: apt-get -qq update - - run: apt-get -qq -y install mysql-client 
libmysqlclient-dev + - run: apt-get -qq -y install git mysql-client libmysqlclient-dev - run: pyenv local 2.7.13 # 3.5.2 3.6.0 - run: make installdeps - run: make lint && tox && codecov From d9e69d8e9656c551da0faa87d2076e5bfc853e39 Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Mon, 29 Jan 2018 15:17:24 -0500 Subject: [PATCH 15/36] isort --- atm/config.py | 1 + atm/constants.py | 1 + atm/method.py | 1 + atm/metrics.py | 1 + 4 files changed, 4 insertions(+) diff --git a/atm/config.py b/atm/config.py index 1366417..24c3ee7 100644 --- a/atm/config.py +++ b/atm/config.py @@ -1,4 +1,5 @@ from __future__ import absolute_import + import os import re from argparse import ArgumentError, ArgumentTypeError, RawTextHelpFormatter diff --git a/atm/constants.py b/atm/constants.py index f0c8ecb..f3bb7f7 100644 --- a/atm/constants.py +++ b/atm/constants.py @@ -1,4 +1,5 @@ from __future__ import absolute_import + import os from . import PROJECT_ROOT diff --git a/atm/method.py b/atm/method.py index e703429..c186cf9 100644 --- a/atm/method.py +++ b/atm/method.py @@ -1,4 +1,5 @@ from __future__ import absolute_import + import json from builtins import str as newstr from builtins import object diff --git a/atm/metrics.py b/atm/metrics.py index b0b3d7e..ea5be66 100644 --- a/atm/metrics.py +++ b/atm/metrics.py @@ -1,4 +1,5 @@ from __future__ import absolute_import + import numpy as np import pandas as pd from sklearn.metrics import (accuracy_score, average_precision_score, From b32f9f6791551a1426ff1ce2f301ac382ffebc2e Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Mon, 29 Jan 2018 15:58:34 -0500 Subject: [PATCH 16/36] move config and data directories to atm/ --- README.md | 18 +- atm/__init__.py | 2 +- .../config/templates/aws.yaml | 0 .../config/templates/run.yaml | 0 .../config/templates/sql.yaml | 0 {config => atm/config}/test/run-all.yaml | 0 .../config/test/run-basic.yaml | 0 {config => atm/config}/test/sql-mysql.yaml | 0 {config => atm/config}/test/sql-sqlite.yaml | 0 
{data => atm/data}/test/iris.data.csv | 0 atm/data/test/pitchfork_genres.csv | 251 ++++++++++++++++++ {data => atm/data}/test/pollution_1.csv | 0 atm/tests/__init__.py | 0 atm/tests/integration_tests/__init__.py | 0 atm/tests/unit_tests/__init__.py | 0 atm/tests/unit_tests/test_enter_data.py | 126 +++++++++ atm/tests/unit_tests/test_method.py | 37 +++ atm/tests/unit_tests/test_worker.py | 14 + install-prereqs.sh | 3 - test/tests/unit_tests/test_enter_data.py | 2 +- 20 files changed, 439 insertions(+), 14 deletions(-) rename config/templates/aws_config.yaml => atm/config/templates/aws.yaml (100%) rename config/templates/run_config.yaml => atm/config/templates/run.yaml (100%) rename config/templates/sql_config.yaml => atm/config/templates/sql.yaml (100%) rename {config => atm/config}/test/run-all.yaml (100%) rename config/test/run-default.yaml => atm/config/test/run-basic.yaml (100%) rename {config => atm/config}/test/sql-mysql.yaml (100%) rename {config => atm/config}/test/sql-sqlite.yaml (100%) rename {data => atm/data}/test/iris.data.csv (100%) create mode 100644 atm/data/test/pitchfork_genres.csv rename {data => atm/data}/test/pollution_1.csv (100%) create mode 100644 atm/tests/__init__.py create mode 100644 atm/tests/integration_tests/__init__.py create mode 100644 atm/tests/unit_tests/__init__.py create mode 100644 atm/tests/unit_tests/test_enter_data.py create mode 100644 atm/tests/unit_tests/test_method.py create mode 100644 atm/tests/unit_tests/test_worker.py delete mode 100644 install-prereqs.sh diff --git a/README.md b/README.md index 53d6041..e6db6f1 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ Below we will give a quick tutorial of how to run atm on your desktop. We will u ``` $ python atm/enter_data.py ``` - This command will create a ``datarun``. In ATM, a *datarun* is a single logical machine learning task. 
If you run the above command without any arguments, it will use the default settings found in the `config/templates/\*_config.yaml` files to create a new SQLite3 database at `./atm.db`, create a new `dataset` instance which refers to the data above, and create a `datarun` instance which points to that dataset. More about what is stored in this database and what is it used for can be found [here](https://cyphe.rs/static/atm.pdf). + This command will create a ``datarun``. In ATM, a *datarun* is a single logical machine learning task. If you run the above command without any arguments, it will use the default settings found in the `config/templates/\*.yaml` files to create a new SQLite3 database at `./atm.db`, create a new `dataset` instance which refers to the data above, and create a `datarun` instance which points to that dataset. More about what is stored in this database and what it is used for can be found [here](https://cyphe.rs/static/atm.pdf). The command should produce a lot of output, the end of which looks something like this: @@ -147,9 +147,9 @@ That means there are two ways to pass configuration to the command. $ vim config/*.yaml ``` - `run_config.yaml` contains all the settings for a single Dataset and Datarun. Specify the `train_path` to point to your own dataset. + `run.yaml` contains all the settings for a single Dataset and Datarun. Specify the `train_path` to point to your own dataset. - `sql_config.yaml` contains the settings for the ModelHub SQL database. The default configuration will connect to (and create if necessary) a SQLite database at `./atm.db` relative to the directory from which `enter_data.py` is run. If you are using a MySQL database, you will need to change the file to something like this: + `sql.yaml` contains the settings for the ModelHub SQL database. The default configuration will connect to (and create if necessary) a SQLite database at `./atm.db` relative to the directory from which `enter_data.py` is run. 
If you are using a MySQL database, you will need to change the file to something like this: ``` dialect: mysql database: atm @@ -160,13 +160,13 @@ That means there are two ways to pass configuration to the command. query: ``` - `aws_config.yaml` should contain the settings for running ATM in the cloud. This is not necessary for local operation. + `aws.yaml` should contain the settings for running ATM in the cloud. This is not necessary for local operation. Once your YAML files have been updated, run the datarun creation script and pass it the paths to your new config files: ``` - $ python atm/enter_data.py --sql-config config/sql_config.yaml \ - > --aws-config config/aws_config.yaml \ - > --run-config config/run_config.yaml + $ python atm/enter_data.py --sql-config config/sql.yaml \ + > --aws-config config/aws.yaml \ + > --run-config config/run.yaml ``` 2. **Using command line arguments** @@ -183,8 +183,8 @@ That means there are two ways to pass configuration to the command. Once you've created your custom datarun, start a worker, specifying your config files and the datarun(s) you'd like to compute on. ``` -$ python atm/worker.py --sql-config config/sql_config.yaml \ -> --aws-config config/aws_config.yaml --dataruns 1 +$ python atm/worker.py --sql-config config/sql.yaml \ +> --aws-config config/aws.yaml --dataruns 1 ``` It's important that the SQL configuration used by the worker matches the configuration you passed to `enter_data.py` -- otherwise, the worker will be looking in the wrong ModelHub database for its datarun! diff --git a/atm/__init__.py b/atm/__init__.py index 7d9f7a4..dc4db97 100644 --- a/atm/__init__.py +++ b/atm/__init__.py @@ -6,7 +6,7 @@ # Get the path of the project root, so that the rest of the project can # reference files relative to there. 
-PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..')) +PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__)) __all__ = ['config', 'constants', 'database', 'enter_data', 'method', 'metrics', 'model', 'utilities', 'worker'] diff --git a/config/templates/aws_config.yaml b/atm/config/templates/aws.yaml similarity index 100% rename from config/templates/aws_config.yaml rename to atm/config/templates/aws.yaml diff --git a/config/templates/run_config.yaml b/atm/config/templates/run.yaml similarity index 100% rename from config/templates/run_config.yaml rename to atm/config/templates/run.yaml diff --git a/config/templates/sql_config.yaml b/atm/config/templates/sql.yaml similarity index 100% rename from config/templates/sql_config.yaml rename to atm/config/templates/sql.yaml diff --git a/config/test/run-all.yaml b/atm/config/test/run-all.yaml similarity index 100% rename from config/test/run-all.yaml rename to atm/config/test/run-all.yaml diff --git a/config/test/run-default.yaml b/atm/config/test/run-basic.yaml similarity index 100% rename from config/test/run-default.yaml rename to atm/config/test/run-basic.yaml diff --git a/config/test/sql-mysql.yaml b/atm/config/test/sql-mysql.yaml similarity index 100% rename from config/test/sql-mysql.yaml rename to atm/config/test/sql-mysql.yaml diff --git a/config/test/sql-sqlite.yaml b/atm/config/test/sql-sqlite.yaml similarity index 100% rename from config/test/sql-sqlite.yaml rename to atm/config/test/sql-sqlite.yaml diff --git a/data/test/iris.data.csv b/atm/data/test/iris.data.csv similarity index 100% rename from data/test/iris.data.csv rename to atm/data/test/iris.data.csv diff --git a/atm/data/test/pitchfork_genres.csv b/atm/data/test/pitchfork_genres.csv new file mode 100644 index 0000000..cba84cd --- /dev/null +++ b/atm/data/test/pitchfork_genres.csv @@ -0,0 +1,251 @@ +,band rock punk guitar bands,rap hop hip rapper beats,pop love record voice life,metal black death doom 
riffs,tracks track sounds electronic work,class +2487,0.00222691041106,0.0,0.0311393409098,0.0,0.0167012188404,folk/country +5217,0.0,0.0145015332088,0.0135657633129,0.00130857497881,0.0239426664866,pop/r&b +5114,0.0165899455295,0.00375316125879,0.00949364323633,0.0,0.0190751989018,electronic +11506,0.0304395572158,0.000884456955076,0.0119797572671,0.0154910669617,0.0,rock +1720,0.0,0.00658630500668,0.0344361380317,0.0,0.00809478192679,pop/r&b +6214,0.0132869850736,0.00099360024374,0.0142193039363,0.0,0.0131884647071,rock +5144,0.0,0.0,0.00974429634673,0.0,0.0287883860915,experimental +8470,0.0,0.0334195479454,0.0,0.0,0.0314491109279,electronic +8218,0.00039425145129,0.000424228249712,0.00890644282925,0.0,0.0410225853998,jazz +6878,0.00393746637535,0.0,0.0248284125041,0.00154860017817,0.0217176437028,rock +12163,0.0135528601078,0.0,0.00690668459428,0.0120665004566,0.0281901867229,global +5568,0.00175980665998,0.0567185120918,0.0154670210709,0.0,0.0,pop/r&b +7470,0.00474012998886,0.0,0.0161973267488,0.0,0.0215111381725,experimental +12114,0.00807993952185,0.0,0.0120099825865,0.0,0.0187239040438,jazz +9648,0.00905871261722,0.0443151046373,0.00652531447039,0.0,0.020854528234,jazz +164,0.00311349576044,0.00211444185093,0.0456161309439,0.0,0.0,pop/r&b +4587,0.0243117150914,0.00523134728807,0.0140093508855,0.0464679110778,0.0109323395527,metal +287,0.01953198151,0.00843153378563,0.014087339204,0.0,0.0204890334158,jazz +109,0.00196234460264,0.0,0.0118910696571,0.000799068477723,0.024784362582,folk/country +8679,0.00116647838639,0.00427929425869,0.0299315278518,0.0,0.0114045070168,rock +14016,0.0,0.0261026150574,0.0168725494194,0.0,0.0093837212047,metal +4578,0.0,0.00379909571296,0.0300893213258,0.0,0.020824609336,pop/r&b +14743,0.0,0.00245231460355,0.0192264112613,0.0,0.0226681277335,jazz +1238,0.0150831993339,0.0,0.0114886527227,0.0671930705956,0.00187592523292,metal +7517,0.0125385320209,0.0,0.00488395478355,0.102521604056,0.0,metal 
+14435,0.00210412624281,0.00927315184445,0.0282481104776,0.00291528056596,0.0106531994634,rock +9107,0.0414410013082,0.0,0.00595258033217,0.0250697162624,0.00830509371021,global +1088,0.00443243258987,0.00252594578836,0.0201317306257,0.0,0.00694073802952,global +3984,0.0,0.063232926471,0.0119595950158,0.0,0.00640162664453,rap +8713,0.0154196641547,0.0,0.00882126606758,0.0,0.0279673388654,folk/country +3803,0.00318042318233,0.0322756477218,0.00721940858862,0.000716956412884,0.0238892499815,rap +10053,0.0,0.0,0.0199076667933,0.0,0.0217962315955,folk/country +5395,0.00121309269495,0.0115163261058,0.00542565775289,0.0125440939432,0.0377086006812,electronic +919,0.0119920461079,0.00212671228648,0.00596062697349,0.0,0.043453539212,jazz +2245,0.00976588471421,0.0,0.00810490283043,0.0165022271765,0.0252434914368,metal +12132,0.00210402791165,0.0563226267538,0.00473286267395,0.0,0.0101149870275,rap +14812,0.0295312568233,0.0,0.0335599878739,0.0,0.000661201403022,metal +2306,0.0,0.0051388730496,0.00143100738534,0.0,0.0400439200187,electronic +10263,0.0,0.0084127527011,0.0393785998755,0.00182039127487,0.0067159054577,rock +1147,0.0,0.0,0.0276284720191,0.0,0.0174551204393,pop/r&b +4794,0.00752984125585,0.00795942614968,0.021027994704,0.0176370488346,0.00954702414665,metal +8890,0.00477604438113,0.0,0.0204838911831,0.0,0.0166109924742,global +12828,0.00249352552279,0.0,0.0103728372942,0.0565896153733,0.0283061320528,metal +9062,0.00371860894614,0.00529796523741,0.0251213840691,0.000513596454586,0.00826756271073,folk/country +1194,0.0,0.0,0.0133202753564,0.0,0.0171443283864,global +15050,0.0,0.0320464743445,0.000322523805465,0.0,0.0318127097995,electronic +3435,0.00725712893137,0.00118791990083,0.00530212803229,0.0789363851814,0.00672124801582,metal +15506,0.0,0.00906255562614,0.0134339815781,0.0,0.0241859407161,pop/r&b +9675,0.0,0.000683090113102,0.00630341662444,0.0,0.0282640040447,folk/country +5596,0.0,0.0481216289347,0.00181048469505,0.0,0.0326697866587,rap 
+1174,0.00982546460554,0.0,0.0161081208474,0.0229533647121,0.0178986416259,metal +4368,0.0147385138566,0.0,0.0237395571123,0.0161474352123,0.0222135280019,metal +6817,0.0,0.0,0.0288701883228,0.0,0.00943452361721,folk/country +14611,0.00499549265376,0.004506029725,0.0241118242618,0.00193670174015,0.0145694283786,folk/country +10131,0.0,0.0,0.0170052023063,0.00802386688944,0.0272954415446,experimental +10010,0.0225515712147,0.00391277974834,0.0267809943153,0.0,0.0,rock +8150,0.00460218243812,0.071111035697,0.0141328558462,0.0047467837946,0.0,rap +15200,0.00735267777,0.0,0.0242236331591,0.0,0.0239975955789,rock +9732,0.0125809846149,0.0256109171183,0.00642976506731,0.0,0.0371699633796,rock +689,0.00922208689778,0.0,0.017850024807,0.0,0.0108947585309,folk/country +11933,0.0,0.058375330318,0.0125361982253,0.0,0.00195285243229,rap +12258,0.0,0.0,0.0116514673114,0.0,0.0342643759294,experimental +2559,0.0304036509926,0.0,0.0115843308246,0.039661330091,0.016536877266,metal +5155,0.0,0.000792931534253,0.0144392238774,0.0217323934213,0.0251234119422,rock +10784,0.0,0.00981873598295,0.0101187875156,0.0,0.0298392054084,global +2307,0.0,0.0227632096732,0.0133115658149,0.0,0.0134265372142,rap +12327,0.002155653793,0.0,0.00838807395363,0.0,0.0281573431005,electronic +13749,0.00547647856858,0.0,0.0258674619523,0.0,0.0175606398498,folk/country +9932,0.0189549751272,0.0,0.0285579321787,0.0,0.0114316166477,rock +7847,0.00635726706409,0.0204468687663,0.0179166014464,0.00196999809917,0.00841449976979,pop/r&b +1013,0.0110119435911,0.0255936171433,0.0181754316955,0.0,0.0112322761578,global +6451,0.0147719275983,0.0,0.0330142889409,0.0,0.0,rock +4601,0.0,0.00301267945071,0.0218263885009,0.00760382619047,0.0234863295446,global +322,0.0,0.00015019762677,0.00880672466497,0.0,0.0393764314746,experimental +6048,0.00942870897701,0.0,0.0100492219917,0.0,0.0200293334024,pop/r&b +11096,0.0,0.0,0.024645599607,0.0290240828512,0.0110265292439,folk/country 
+15256,0.0286675542632,0.00583311537234,0.0143265114886,3.85006597642e-05,0.0220840143024,experimental +12077,0.0103852205836,0.0,0.0250666756212,0.0,0.00476725553002,folk/country +14945,0.0025760481776,0.0,0.00660209484801,0.0,0.053927263596,electronic +9698,0.00743840407441,0.00930975253914,0.0274176266135,0.0363302584565,0.0,electronic +10289,0.0,0.00721519906644,0.00972489345528,0.0,0.0404197720801,experimental +4797,0.0,0.0624172119218,0.0135228787748,0.000293525085481,4.06035460304e-05,rap +731,0.00406837078579,0.0,0.00300900169612,0.0,0.035029103745,experimental +14331,0.0,0.0,0.0036027551449,0.0,0.0362926582735,experimental +8587,0.00249190117586,0.0,0.0123117517991,0.0,0.0345788800615,electronic +14491,0.019272064138,0.00681832544188,0.0240294679187,0.00135890449974,0.00542145301026,global +4000,0.00569341426988,0.0522104401007,0.0167062466329,0.0,0.0130024630786,rap +11617,0.0168810561879,0.0,0.0270386103275,0.0,0.0156076575068,folk/country +963,0.00450891439701,0.0,0.010646261719,0.0,0.0356081801149,experimental +6178,0.00722654512063,0.0517105731491,0.0,0.00279963725721,0.0279231916204,rap +10000,0.00525805234739,0.00383259042658,0.02043541268,0.0,0.0225170727546,rock +6181,0.0048298164434,0.00614067839952,0.012923820693,0.0,0.02137594865,electronic +3794,0.0223873396396,0.00187339101233,0.00278142778519,0.0587313714759,0.019860647591,metal +3060,0.000979118442433,0.0,0.0414225180602,0.0,0.0,folk/country +10062,0.062459104869,0.0,0.00504545810986,0.0,0.0,rock +11885,0.0221251347613,0.00513932388567,0.00663132496382,0.0,0.0171673905825,global +2643,0.0041351717842,0.0505798980815,0.00271043961827,0.0,0.0112280201119,rap +12931,0.0102648012912,0.00429879221001,0.0100481811097,0.0,0.0294819963405,jazz +1258,0.000772967816184,0.0,0.0171459512857,0.0379410903122,0.0188620993077,metal +8933,0.0217261981863,0.00303790668509,0.0103106355444,0.0,0.016750819282,global +12460,0.0023326531013,0.000643000298032,0.0381857442311,0.0,0.0116703348357,pop/r&b 
+2897,0.0239552139722,0.0,0.0126998084621,0.0209571126941,0.0175659829849,metal +4511,0.00973157996017,0.0,0.00331760795779,0.0,0.0409154558266,electronic +10267,0.0,0.08768706972,0.0101613305676,0.00129381171581,0.0,rap +1981,0.000751972780055,0.0,0.0,0.0,0.0470883020317,electronic +15383,0.0135071827402,0.00328634217975,0.0346459965696,0.0,0.000942383039833,rock +1819,0.0,0.00262737919403,0.0155257246891,0.0,0.0361762810497,pop/r&b +6572,0.0402042496199,0.0,0.00670501079898,0.0104933604789,0.0119886187125,global +15181,0.0393009210777,0.00499156208632,0.011798486004,0.0,0.0208055601573,jazz +8555,0.00352200014757,0.0,0.00657164654298,0.00450296821214,0.037210922891,electronic +15099,0.0,0.0,0.0,0.00162668182212,0.0418331252405,electronic +14413,0.00158802872053,0.0,0.0319253595281,0.0,0.0126857887585,pop/r&b +7261,0.0300300391583,0.0,0.0156322373064,0.0,0.0130822338798,global +8818,0.0,0.0,0.025779552934,0.0142980081657,0.00849955564651,folk/country +7585,0.0227912481666,0.00276126133923,0.00997966403862,0.00125055977291,0.0161792561491,jazz +6378,0.0,0.0695679508134,0.0105145950329,0.0,0.00305823900675,rap +8504,0.0354614860238,0.00332517722296,0.0129992919358,0.0,0.0,rock +1151,0.00375643734751,0.0,0.0270894876987,0.0,0.0157709311821,rock +3076,0.00616193322366,0.021484811922,0.0218678271713,0.0,0.00917036439952,rap +6204,0.0,0.0,0.0360009355776,0.0,0.0163926795543,folk/country +1905,0.0152788222624,0.00598003421132,0.0241200534998,0.00102744375419,0.0111316680322,rock +5950,0.0,0.00580943885014,0.0109175549724,0.0,0.0336463212943,global +13453,0.0,0.0,0.0338554982654,0.0,0.0159006690276,experimental +3259,0.0,0.0479645113095,0.00698957305321,0.0,0.0251704885066,rap +9412,0.00489246384929,0.0,0.0166243850329,0.0,0.0168112959639,jazz +6965,0.00414580531787,0.0,0.0166422750762,0.0,0.0162226722051,global +8479,0.000349869115439,0.0831909136569,0.0,0.00479175690923,0.0047856490324,rap +7880,0.0,0.0,0.040111304267,0.0,0.00486286492591,folk/country 
+15904,0.0,0.0,0.0317939708251,0.0,0.00480114963741,rock +1744,0.0,0.0082047727723,0.0409387290893,0.0,0.0,folk/country +174,0.0,0.0,0.00340704927625,0.0128065047662,0.0341518365382,experimental +380,0.0,0.00110663100334,0.0186157870285,5.12835353424e-05,0.0367709169067,electronic +5505,0.0,0.0,0.00904313009613,0.0052555274768,0.0337178641395,electronic +11745,0.0123274894169,0.0,0.0111475390483,0.00590134497093,0.0118189272198,metal +2714,0.00929645028776,0.0,0.019613490305,0.0,0.0110900899049,experimental +14612,0.00482124141487,0.00738169191402,0.0288375342038,0.0,0.0108449363187,pop/r&b +10863,0.0,0.0,0.0204893396122,0.0,0.0257612117471,electronic +8992,0.0155801763336,0.00206145227634,0.0144319685461,0.0,0.0172861577391,rock +101,0.0,0.0,0.0023705868668,0.0,0.0426077447334,experimental +5674,0.0,0.00967703514159,0.0130450763773,0.0,0.03032160348,jazz +5399,0.0,0.0,0.0,0.012214856149,0.0345674644478,experimental +7538,0.00278883017428,0.00244963066726,0.00880371650773,0.00054569832902,0.0302019451344,global +14161,0.0204525562395,0.0,0.0348932788412,0.0,0.0,rock +11057,0.00390333354475,0.0,0.034119755568,0.00124754340503,0.00156181356581,folk/country +5469,0.00678726712382,0.0,0.03084126811,0.0,0.0063904939127,folk/country +2681,0.0,0.0620299594387,0.0149878252382,0.00446271105601,0.00196948888342,rap +7511,0.0,0.0,0.0214746641096,0.0,0.025528251561,global +13434,0.0,0.0034055695238,0.00749146703302,0.0,0.0445426590443,electronic +2883,0.0114831421805,0.0,0.0215514723602,0.0,0.0328421379817,experimental +7650,0.00333666757303,0.00152109142169,0.0119066670419,0.0852402070157,0.0169539576585,metal +2752,0.0,0.0126075036612,0.00631992919733,0.0,0.0347914858686,electronic +12392,0.00593992955649,0.00508696401179,0.0147735370891,0.0,0.0192597658674,electronic +5201,0.0,0.0,0.0296621515837,0.0,0.0115360608512,pop/r&b +2642,0.0,0.0154341765409,0.0388346861162,0.0,0.00533733910949,rap +14917,0.0,0.000559671597505,0.0281083435511,0.0689350360677,0.00171713785688,metal 
+11395,0.0333084951116,0.00134846468162,0.0156665793767,0.0390092205231,0.00337593824299,metal +9041,0.0,0.0,0.0186272686989,0.0,0.0259483068014,electronic +2276,0.0,0.0,0.00990482044326,0.0,0.0255685853562,experimental +15624,0.0,0.00745421608323,0.0149067035133,0.0,0.0278221046586,jazz +7846,0.0071832260027,0.00880376958171,0.0173544695781,0.00105493625598,0.0204055210211,folk/country +12771,0.0,0.0024326773679,0.00818852492406,0.0,0.0352988789734,jazz +10835,0.0112278636296,0.0,0.00937636987855,0.006492092273,0.0370136499052,experimental +15202,0.0100130982825,0.0,0.0189380867266,0.00555919202684,0.0259096540733,folk/country +10300,0.000771245287737,0.0152033268665,0.0261881612441,0.0,0.0204973331765,electronic +7559,0.000572768005556,0.0,0.0225086941767,0.0155651315235,0.0168060793243,experimental +14006,0.00608599753647,0.0,0.0232778596015,0.000185464070442,0.0176155223884,rock +1018,0.0,0.0472027301812,0.033340746164,0.0,0.0,rap +10702,0.00881218596308,0.00205313538201,0.0254975485535,0.0,0.0152956189325,metal +5851,0.00584269093479,0.00033938264815,0.029676838217,0.00152433580411,0.0132720556196,folk/country +13263,0.000886084044975,0.0141086770683,0.0256671084509,0.00347800407595,0.0,jazz +3843,0.0163798603509,0.0,0.00416541935647,0.0187589641826,0.0226935903863,experimental +2646,0.00790923902736,0.0,0.0289217844147,0.0,0.0148110184557,folk/country +5607,0.0,0.0310898825797,0.0,0.0,0.0313638846437,rap +3031,0.0,0.0,0.0269912478742,0.0,0.0142175329107,folk/country +250,0.00272080266218,0.0,0.0407475771329,0.0,0.00292359074096,rock +4094,0.000209637459699,0.00187612644748,0.0114759576557,0.0,0.0333450911266,electronic +10143,0.0191943139745,0.0,0.0131051622463,0.00279555360467,0.0222906663131,folk/country +1621,0.0,0.0,0.0204013868455,0.00206613161636,0.0314086444885,global +5056,0.0,0.023640993728,0.0204997791439,0.0,0.0124265311246,electronic +10005,0.0109401008802,0.0,0.00202092395318,0.000346361306288,0.0365345225769,global 
+3827,0.0,0.0,0.013152361952,0.0,0.0358210240942,electronic +760,0.00350856518599,0.0,0.0350084352508,0.00206925951605,0.00396841439294,pop/r&b +652,0.0,0.0591581637941,0.01443168314,0.0,0.00543802939439,rap +8211,0.000796261953468,0.0100849446218,0.0293564021784,0.00220763753632,0.0103326049684,global +9556,0.0,0.00291564329576,0.0317824948915,0.0,0.0142798098719,global +6477,0.0,0.00225533517462,0.0145401469267,0.0,0.0224677556084,pop/r&b +15899,0.0,0.0139359991772,0.0154032199717,0.000738847254119,0.0227452172223,electronic +9637,0.00652972206252,0.0375401221471,0.00276119783767,0.0,0.0327283470869,rap +11789,0.028582005686,0.0,0.0155868149917,0.0,0.0,rock +2108,0.0219375762767,0.000649372456488,0.0104507464628,0.0518735431778,0.00436386730457,metal +996,0.0144236016551,0.0,0.00927490089536,0.0,0.0292680169724,experimental +6012,0.0,0.00491239703789,0.000344961745338,0.0,0.0508317254748,electronic +4763,0.000780986056719,0.0,0.0443055701463,0.0,0.00960014768362,folk/country +722,0.0,0.00270514579736,0.0324219238218,0.0,0.0,pop/r&b +5943,0.00916416355022,0.0,0.0182910064599,0.0,0.0262675035978,folk/country +2403,0.00785415142186,0.0,0.0210598706871,0.0,0.0312739125122,jazz +6334,0.0137227664007,0.0,0.000175777777184,0.011570946477,0.0435721763559,electronic +11907,0.0,0.00344763537863,0.0239534465568,0.0,0.00810234194653,electronic +9184,0.00315135472419,0.00341108235433,0.0228023205841,0.0,0.00708182707651,folk/country +4594,0.0326769997006,0.00346372471254,0.0144945566582,0.0616699955996,0.00260912378154,metal +8346,0.00673716542015,0.0,0.0186359918827,0.0,0.0166479180533,folk/country +2719,0.0246754742294,0.000959547017658,0.0112998416589,0.0687082697986,0.00545621511878,metal +4215,0.0053975616547,0.00748650733678,0.0160871859946,0.00272188343002,0.0264235805465,experimental +10630,0.0191614874431,0.0,0.0151852087323,0.0,0.0111802365569,folk/country +1379,0.0,0.0151720936012,0.0,0.0,0.035311354938,electronic 
+2095,0.000932758021661,0.0375816556025,0.0317789003664,0.0,0.0,rap +8060,0.0,0.0,0.00792621381606,0.0,0.0315363850849,experimental +14655,0.00374207497704,0.0,0.0158105214199,0.00105439353783,0.0372391434793,electronic +8508,0.0,0.0547663772309,0.0238390468214,0.0,0.0,pop/r&b +9247,0.021113957024,0.0,0.00804373658672,0.0,0.020438641175,global +12648,0.0,0.0,0.0164336816812,0.0,0.0212175284892,pop/r&b +1209,0.00283819927637,0.0177193291956,0.0227158036332,0.0,0.0031042419216,rap +3027,0.0240562017741,0.0,0.00535703257563,0.00202724315522,0.0217260581371,jazz +14318,0.0147837130005,0.0,0.013845409066,0.0,0.0172375610467,experimental +2249,0.0,0.0442764444525,0.00706138594771,0.0,0.00981827584906,rap +2308,0.0,0.0747931303144,0.00336332002359,0.0,0.00201016708881,rap +10296,0.00108921908273,0.0,0.00891636581941,0.0156017193988,0.0348398124611,metal +2912,0.0114400290492,0.0,0.00564711060527,0.0,0.0347419841882,experimental +1768,0.0187952954321,0.0015448225855,0.0175491771293,0.00539212846246,0.0237635941502,metal +10146,0.0278698923163,0.0,0.00852362301142,0.0,0.00668632532485,rock +2965,0.000705170110217,0.0,0.0423768089241,0.00163642023693,0.000514293547908,folk/country +10350,0.0,0.0468259416155,0.0229114596435,0.0,0.00398064279243,rap +2952,0.0215590963643,0.001619070653,0.0180272082171,0.0,0.0083132062175,experimental +9852,0.00605345263244,0.0322592315902,0.0113302802731,0.0,0.0138615113768,rap +15560,0.000540993291304,0.0176110049303,0.023526950631,0.0,0.0128341437078,jazz +5645,0.0200503895919,0.0,0.0227209598486,4.72406501278e-05,0.00279936674109,pop/r&b +14184,0.0,0.042744639529,0.0,0.00457260378217,0.0286936600924,rap +4031,0.0,0.0667912063471,0.0117837411137,0.0,0.0,rap +11557,0.00762689124846,0.0118264547076,0.0195588036145,0.0,0.0223864096255,global +6747,0.0311279746095,0.0,0.0,0.0,0.0257853292634,jazz +13802,0.010616566921,0.0119253693906,0.00709666083694,0.023885248226,0.0337195661573,jazz 
+2399,0.0,0.0341353061704,0.0279328505473,0.00104897910668,0.0,rap +7918,0.025000169704,0.0,0.0142728306786,0.0,0.0163312715608,folk/country +10780,0.0138200654193,0.0,0.00502994965812,0.0432127249695,0.022329035568,metal +8232,0.00982131030283,0.00298444811864,0.0169344650055,0.00379766092104,0.0069511089062,rock +1637,0.0,0.0,0.0,0.0,0.0410690194555,experimental +28,0.0369969645841,0.0067826505917,0.0233583862529,0.0326556113299,0.00099325567376,metal +106,0.0319426343035,0.00222432999452,0.0268273943495,0.00400234110937,0.00300840890217,metal +10111,0.0117073899775,0.0,0.0133688844411,0.0,0.0325990193458,electronic +11608,0.0,0.00555875439745,0.0295275283663,0.00141150534832,0.0144673711731,global +8280,0.0020209639421,0.0,0.0292243146337,0.0,0.00840211376744,rock +529,0.0,0.0167210267331,0.0389765874512,0.0157648125398,0.0,pop/r&b +3186,0.0,0.0212934044762,0.0318814376493,0.0,0.0127066077765,global +15587,0.0122598060346,0.0185268543656,0.012296788878,0.0,0.00824517262215,electronic +11836,0.00481490960616,0.00125473799562,0.0280613055314,0.00496499549164,0.00958384746874,jazz +10226,0.00727123170678,0.0,0.019524893368,0.00769195501215,0.0122673521715,experimental +4345,0.0107687157084,4.32837165369e-05,0.0311928305671,0.0,0.0112029446975,global +3278,0.0,0.0152722495332,0.0445861424371,0.0,0.00141519267517,pop/r&b +8415,0.0,0.0,0.0323890017921,0.00191823471227,0.0121558712403,experimental +6855,0.0142122860056,0.0,0.0127570559367,0.0,0.0269246944141,jazz diff --git a/data/test/pollution_1.csv b/atm/data/test/pollution_1.csv similarity index 100% rename from data/test/pollution_1.csv rename to atm/data/test/pollution_1.csv diff --git a/atm/tests/__init__.py b/atm/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/atm/tests/integration_tests/__init__.py b/atm/tests/integration_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/atm/tests/unit_tests/__init__.py b/atm/tests/unit_tests/__init__.py new file mode 100644 
index 0000000..e69de29 diff --git a/atm/tests/unit_tests/test_enter_data.py b/atm/tests/unit_tests/test_enter_data.py new file mode 100644 index 0000000..be1f335 --- /dev/null +++ b/atm/tests/unit_tests/test_enter_data.py @@ -0,0 +1,126 @@ +import os +import json +import pytest + +from atm import constants, PROJECT_ROOT +from atm.config import SQLConfig, RunConfig +from atm.database import Database, db_session +from atm.enter_data import enter_data, create_dataset, create_datarun +from atm.utilities import get_local_data_path + + +DB_PATH = '/tmp/atm.db' +DATA_URL = 'https://s3.amazonaws.com/mit-dai-delphi-datastore/downloaded/' +BASELINE_PATH = os.path.join(PROJECT_ROOT, 'test/baselines/best_so_far/') +BASELINE_URL = 'https://s3.amazonaws.com/mit-dai-delphi-datastore/best_so_far/' + +METHOD_HYPERPARTS = { + 'logreg': 6, + 'svm': 4, + 'sgd': 24, + 'dt': 2, + 'et': 2, + 'rf': 2, + 'gnb': 1, + 'mnb': 1, + 'bnb': 1, + 'gp': 8, + 'pa': 4, + 'knn': 24, + 'mlp': 60, +} + + +@pytest.fixture +def db(): + return Database(dialect='sqlite', database=DB_PATH) + + +@pytest.fixture +def dataset(db): + ds = db.get_dataset(1) + if ds: + return ds + else: + data_path = os.path.join(PROJECT_ROOT, 'data/test/pollution_1.csv') + return create_dataset(db, 'class', data_path) + + +def test_create_dataset(db): + train_url = DATA_URL + 'pollution_1_train.csv' + test_url = DATA_URL + 'pollution_1_test.csv' + + train_path_local, _ = get_local_data_path(train_url) + if os.path.exists(train_path_local): + os.remove(train_path_local) + + test_path_local, _ = get_local_data_path(test_url) + if os.path.exists(test_path_local): + os.remove(test_path_local) + + run_conf = RunConfig(train_path=train_url, + test_path=test_url, + data_description='test', + label_column='class') + dataset = create_dataset(db, run_conf) + dataset = db.get_dataset(dataset.id) + + assert os.path.exists(train_path_local) + assert os.path.exists(test_path_local) + + assert dataset.train_path == train_url + assert 
dataset.test_path == test_url + assert dataset.description == 'test' + assert dataset.label_column == 'class' + assert dataset.n_examples == 60 + assert dataset.d_features == 16 + assert dataset.k_classes == 2 + assert dataset.majority >= 0.5 + + +def test_enter_data_by_methods(dataset): + sql_conf = SQLConfig(database=DB_PATH) + db = Database(**vars(sql_conf)) + run_conf = RunConfig(dataset_id=dataset.id) + + for method, n_parts in METHOD_HYPERPARTS.items(): + run_conf.methods = [method] + run_id = enter_data(sql_conf, run_conf) + + assert db.get_datarun(run_id) + with db_session(db): + run = db.get_datarun(run_id) + assert run.dataset.id == dataset.id + assert len(run.hyperpartitions) == n_parts + + +def test_enter_data_all(dataset): + sql_conf = SQLConfig(database=DB_PATH) + db = Database(**vars(sql_conf)) + run_conf = RunConfig(dataset_id=dataset.id, + methods=METHOD_HYPERPARTS.keys()) + + run_id = enter_data(sql_conf, run_conf) + + with db_session(db): + run = db.get_datarun(run_id) + assert run.dataset.id == dataset.id + assert len(run.hyperpartitions) == sum(METHOD_HYPERPARTS.values()) + + +def test_run_per_partition(dataset): + sql_conf = SQLConfig(database=DB_PATH) + db = Database(**vars(sql_conf)) + run_conf = RunConfig(dataset_id=dataset.id, methods=['logreg']) + + run_ids = enter_data(sql_conf, run_conf, run_per_partition=True) + + with db_session(db): + runs = [] + for run_id in run_ids: + run = db.get_datarun(run_id) + if run is not None: + runs.append(run) + + assert len(runs) == METHOD_HYPERPARTS['logreg'] + assert all([len(run.hyperpartitions) == 1 for run in runs]) diff --git a/atm/tests/unit_tests/test_method.py b/atm/tests/unit_tests/test_method.py new file mode 100644 index 0000000..dd9d3a1 --- /dev/null +++ b/atm/tests/unit_tests/test_method.py @@ -0,0 +1,37 @@ +#!/usr/bin/python2.7 +import pytest +import json + +from atm.method import Method + + +def test_enumerate(): + js = {'name': 'test', 'class': 'test'} + js['hyperparameters'] = { + 'a': 
{'type': 'int_cat', 'values': [0, 3]}, + 'b': {'type': 'int', 'range': [0, 3]}, + 'c': {'type': 'bool', 'values': [True, False]}, + 'd': {'type': 'string', 'values': ['x', 'y']}, + 'e': {'type': 'float_cat', 'values': [-0.5, 0.5, 1.0]}, + 'f': {'type': 'float', 'range': [0.5]}, + 'g': {'type': 'list', + 'list_length': [1, 2, 3], + 'element': {'type': 'int_exp', 'range': [1e-3, 1e3]}} + } + js['root_hyperparameters'] = ['a', 'f'] + js['conditional_hyperparameters'] = { + 'a': {'0': ['b'], '3': ['c']}, + 'c': {'True': ['d'], 'False': ['e', 'g']}, + } + + config_path = '/tmp/method.json' + with open(config_path, 'w') as f: + json.dump(js, f) + + hps = Method(config_path).get_hyperpartitions() + + assert len(hps) == 12 + assert all('a' in zip(*hp.categoricals)[0] for hp in hps) + assert all(('f', 0.5) in hp.constants for hp in hps) + assert len([hp for hp in hps if hp.tunables + and 'b' in zip(*hp.tunables)[0]]) == 1 diff --git a/atm/tests/unit_tests/test_worker.py b/atm/tests/unit_tests/test_worker.py new file mode 100644 index 0000000..bbfff12 --- /dev/null +++ b/atm/tests/unit_tests/test_worker.py @@ -0,0 +1,14 @@ +import pytest + +from atm.worker import Worker + +@pytest.fixture +def datarun(): + db = Database(**vars(sql_conf)) + sql_conf = SQLConfig(database=DB_PATH) + run_conf = RunConfig(dataset_id=dataset.id, methods=['logreg']) + +@pytest.fixture +def worker(): + worker = Worker() + diff --git a/install-prereqs.sh b/install-prereqs.sh deleted file mode 100644 index 01b2793..0000000 --- a/install-prereqs.sh +++ /dev/null @@ -1,3 +0,0 @@ -# This will be copied into the tox docker build and run during setup. 
-apt-get -qq update -apt-get -qq -y install mysql-client libmysqlclient-dev diff --git a/test/tests/unit_tests/test_enter_data.py b/test/tests/unit_tests/test_enter_data.py index a9d40a1..be1f335 100644 --- a/test/tests/unit_tests/test_enter_data.py +++ b/test/tests/unit_tests/test_enter_data.py @@ -9,7 +9,7 @@ from atm.utilities import get_local_data_path -DB_PATH = os.path.join(PROJECT_ROOT, 'test/atm.db') +DB_PATH = '/tmp/atm.db' DATA_URL = 'https://s3.amazonaws.com/mit-dai-delphi-datastore/downloaded/' BASELINE_PATH = os.path.join(PROJECT_ROOT, 'test/baselines/best_so_far/') BASELINE_URL = 'https://s3.amazonaws.com/mit-dai-delphi-datastore/best_so_far/' From 5e81cf75cba9558b8784e9add52f77e1ec8a579a Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Tue, 30 Jan 2018 10:56:10 -0500 Subject: [PATCH 17/36] make dependency links work; move method json to atm/ directory --- Makefile | 3 +- {methods => atm/methods}/adaboost.json | 0 .../methods}/bernoulli_naive_bayes.json | 0 {methods => atm/methods}/decision_tree.json | 0 {methods => atm/methods}/extra_trees.json | 0 .../methods}/gaussian_naive_bayes.json | 0 .../methods}/gaussian_process.json | 0 .../methods}/k_nearest_neighbors.json | 0 .../methods}/logistic_regression.json | 0 .../methods}/multi_layer_perceptron.json | 0 .../methods}/multinomial_naive_bayes.json | 0 .../methods}/passive_aggressive.json | 0 {methods => atm/methods}/random_forest.json | 0 .../methods}/stochastic_gradient_descent.json | 0 .../methods}/support_vector_machine.json | 0 requirements-test.txt | 2 - {test/scripts => scripts}/__init__.py | 0 {test/scripts => scripts}/end_to_end_test.py | 0 {test/scripts => scripts}/evaluate_btb.py | 0 {test/scripts => scripts}/method_test.py | 0 {test/scripts => scripts}/utilities.py | 0 setup.cfg | 5 +- setup.py | 7 +- test/tests/__init__.py | 0 test/tests/integration_tests/__init__.py | 0 test/tests/unit_tests/__init__.py | 0 test/tests/unit_tests/test_enter_data.py | 126 ------------------ 
test/tests/unit_tests/test_method.py | 37 ----- test/tests/unit_tests/test_worker.py | 14 -- 29 files changed, 8 insertions(+), 186 deletions(-) rename {methods => atm/methods}/adaboost.json (100%) rename {methods => atm/methods}/bernoulli_naive_bayes.json (100%) rename {methods => atm/methods}/decision_tree.json (100%) rename {methods => atm/methods}/extra_trees.json (100%) rename {methods => atm/methods}/gaussian_naive_bayes.json (100%) rename {methods => atm/methods}/gaussian_process.json (100%) rename {methods => atm/methods}/k_nearest_neighbors.json (100%) rename {methods => atm/methods}/logistic_regression.json (100%) rename {methods => atm/methods}/multi_layer_perceptron.json (100%) rename {methods => atm/methods}/multinomial_naive_bayes.json (100%) rename {methods => atm/methods}/passive_aggressive.json (100%) rename {methods => atm/methods}/random_forest.json (100%) rename {methods => atm/methods}/stochastic_gradient_descent.json (100%) rename {methods => atm/methods}/support_vector_machine.json (100%) rename {test/scripts => scripts}/__init__.py (100%) rename {test/scripts => scripts}/end_to_end_test.py (100%) rename {test/scripts => scripts}/evaluate_btb.py (100%) rename {test/scripts => scripts}/method_test.py (100%) rename {test/scripts => scripts}/utilities.py (100%) delete mode 100644 test/tests/__init__.py delete mode 100644 test/tests/integration_tests/__init__.py delete mode 100644 test/tests/unit_tests/__init__.py delete mode 100644 test/tests/unit_tests/test_enter_data.py delete mode 100644 test/tests/unit_tests/test_method.py delete mode 100644 test/tests/unit_tests/test_worker.py diff --git a/Makefile b/Makefile index 793b7da..0dfe8cd 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,7 @@ clean: find . -name '*.pyc' -delete find . -name __pycache__ -delete find . -name '*~' -delete + find . 
-name '*.egg-info' -delete lint: flake8 atm && isort --check-only --recursive atm @@ -13,6 +14,6 @@ test: lint installdeps: pip install --upgrade pip - pip install -e . + pip install -e . --process-dependency-links pip install -r requirements-dev.txt diff --git a/methods/adaboost.json b/atm/methods/adaboost.json similarity index 100% rename from methods/adaboost.json rename to atm/methods/adaboost.json diff --git a/methods/bernoulli_naive_bayes.json b/atm/methods/bernoulli_naive_bayes.json similarity index 100% rename from methods/bernoulli_naive_bayes.json rename to atm/methods/bernoulli_naive_bayes.json diff --git a/methods/decision_tree.json b/atm/methods/decision_tree.json similarity index 100% rename from methods/decision_tree.json rename to atm/methods/decision_tree.json diff --git a/methods/extra_trees.json b/atm/methods/extra_trees.json similarity index 100% rename from methods/extra_trees.json rename to atm/methods/extra_trees.json diff --git a/methods/gaussian_naive_bayes.json b/atm/methods/gaussian_naive_bayes.json similarity index 100% rename from methods/gaussian_naive_bayes.json rename to atm/methods/gaussian_naive_bayes.json diff --git a/methods/gaussian_process.json b/atm/methods/gaussian_process.json similarity index 100% rename from methods/gaussian_process.json rename to atm/methods/gaussian_process.json diff --git a/methods/k_nearest_neighbors.json b/atm/methods/k_nearest_neighbors.json similarity index 100% rename from methods/k_nearest_neighbors.json rename to atm/methods/k_nearest_neighbors.json diff --git a/methods/logistic_regression.json b/atm/methods/logistic_regression.json similarity index 100% rename from methods/logistic_regression.json rename to atm/methods/logistic_regression.json diff --git a/methods/multi_layer_perceptron.json b/atm/methods/multi_layer_perceptron.json similarity index 100% rename from methods/multi_layer_perceptron.json rename to atm/methods/multi_layer_perceptron.json diff --git 
a/methods/multinomial_naive_bayes.json b/atm/methods/multinomial_naive_bayes.json similarity index 100% rename from methods/multinomial_naive_bayes.json rename to atm/methods/multinomial_naive_bayes.json diff --git a/methods/passive_aggressive.json b/atm/methods/passive_aggressive.json similarity index 100% rename from methods/passive_aggressive.json rename to atm/methods/passive_aggressive.json diff --git a/methods/random_forest.json b/atm/methods/random_forest.json similarity index 100% rename from methods/random_forest.json rename to atm/methods/random_forest.json diff --git a/methods/stochastic_gradient_descent.json b/atm/methods/stochastic_gradient_descent.json similarity index 100% rename from methods/stochastic_gradient_descent.json rename to atm/methods/stochastic_gradient_descent.json diff --git a/methods/support_vector_machine.json b/atm/methods/support_vector_machine.json similarity index 100% rename from methods/support_vector_machine.json rename to atm/methods/support_vector_machine.json diff --git a/requirements-test.txt b/requirements-test.txt index b779c99..f8a47b8 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,5 +1,3 @@ --r requirements.txt - pytest==3.2.3 mock==2.0.0 pytest-xdist==1.20.1 diff --git a/test/scripts/__init__.py b/scripts/__init__.py similarity index 100% rename from test/scripts/__init__.py rename to scripts/__init__.py diff --git a/test/scripts/end_to_end_test.py b/scripts/end_to_end_test.py similarity index 100% rename from test/scripts/end_to_end_test.py rename to scripts/end_to_end_test.py diff --git a/test/scripts/evaluate_btb.py b/scripts/evaluate_btb.py similarity index 100% rename from test/scripts/evaluate_btb.py rename to scripts/evaluate_btb.py diff --git a/test/scripts/method_test.py b/scripts/method_test.py similarity index 100% rename from test/scripts/method_test.py rename to scripts/method_test.py diff --git a/test/scripts/utilities.py b/scripts/utilities.py similarity index 100% rename from 
test/scripts/utilities.py rename to scripts/utilities.py diff --git a/setup.cfg b/setup.cfg index 4d4f13b..c786619 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,10 +3,9 @@ description-file = README.md [aliases] test=pytest [tool:pytest] -addopts = -m "not requires_training_data and not requires_credentials" -python_files = test/tests/* +python_files = atm/tests/* [flake8] -exclude = docs/* +exclude = docs/*,atm/tests/* # E501: line too long error # E266: comment beginning with ## error # F403 and F405: 'from module import *' error diff --git a/setup.py b/setup.py index 07ab0a3..e9fa7ba 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ """ -Stripped down from the example at: +Stripped down and modified from the example at: https://github.com/pypa/sampleproject """ @@ -75,18 +75,19 @@ 'pyyaml>=3.12', 'joblib>=0.11', 'future>=0.16', + 'btb>=0.0.1', ], # TODO: this is deprecated. Figure out how to accomplish the same thing with # the proper tools. # https://www.python.org/dev/peps/pep-0440/#direct-references dependency_links=[ - 'git+ssh://git@github.com/hdi-project/btb.git#egg=btb', + 'git+ssh://git@github.com/hdi-project/btb.git#egg=btb-0.0.1', ], # This variable is used to specify requirements for *this file* to run. 
setup_requires=[], - test_suite='test/tests', + test_suite='atm/tests', tests_require=[ 'pytest>=3.2', 'mock>=2', diff --git a/test/tests/__init__.py b/test/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/test/tests/integration_tests/__init__.py b/test/tests/integration_tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/test/tests/unit_tests/__init__.py b/test/tests/unit_tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/test/tests/unit_tests/test_enter_data.py b/test/tests/unit_tests/test_enter_data.py deleted file mode 100644 index be1f335..0000000 --- a/test/tests/unit_tests/test_enter_data.py +++ /dev/null @@ -1,126 +0,0 @@ -import os -import json -import pytest - -from atm import constants, PROJECT_ROOT -from atm.config import SQLConfig, RunConfig -from atm.database import Database, db_session -from atm.enter_data import enter_data, create_dataset, create_datarun -from atm.utilities import get_local_data_path - - -DB_PATH = '/tmp/atm.db' -DATA_URL = 'https://s3.amazonaws.com/mit-dai-delphi-datastore/downloaded/' -BASELINE_PATH = os.path.join(PROJECT_ROOT, 'test/baselines/best_so_far/') -BASELINE_URL = 'https://s3.amazonaws.com/mit-dai-delphi-datastore/best_so_far/' - -METHOD_HYPERPARTS = { - 'logreg': 6, - 'svm': 4, - 'sgd': 24, - 'dt': 2, - 'et': 2, - 'rf': 2, - 'gnb': 1, - 'mnb': 1, - 'bnb': 1, - 'gp': 8, - 'pa': 4, - 'knn': 24, - 'mlp': 60, -} - - -@pytest.fixture -def db(): - return Database(dialect='sqlite', database=DB_PATH) - - -@pytest.fixture -def dataset(db): - ds = db.get_dataset(1) - if ds: - return ds - else: - data_path = os.path.join(PROJECT_ROOT, 'data/test/pollution_1.csv') - return create_dataset(db, 'class', data_path) - - -def test_create_dataset(db): - train_url = DATA_URL + 'pollution_1_train.csv' - test_url = DATA_URL + 'pollution_1_test.csv' - - train_path_local, _ = get_local_data_path(train_url) - if os.path.exists(train_path_local): - 
os.remove(train_path_local) - - test_path_local, _ = get_local_data_path(test_url) - if os.path.exists(test_path_local): - os.remove(test_path_local) - - run_conf = RunConfig(train_path=train_url, - test_path=test_url, - data_description='test', - label_column='class') - dataset = create_dataset(db, run_conf) - dataset = db.get_dataset(dataset.id) - - assert os.path.exists(train_path_local) - assert os.path.exists(test_path_local) - - assert dataset.train_path == train_url - assert dataset.test_path == test_url - assert dataset.description == 'test' - assert dataset.label_column == 'class' - assert dataset.n_examples == 60 - assert dataset.d_features == 16 - assert dataset.k_classes == 2 - assert dataset.majority >= 0.5 - - -def test_enter_data_by_methods(dataset): - sql_conf = SQLConfig(database=DB_PATH) - db = Database(**vars(sql_conf)) - run_conf = RunConfig(dataset_id=dataset.id) - - for method, n_parts in METHOD_HYPERPARTS.items(): - run_conf.methods = [method] - run_id = enter_data(sql_conf, run_conf) - - assert db.get_datarun(run_id) - with db_session(db): - run = db.get_datarun(run_id) - assert run.dataset.id == dataset.id - assert len(run.hyperpartitions) == n_parts - - -def test_enter_data_all(dataset): - sql_conf = SQLConfig(database=DB_PATH) - db = Database(**vars(sql_conf)) - run_conf = RunConfig(dataset_id=dataset.id, - methods=METHOD_HYPERPARTS.keys()) - - run_id = enter_data(sql_conf, run_conf) - - with db_session(db): - run = db.get_datarun(run_id) - assert run.dataset.id == dataset.id - assert len(run.hyperpartitions) == sum(METHOD_HYPERPARTS.values()) - - -def test_run_per_partition(dataset): - sql_conf = SQLConfig(database=DB_PATH) - db = Database(**vars(sql_conf)) - run_conf = RunConfig(dataset_id=dataset.id, methods=['logreg']) - - run_ids = enter_data(sql_conf, run_conf, run_per_partition=True) - - with db_session(db): - runs = [] - for run_id in run_ids: - run = db.get_datarun(run_id) - if run is not None: - runs.append(run) - - assert 
len(runs) == METHOD_HYPERPARTS['logreg'] - assert all([len(run.hyperpartitions) == 1 for run in runs]) diff --git a/test/tests/unit_tests/test_method.py b/test/tests/unit_tests/test_method.py deleted file mode 100644 index dd9d3a1..0000000 --- a/test/tests/unit_tests/test_method.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/python2.7 -import pytest -import json - -from atm.method import Method - - -def test_enumerate(): - js = {'name': 'test', 'class': 'test'} - js['hyperparameters'] = { - 'a': {'type': 'int_cat', 'values': [0, 3]}, - 'b': {'type': 'int', 'range': [0, 3]}, - 'c': {'type': 'bool', 'values': [True, False]}, - 'd': {'type': 'string', 'values': ['x', 'y']}, - 'e': {'type': 'float_cat', 'values': [-0.5, 0.5, 1.0]}, - 'f': {'type': 'float', 'range': [0.5]}, - 'g': {'type': 'list', - 'list_length': [1, 2, 3], - 'element': {'type': 'int_exp', 'range': [1e-3, 1e3]}} - } - js['root_hyperparameters'] = ['a', 'f'] - js['conditional_hyperparameters'] = { - 'a': {'0': ['b'], '3': ['c']}, - 'c': {'True': ['d'], 'False': ['e', 'g']}, - } - - config_path = '/tmp/method.json' - with open(config_path, 'w') as f: - json.dump(js, f) - - hps = Method(config_path).get_hyperpartitions() - - assert len(hps) == 12 - assert all('a' in zip(*hp.categoricals)[0] for hp in hps) - assert all(('f', 0.5) in hp.constants for hp in hps) - assert len([hp for hp in hps if hp.tunables - and 'b' in zip(*hp.tunables)[0]]) == 1 diff --git a/test/tests/unit_tests/test_worker.py b/test/tests/unit_tests/test_worker.py deleted file mode 100644 index bbfff12..0000000 --- a/test/tests/unit_tests/test_worker.py +++ /dev/null @@ -1,14 +0,0 @@ -import pytest - -from atm.worker import Worker - -@pytest.fixture -def datarun(): - db = Database(**vars(sql_conf)) - sql_conf = SQLConfig(database=DB_PATH) - run_conf = RunConfig(dataset_id=dataset.id, methods=['logreg']) - -@pytest.fixture -def worker(): - worker = Worker() - From 86316b2954fa67350fa7ecab2b507da70394283f Mon Sep 17 00:00:00 2001 From: 
Bennett Cyphers Date: Tue, 30 Jan 2018 13:33:20 -0500 Subject: [PATCH 18/36] add configurable log directory --- atm/constants.py | 1 - atm/tests/unit_tests/test_enter_data.py | 2 +- atm/worker.py | 52 +++++++++++++------------ 3 files changed, 28 insertions(+), 27 deletions(-) diff --git a/atm/constants.py b/atm/constants.py index f3bb7f7..1e851cd 100644 --- a/atm/constants.py +++ b/atm/constants.py @@ -31,7 +31,6 @@ TIME_FMT = '%Y-%m-%d %H:%M' DATA_DL_PATH = os.path.join(PROJECT_ROOT, 'data/downloads') METHOD_PATH = os.path.join(PROJECT_ROOT, 'methods') -LOG_PATH = os.path.join(PROJECT_ROOT, 'logs') CUSTOM_CLASS_REGEX = '(.*\.py):(\w+)$' JSON_REGEX = '(.*\.json)$' diff --git a/atm/tests/unit_tests/test_enter_data.py b/atm/tests/unit_tests/test_enter_data.py index be1f335..59756ae 100644 --- a/atm/tests/unit_tests/test_enter_data.py +++ b/atm/tests/unit_tests/test_enter_data.py @@ -11,7 +11,7 @@ DB_PATH = '/tmp/atm.db' DATA_URL = 'https://s3.amazonaws.com/mit-dai-delphi-datastore/downloaded/' -BASELINE_PATH = os.path.join(PROJECT_ROOT, 'test/baselines/best_so_far/') +BASELINE_PATH = os.path.join(PROJECT_ROOT, 'data/baselines/best_so_far/') BASELINE_URL = 'https://s3.amazonaws.com/mit-dai-delphi-datastore/best_so_far/' METHOD_HYPERPARTS = { diff --git a/atm/worker.py b/atm/worker.py index d2ed205..7d8bf00 100755 --- a/atm/worker.py +++ b/atm/worker.py @@ -30,26 +30,14 @@ os.environ['GNUMPY_IMPLICIT_CONVERSION'] = 'allow' # get the file system in order -DEFAULT_MODEL_DIR = os.path.join(PROJECT_ROOT, 'models') -DEFAULT_METRIC_DIR = os.path.join(PROJECT_ROOT, 'metrics') - -# make sure we have directories where we need them -ensure_directory(LOG_PATH) - -# name log file after the local hostname -LOG_FILE = os.path.join(LOG_PATH, '%s.txt' % socket.gethostname()) +DEFAULT_MODEL_DIR = 'models' +DEFAULT_METRIC_DIR = 'metrics' +DEFAULT_LOG_DIR = 'logs' # how long to sleep between loops while waiting for new dataruns to be added LOOP_WAIT = 1 - -# TODO: use python's logging 
module instead of this -def _log(msg, stdout=True): - with open(LOG_FILE, 'a') as lf: - lf.write(msg + '\n') - if stdout: - print(msg) - +logger = logging.getLogger('atm') # Exception thrown when something goes wrong for the worker, but the worker # handles the error. @@ -60,7 +48,8 @@ class ClassifierError(Exception): class Worker(object): def __init__(self, database, datarun, save_files=True, cloud_mode=False, aws_config=None, model_dir=DEFAULT_MODEL_DIR, - metric_dir=DEFAULT_METRIC_DIR, verbose_metrics=False): + metric_dir=DEFAULT_METRIC_DIR, + log_dir=DEFAULT_LOG_DIR, verbose_metrics=False): """ database: Database object with connection information datarun: Datarun ORM object to work on. @@ -75,6 +64,10 @@ def __init__(self, database, datarun, save_files=True, cloud_mode=False, self.aws_config = aws_config self.verbose_metrics = verbose_metrics + ensure_directory(log_dir) + # name log file after the local hostname + self.log_file = os.path.join(self.log_dir, '%s.txt' % socket.gethostname()) + self.model_dir = model_dir self.metric_dir = metric_dir ensure_directory(self.model_dir) @@ -87,6 +80,13 @@ def __init__(self, database, datarun, save_files=True, cloud_mode=False, self.load_selector() self.load_tuner() + # TODO: use python's logging module instead of this + def _log(self, log_file, msg, stdout=True): + with open(log_file, 'a') as lf: + lf.write(msg + '\n') + if stdout: + print(msg) + def load_selector(self): """ Load and initialize the BTB class which will be responsible for @@ -418,7 +418,8 @@ def run_classifier(self, hyperpartition_id=None): def work(db, datarun_ids=None, save_files=False, choose_randomly=True, cloud_mode=False, aws_config=None, total_time=None, wait=True, - model_dir='models', metric_dir='metrics', verbose_metrics=False): + model_dir=DEFAULT_MODEL_DIR, metric_dir=DEFAULT_METRIC_DIR, + log_dir=DEFAULT_LOG_DIR, verbose_metrics=False): """ Check the ModelHub database for unfinished dataruns, and spawn workers to work on them as they are 
added. This process will continue to run until it @@ -476,7 +477,7 @@ def work(db, datarun_ids=None, save_files=False, choose_randomly=True, worker = Worker(db, run, save_files=save_files, cloud_mode=cloud_mode, aws_config=aws_config, model_dir=model_dir, metric_dir=metric_dir, - verbose_metrics=verbose_metrics) + log_dir=log_dir, verbose_metrics=verbose_metrics) try: worker.run_classifier() except ClassifierError: @@ -507,12 +508,12 @@ def work(db, datarun_ids=None, save_files=False, choose_randomly=True, parser.add_argument('--no-save', dest='save_files', default=True, action='store_const', const=False, help="don't save models and metrics for later") - parser.add_argument('--model-dir', dest='model_persist_dir', - default=DEFAULT_MODEL_DIR, + parser.add_argument('--model-dir', default=DEFAULT_MODEL_DIR, help='Directory where computed models will be saved') - parser.add_argument('--metric-dir', dest='metric_persist_dir', - default=DEFAULT_METRIC_DIR, + parser.add_argument('--metric-dir', default=DEFAULT_METRIC_DIR, help='Directory where model metrics will be saved') + parser.add_argument('--log-dir', default=DEFAULT_LOG_DIR, + help='Directory where logs will be saved') parser.add_argument('--verbose-metrics', default=False, action='store_true', help='If set, compute full ROC and PR curves and ' 'per-label metrics for each classifier') @@ -530,6 +531,7 @@ def work(db, datarun_ids=None, save_files=False, choose_randomly=True, aws_config=aws_config, total_time=args.time, wait=False, - model_dir=args.model_persist_dir, - metric_dir=args.metric_persist_dir, + model_dir=args.model_dir, + metric_dir=args.metric_dir, + log_dir=args.log_dir, verbose_metrics=args.verbose_metrics) From 824241abd4701606a3f8b9b81c54be3499094fb5 Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Wed, 31 Jan 2018 14:50:38 -0500 Subject: [PATCH 19/36] add some worker tests and functions for saving/loading modelhub state --- atm/database.py | 46 ++++- atm/tests/unit_tests/test_worker.py | 87 
++++++++- atm/worker.py | 264 ++++++++++++++-------------- tox.ini | 1 + 4 files changed, 257 insertions(+), 141 deletions(-) diff --git a/atm/database.py b/atm/database.py index 7d225e2..5469a28 100644 --- a/atm/database.py +++ b/atm/database.py @@ -1,5 +1,6 @@ from __future__ import absolute_import, print_function +import pandas as pd from datetime import datetime from operator import attrgetter @@ -12,6 +13,9 @@ from .constants import * from .utilities import * +# The maximum number of errors allowed in a single hyperpartition. If more than +# this many classifiers using a hyperpartition error, the hyperpartition will be +# considered broken and ignored for the rest of the datarun. MAX_HYPERPARTITION_ERRORS = 3 @@ -107,7 +111,7 @@ class Dataset(Base): size_kb = Column(Integer, nullable=False) def __repr__(self): - base = "<%s: %s, %d classes, %d features, %d examples>" + base = "<%s: %s, %d classes, %d features, %d rows>" return base % (self.name, self.description, self.k_classes, self.d_features, self.n_examples) @@ -282,6 +286,32 @@ def __repr__(self): Base.metadata.create_all(bind=self.engine) + ########################################################################### + ## Save/load the database ############################################### + ########################################################################### + + @try_with_session() + def to_csv(self, path): + """ + Save the entire ModelHub database as a set of CSVs in the given + directory. + """ + for table in ['datasets', 'dataruns', 'hyperpartitions', 'classifiers']: + df = pd.read_sql('SELECT * FROM %s' % table, self.session.bind) + df.to_csv(os.path.join(path, '%s.csv' % table)) + + @try_with_session(commit=True) + def from_csv(self, path): + """ + Load a snapshot of the ModelHub database from a set of CSVs in the given + directory. 
+ """ + for table in ['datasets', 'dataruns', 'hyperpartitions', 'classifiers']: + df = pd.read_csv(os.path.join(path, '%s.csv' % table)) + for _, r in df.iterrows(): + create_func = getattr(self, 'create_%s' % table) + create_func(**r) + ########################################################################### ## Standard query methods ############################################### ########################################################################### @@ -484,12 +514,18 @@ def create_datarun(self, **kwargs): @try_with_session(commit=True) def create_hyperpartition(self, **kwargs): - part = self.Hyperpartition(**kwargs) - self.session.add(part) - return part + partition = self.Hyperpartition(**kwargs) + self.session.add(partition) + return partition + + @try_with_session(commit=True) + def create_classifier(self, **kwargs): + classifier = self.Classifier(**kwargs) + self.session.add(classifier) + return classifier @try_with_session(commit=True) - def create_classifier(self, hyperpartition_id, datarun_id, host, params): + def start_classifier(self, hyperpartition_id, datarun_id, host, params): """ Save a new, fully qualified classifier object to the database. 
Returns: the ID of the newly-created classifier diff --git a/atm/tests/unit_tests/test_worker.py b/atm/tests/unit_tests/test_worker.py index bbfff12..7bcbef9 100644 --- a/atm/tests/unit_tests/test_worker.py +++ b/atm/tests/unit_tests/test_worker.py @@ -2,13 +2,90 @@ from atm.worker import Worker +from btb.tuning import GCP +from btb.selection import HierarchicalByAlgorithm + +DB_PATH = '/tmp/atm.db' + +DT_PARAMS = {'criterion': 'gini', 'max_features': 0.5, 'max_depth': 3, + 'min_samples_split': 2, 'min_samples_leaf': 1} + + +@pytest.fixture +def db(): + os.remove(DB_PATH) + return Database(dialect='sqlite', database=DB_PATH) + + +@pytest.fixture +def dataset(db): + return db.get_dataset(1) + + @pytest.fixture def datarun(): - db = Database(**vars(sql_conf)) - sql_conf = SQLConfig(database=DB_PATH) - run_conf = RunConfig(dataset_id=dataset.id, methods=['logreg']) + return db.get_datarun(1) + @pytest.fixture -def worker(): - worker = Worker() +def model(datarun): + return Model(method=dt, params=DT_PARAMS, + judgment_metric='cv_judgment_metric', + label_column=datarun.label_column) + + +def get_worker(db, dataset, **kwargs): + kwargs['methods'] = kwargs.get('methods', ['logreg', 'dt']) + run_conf = RunConfig(**kwargs) + datarun = create_datarun(db, dataset, run_conf) + return Worker(db, datarun) + + +def test_load_selector_and_tuner(db, dataset): + worker = get_worker(db, dataset, selector='hieralg', k_window=7, + tuner='gcp', r_minimum=7, gridding=3) + assert type(worker.selector) == HierarchicalByAlgorithm + assert len(worker.selector.choices) == 6 + assert worker.selector.k == 7 + assert worker.selector.by_algorithm['logreg'] == 4 + assert worker.Tuner == GCP + + +def test_load_custom_selector_and_tuner(db, dataset): + tuner_path = './mytuner.py' + selector_path = './myselector.py' + worker = get_worker(db, dataset, selector=selector_path + ':MySelector', + tuner=tuner_path + ':MyTuner') + assert isinstance(worker.selector, CustomSelector) + assert 
issubclass(worker.Tuner, CustomTuner) + + +def test_select_and_tune(): + """ + This won't test that BTB is working correctly, just that the ATM-BTB + connection is working. + """ + worker = get_worker(db, dataset, selector='BestK', k_window=5) + part = worker.select_hyperpartition() + params = worker.tune_hyperparameters(part) + + +def test_tune_hyperparameters(): + pass + + +def test_test_classifier(db, dataset): + worker = get_worker(db, dataset, save_files=True) + + +def test_save_classifier(db, dataset, model): + worker = get_worker(db, dataset, save_files=True) + worker.save_classifier(1, ) + + +def test_is_datarun_finished(): + pass + +def test_run_classifier(): + pass diff --git a/atm/worker.py b/atm/worker.py index 7d8bf00..172c341 100755 --- a/atm/worker.py +++ b/atm/worker.py @@ -37,7 +37,16 @@ # how long to sleep between loops while waiting for new dataruns to be added LOOP_WAIT = 1 -logger = logging.getLogger('atm') +# TODO: use python's logging module instead of this +LOG_FILE = None + +def _log(msg, stdout=True): + if LOG_FILE: + with open(LOG_FILE, 'a') as lf: + lf.write(msg + '\n') + if stdout: + print(msg) + # Exception thrown when something goes wrong for the worker, but the worker # handles the error. @@ -48,8 +57,7 @@ class ClassifierError(Exception): class Worker(object): def __init__(self, database, datarun, save_files=True, cloud_mode=False, aws_config=None, model_dir=DEFAULT_MODEL_DIR, - metric_dir=DEFAULT_METRIC_DIR, - log_dir=DEFAULT_LOG_DIR, verbose_metrics=False): + metric_dir=DEFAULT_METRIC_DIR, verbose_metrics=False): """ database: Database object with connection information datarun: Datarun ORM object to work on. 
@@ -64,10 +72,6 @@ def __init__(self, database, datarun, save_files=True, cloud_mode=False, self.aws_config = aws_config self.verbose_metrics = verbose_metrics - ensure_directory(log_dir) - # name log file after the local hostname - self.log_file = os.path.join(self.log_dir, '%s.txt' % socket.gethostname()) - self.model_dir = model_dir self.metric_dir = metric_dir ensure_directory(self.model_dir) @@ -80,13 +84,6 @@ def __init__(self, database, datarun, save_files=True, cloud_mode=False, self.load_selector() self.load_tuner() - # TODO: use python's logging module instead of this - def _log(self, log_file, msg, stdout=True): - with open(log_file, 'a') as lf: - lf.write(msg + '\n') - if stdout: - print(msg) - def load_selector(self): """ Load and initialize the BTB class which will be responsible for @@ -133,90 +130,6 @@ def load_tuner(self): self.Tuner = getattr(mod, classname) _log('Tuner: %s' % self.Tuner) - def save_classifier(self, classifier_id, model, metrics): - """ - Update a classifier with metrics and model information and mark it as - "complete" - - classifier_id: ID of the classifier to save - model: Model object containing a serializable representation of the - final model generated by this classifier. - metrics: Dictionary containing cross-validation and test metrics data - for the model. 
- """ - # whether to save model and metrics data to the filesystem - if self.save_files: - # keep a database session open so that the utility functions can - # access the linked hyperpartitions and dataruns - with db_session(self.db): - classifier = self.db.get_classifier(classifier_id) - model_path = save_model(classifier, self.model_dir, model) - metric_path = save_metrics(classifier, self.metric_dir, metrics) - - # if necessary, save model and metrics to Amazon S3 bucket - if self.cloud_mode: - try: - self.save_classifier_cloud(model_path, metric_path) - except Exception: - msg = traceback.format_exc() - _log('Error in save_classifier_cloud()') - self.db.mark_classifier_errored(classifier_id, error_msg=msg) - else: - model_path = None - metric_path = None - - # update the classifier in the database - self.db.complete_classifier(classifier_id=classifier_id, - trainable_params=model.trainable_params, - dimensions=model.dimensions, - model_path=model_path, - metric_path=metric_path, - cv_score=model.cv_judgment_metric, - cv_stdev=model.cv_judgment_metric_stdev, - test_score=model.test_judgment_metric) - - # update this session's hyperpartition entry - _log('Saved classifier %d.' % classifier_id) - - def save_classifier_cloud(self, local_model_path, local_metric_path): - """ - Save a classifier to the S3 bucket supplied by aws_config. Saves a - serialized representaion of the model as well as a detailed set - of metrics. 
- - local_model_path: path to serialized model in the local file system - local_metric_path: path to serialized metrics in the local file system - """ - # TODO: This does not work - conn = S3Connection(self.aws_config.access_key, self.aws_config.secret_key) - bucket = conn.get_bucket(s3_bucket) - - if aws_folder: - aws_model_path = os.path.join(aws_folder, local_model_path) - aws_metric_path = os.path.join(aws_folder, local_metric_path) - else: - aws_model_path = local_model_path - aws_metric_path = local_metric_path - - kmodel = S3Key(bucket) - kmodel.key = aws_model_path - kmodel.set_contents_from_filename(local_model_path) - _log('Uploading model at %s to S3 bucket %s' % (s3_bucket, - local_model_path)) - - kmodel = S3Key(bucket) - kmodel.key = aws_metric_path - kmodel.set_contents_from_filename(local_metric_path) - _log('Uploading metrics at %s to S3 bucket %s' % (s3_bucket, - local_metric_path)) - - # delete the local copy of the model & metrics so that they don't fill - # up the worker instance's hard drive - _log('Deleting local copies of %s and %s' % (local_model_path, - local_metric_path)) - os.remove(local_model_path) - os.remove(local_metric_path) - def select_hyperpartition(self): """ Use the hyperpartition selection method specified by our datarun to choose a @@ -245,7 +158,7 @@ def select_hyperpartition(self): hyperpartition_id = self.selector.select(hyperpartition_scores) return self.db.get_hyperpartition(hyperpartition_id) - def tune_parameters(self, hyperpartition): + def tune_hyperparameters(self, hyperpartition): """ Use the hyperparameter tuning method specified by our datarun to choose a set of hyperparameters from the potential space. @@ -295,33 +208,6 @@ def tune_parameters(self, hyperpartition): categoricals=hyperpartition.categoricals, constants=hyperpartition.constants) - def is_datarun_finished(self): - """ - Check to see whether the datarun is finished. 
This could be due to the - budget being exhausted or due to hyperparameter gridding being done. - """ - hyperpartitions = self.db.get_hyperpartitions(datarun_id=self.datarun.id) - if not hyperpartitions: - _log('No incomplete hyperpartitions for datarun %d present in database.' - % self.datarun.id) - return True - - if self.datarun.budget_type == 'classifier': - # hyperpartition classifier counts are updated whenever a classifier - # is created, so this will count running, errored, and complete. - n_completed = len(self.db.get_classifiers(datarun_id=self.datarun.id)) - if n_completed >= self.datarun.budget: - _log('Classifier budget has run out!') - return True - - elif self.datarun.budget_type == 'walltime': - deadline = self.datarun.deadline - if datetime.datetime.now() > deadline: - _log('Walltime budget has run out!') - return True - - return False - def test_classifier(self, method, params): """ Given a set of fully-qualified hyperparameters, create and test a @@ -362,6 +248,117 @@ def metric_string(model): return model, metrics + def save_classifier(self, classifier_id, model, metrics): + """ + Update a classifier with metrics and model information and mark it as + "complete" + + classifier_id: ID of the classifier to save + model: Model object containing a serializable representation of the + final model generated by this classifier. + metrics: Dictionary containing cross-validation and test metrics data + for the model. 
+ """ + # whether to save model and metrics data to the filesystem + if self.save_files: + # keep a database session open so that the utility functions can + # access the linked hyperpartitions and dataruns + with db_session(self.db): + classifier = self.db.get_classifier(classifier_id) + model_path = save_model(classifier, self.model_dir, model) + metric_path = save_metrics(classifier, self.metric_dir, metrics) + + # if necessary, save model and metrics to Amazon S3 bucket + if self.cloud_mode: + try: + self.save_classifier_cloud(model_path, metric_path) + except Exception: + msg = traceback.format_exc() + _log('Error in save_classifier_cloud()') + self.db.mark_classifier_errored(classifier_id, error_msg=msg) + else: + model_path = None + metric_path = None + + # update the classifier in the database + self.db.complete_classifier(classifier_id=classifier_id, + trainable_params=model.trainable_params, + dimensions=model.dimensions, + model_path=model_path, + metric_path=metric_path, + cv_score=model.cv_judgment_metric, + cv_stdev=model.cv_judgment_metric_stdev, + test_score=model.test_judgment_metric) + + # update this session's hyperpartition entry + _log('Saved classifier %d.' % classifier_id) + + def save_classifier_cloud(self, local_model_path, local_metric_path): + """ + Save a classifier to the S3 bucket supplied by aws_config. Saves a + serialized representaion of the model as well as a detailed set + of metrics. 
+ + local_model_path: path to serialized model in the local file system + local_metric_path: path to serialized metrics in the local file system + """ + # TODO: This does not work + conn = S3Connection(self.aws_config.access_key, self.aws_config.secret_key) + bucket = conn.get_bucket(s3_bucket) + + if aws_folder: + aws_model_path = os.path.join(aws_folder, local_model_path) + aws_metric_path = os.path.join(aws_folder, local_metric_path) + else: + aws_model_path = local_model_path + aws_metric_path = local_metric_path + + kmodel = S3Key(bucket) + kmodel.key = aws_model_path + kmodel.set_contents_from_filename(local_model_path) + _log('Uploading model at %s to S3 bucket %s' % (s3_bucket, + local_model_path)) + + kmodel = S3Key(bucket) + kmodel.key = aws_metric_path + kmodel.set_contents_from_filename(local_metric_path) + _log('Uploading metrics at %s to S3 bucket %s' % (s3_bucket, + local_metric_path)) + + # delete the local copy of the model & metrics so that they don't fill + # up the worker instance's hard drive + _log('Deleting local copies of %s and %s' % (local_model_path, + local_metric_path)) + os.remove(local_model_path) + os.remove(local_metric_path) + + def is_datarun_finished(self): + """ + Check to see whether the datarun is finished. This could be due to the + budget being exhausted or due to hyperparameter gridding being done. + """ + hyperpartitions = self.db.get_hyperpartitions(datarun_id=self.datarun.id) + if not hyperpartitions: + _log('No incomplete hyperpartitions for datarun %d present in database.' + % self.datarun.id) + return True + + if self.datarun.budget_type == 'classifier': + # hyperpartition classifier counts are updated whenever a classifier + # is created, so this will count running, errored, and complete. 
+ n_completed = len(self.db.get_classifiers(datarun_id=self.datarun.id)) + if n_completed >= self.datarun.budget: + _log('Classifier budget has run out!') + return True + + elif self.datarun.budget_type == 'walltime': + deadline = self.datarun.deadline + if datetime.datetime.now() > deadline: + _log('Walltime budget has run out!') + return True + + return False + def run_classifier(self, hyperpartition_id=None): """ Choose hyperparameters, then use them to test and save a Classifier. @@ -382,7 +379,7 @@ def run_classifier(self, hyperpartition_id=None): hyperpartition = self.select_hyperpartition() # use tuner to choose a set of parameters for the hyperpartition - params = self.tune_parameters(hyperpartition) + params = self.tune_hyperparameters(hyperpartition) except Exception: _log('Error choosing hyperparameters: datarun=%s' % str(self.datarun)) _log(traceback.format_exc()) @@ -398,10 +395,10 @@ def run_classifier(self, hyperpartition_id=None): _log('\t%s = %s' % (k, params[k])) _log('Creating classifier...') - classifier = self.db.create_classifier(hyperpartition_id=hyperpartition.id, - datarun_id=self.datarun.id, - host=get_public_ip(), - params=params) + classifier = self.db.start_classifier(hyperpartition_id=hyperpartition.id, + datarun_id=self.datarun.id, + host=get_public_ip(), + params=params) try: _log('Testing classifier...') @@ -443,6 +440,11 @@ def work(db, datarun_ids=None, save_files=False, choose_randomly=True, """ start_time = datetime.datetime.now() + ensure_directory(log_dir) + # name log file after the local hostname + global LOG_FILE + LOG_FILE = os.path.join(log_dir, '%s.txt' % socket.gethostname()) + # main loop while True: # get all pending and running dataruns, or all pending/running dataruns diff --git a/tox.ini b/tox.ini index 24d1e1c..7ab4f58 100644 --- a/tox.ini +++ b/tox.ini @@ -5,6 +5,7 @@ envlist = clean,py27 [testenv] commands= pytest --cov=atm deps= -rrequirements-test.txt +install_command = pip install --process-dependency-links 
{opts} {packages} [testenv:clean] commands= From 49c72a4b303b7899d9e6b1b286e0fbbbf88671da Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Wed, 31 Jan 2018 15:03:43 -0500 Subject: [PATCH 20/36] update yaml config with new field names --- atm/config/templates/run.yaml | 16 +++++++--------- atm/config/test/run-all.yaml | 10 ++++------ atm/config/test/run-basic.yaml | 10 ++++------ 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/atm/config/templates/run.yaml b/atm/config/templates/run.yaml index 9a8519f..37a74dc 100644 --- a/atm/config/templates/run.yaml +++ b/atm/config/templates/run.yaml @@ -1,9 +1,9 @@ ## Dataset arguments -train_path: data/test/pollution_1.csv +train_path: atm/data/test/pollution_1.csv # if test_path is not supplied, train_path is assumed to point to train/test data test_path: data_description: "Example dataset description" -label_column: class +class_column: class ## Datarun arguments dataset_id: @@ -11,8 +11,6 @@ methods: - rf - logreg - dt -# directory to store trained models; will be created if it doesn't exist -models_dir: models/ # priority (higher number is more important) priority: 1 # Should there be a classifier or walltime budget? @@ -21,17 +19,17 @@ budget_type: classifier budget: 100 # How should ATM sample hyperparameters from a given frozen set? tuner: gp -# r_min is the number of random runs performed in each hyperpartition before +# r_minimum is the number of random runs performed in each hyperpartition before # allowing bayesian opt to select parameters. -r_min: 2 +r_minimum: 2 # gridding determines whether or not sample selection will happen on a grid. gridding: 0 # How should ATM select a particular hyperpartition (frozen set) from the # set of all hyperpartitions? selector: bestk -# k is number that xxx_k methods use. It is similar to r_min, except it is -# called k_window and determines how much "history" ATM considers for certain -# frozen selection logics. +# k is number that xxx_k methods use. 
It is similar to r_minimum, except it +# determines how much "history" ATM considers for certain hyperpartition +# selection logic. k_window: 5 # Which field to use for judgment of performance # options: f1, roc_auc, accuracy diff --git a/atm/config/test/run-all.yaml b/atm/config/test/run-all.yaml index 309194d..babd60c 100644 --- a/atm/config/test/run-all.yaml +++ b/atm/config/test/run-all.yaml @@ -2,7 +2,7 @@ train_path: test_path: data_description: -label_column: class +class_column: class # use every method we have methods: @@ -19,8 +19,6 @@ methods: - pa - knn - mlp -# directory to store trained models; will be created if it doesn't exist -models_dir: models/ # priority (higher number is more important) priority: 1 # Should there be a classifier or walltime budget? @@ -29,15 +27,15 @@ budget_type: classifier budget: 100 # How should ATM sample hyperparameters from a given frozen set? tuner: gp -# r_min is the number of random runs performed in each hyperpartition before +# r_minimum is the number of random runs performed in each hyperpartition before # allowing bayesian opt to select parameters. -r_min: 2 +r_minimum: 2 # gridding determines whether or not sample selection will happen on a grid. gridding: 0 # How should ATM select a particular hyperpartition (frozen set) from the # set of all hyperpartitions? selector: bestk -# k is number that xxx_k methods use. It is similar to r_min, except it is +# k is number that xxx_k methods use. It is similar to r_minimum, except it is # called k_window and determines how much "history" ATM considers for certain # frozen selection logics. 
k_window: 5 diff --git a/atm/config/test/run-basic.yaml b/atm/config/test/run-basic.yaml index dddf5b6..11e5c19 100644 --- a/atm/config/test/run-basic.yaml +++ b/atm/config/test/run-basic.yaml @@ -2,15 +2,13 @@ train_path: test_path: data_description: -label_column: class +class_column: class # use every algorithm we have methods: - logreg - dt - knn -# directory to store trained models; will be created if it doesn't exist -models_dir: models/ # priority (higher number is more important) priority: 1 # Should there be a classifier or walltime budget? @@ -19,15 +17,15 @@ budget_type: classifier budget: 100 # How should ATM sample hyperparameters from a given frozen set? tuner: gp -# r_min is the number of random runs performed in each hyperpartition before +# r_minimum is the number of random runs performed in each hyperpartition before # allowing bayesian opt to select parameters. -r_min: 2 +r_minimum: 2 # gridding determines whether or not sample selection will happen on a grid. gridding: 0 # How should ATM select a particular hyperpartition (frozen set) from the # set of all hyperpartitions? selector: bestk -# k is number that xxx_k methods use. It is similar to r_min, except it is +# k is number that xxx_k methods use. It is similar to r_minimum, except it is # called k_window and determines how much "history" ATM considers for certain # frozen selection logics. 
k_window: 5 From 2a46de2f95a6cf1c4c121387cf67175c6eaa19f6 Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Wed, 31 Jan 2018 15:11:50 -0500 Subject: [PATCH 21/36] get things working again --- atm/config.py | 2 +- atm/constants.py | 1 + atm/utilities.py | 4 ++-- atm/worker.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/atm/config.py b/atm/config.py index 8c16ad2..7b35f92 100644 --- a/atm/config.py +++ b/atm/config.py @@ -113,7 +113,7 @@ class RunConfig(Config): ] DEFAULTS = { - 'train_path': 'data/test/pollution_1.csv', + 'train_path': os.path.join(DATA_TEST_PATH, 'pollution_1.csv'), 'class_column': 'class', 'methods': ['logreg', 'dt', 'knn'], 'priority': 1, diff --git a/atm/constants.py b/atm/constants.py index 1e851cd..c84cbb0 100644 --- a/atm/constants.py +++ b/atm/constants.py @@ -29,6 +29,7 @@ HTTP_PREFIX = '^https?://' TIME_FMT = '%Y-%m-%d %H:%M' +DATA_TEST_PATH = os.path.join(PROJECT_ROOT, 'data/test') DATA_DL_PATH = os.path.join(PROJECT_ROOT, 'data/downloads') METHOD_PATH = os.path.join(PROJECT_ROOT, 'methods') diff --git a/atm/utilities.py b/atm/utilities.py index ee1f3f7..4d7430b 100644 --- a/atm/utilities.py +++ b/atm/utilities.py @@ -170,7 +170,7 @@ def _make_save_path_old(dir, classifier, suffix): based on the classifier's dataset name and hyperparameters. 
""" run_hash = hash_string(classifier.datarun.dataset.name) - params_hash = hash_dict(classifier.params) + params_hash = hash_dict(classifier.hyperparameter_values) filename = "%s-%s-%s.%s" % (run_hash, params_hash, classifier.datarun.description, suffix) return os.path.join(dir, filename) @@ -183,7 +183,7 @@ def make_save_path(dir, classifier, suffix): """ run_name = "".join([c for c in classifier.datarun.dataset.name if c.isalnum() or c in (' ', '-', '_')]).rstrip() - params_hash = hash_dict(classifier.params)[:8] + params_hash = hash_dict(classifier.hyperparameter_values)[:8] filename = "%s-%s.%s" % (run_name, params_hash, suffix) return os.path.join(dir, filename) diff --git a/atm/worker.py b/atm/worker.py index 4c1fb60..5d8701a 100755 --- a/atm/worker.py +++ b/atm/worker.py @@ -479,7 +479,7 @@ def work(db, datarun_ids=None, save_files=False, choose_randomly=True, worker = Worker(db, run, save_files=save_files, cloud_mode=cloud_mode, aws_config=aws_config, model_dir=model_dir, metric_dir=metric_dir, - log_dir=log_dir, verbose_metrics=verbose_metrics) + verbose_metrics=verbose_metrics) try: worker.run_classifier() except ClassifierError: From 235e1b996067922ade10e624af0672fe09b532e8 Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Wed, 31 Jan 2018 16:01:27 -0500 Subject: [PATCH 22/36] add cached modelhub data and keep working on worker tests --- atm/data/modelhub/test/classifiers.csv | 144 +++++++++++++++++++++ atm/data/modelhub/test/dataruns.csv | 3 + atm/data/modelhub/test/datasets.csv | 2 + atm/data/modelhub/test/hyperpartitions.csv | 41 ++++++ atm/database.py | 2 +- atm/tests/unit_tests/test_worker.py | 29 +++-- 6 files changed, 210 insertions(+), 11 deletions(-) create mode 100644 atm/data/modelhub/test/classifiers.csv create mode 100644 atm/data/modelhub/test/dataruns.csv create mode 100644 atm/data/modelhub/test/datasets.csv create mode 100644 atm/data/modelhub/test/hyperpartitions.csv diff --git a/atm/data/modelhub/test/classifiers.csv 
b/atm/data/modelhub/test/classifiers.csv new file mode 100644 index 0000000..f9462a5 --- /dev/null +++ b/atm/data/modelhub/test/classifiers.csv @@ -0,0 +1,144 @@ +id,datarun_id,hyperpartition_id,host,model_location,metrics_location,hyperparameter_values_64,cv_judgment_metric,cv_judgment_metric_stdev,test_judgment_metric,start_time,end_time,status,error_message +1,1,10,73.61.20.41,,,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTIwCnNWbWV0cmljCnAyClMnZXVjbGlkZWFuJwpwMwpzVndlaWdodHMKcDQKUyd1bmlmb3JtJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZhbGdvcml0aG0KcDcKUydicnV0ZScKcDgKcy4=,,,,2018-01-31 15:10:10.523058,2018-01-31 15:10:10.618173,errored,"Traceback (most recent call last): + File ""/home/bcyphers/work/fl/atm/atm/worker.py"", line 407, in run_classifier + self.save_classifier(classifier.id, model, metrics) + File ""/home/bcyphers/work/fl/atm/atm/worker.py"", line 269, in save_classifier + model_path = save_model(classifier, self.model_dir, model) + File ""atm/utilities.py"", line 197, in save_model + path = make_save_path(model_dir, classifier, 'model') + File ""atm/utilities.py"", line 186, in make_save_path + params_hash = hash_dict(classifier.params)[:8] +AttributeError: 'Classifier' object has no attribute 'params' +" +2,1,15,73.61.20.41,models/pollution_1-c69d0fd0.model,metrics/pollution_1-c69d0fd0.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTExCnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2JhbGxfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydtYW5oYXR0YW4nCnA2CnNWd2VpZ2h0cwpwNwpTJ2Rpc3RhbmNlJwpwOApzVmxlYWZfc2l6ZQpwOQpJNDcKcy4=,0.7381673881673882,0.14674842248033693,0.6666666666666666,2018-01-31 15:10:42.083226,2018-01-31 15:10:42.201867,complete, +3,1,1,73.61.20.41,models/pollution_1-212f5a52.model,metrics/pollution_1-212f5a52.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTkKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMnYmFsbF90cmVlJwpwNApzVm1ldHJpYwpwNQpTJ21pbmtvd3NraScKcDYKc1ZwCnA3CkkyCnNWd2VpZ2h0cwpwOApTJ3VuaWZvcm0nCnA5CnNWbGVhZl9zaXplCnAxMApJMQpzLg==,0.761111111111111,0.08888888888888893,0.5,2018-01-31 
15:10:42.242283,2018-01-31 15:10:42.326565,complete, +4,1,6,73.61.20.41,models/pollution_1-ca46da9f.model,metrics/pollution_1-ca46da9f.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTEyCnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2tkX3RyZWUnCnA0CnNWbWV0cmljCnA1ClMnZXVjbGlkZWFuJwpwNgpzVndlaWdodHMKcDcKUyd1bmlmb3JtJwpwOApzVmxlYWZfc2l6ZQpwOQpJMjIKcy4=,0.6742857142857143,0.27321218486587245,0.761904761904762,2018-01-31 15:10:42.364623,2018-01-31 15:10:42.442905,complete, +5,1,4,73.61.20.41,models/pollution_1-f23240b9.model,metrics/pollution_1-f23240b9.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTIwCnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2JhbGxfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydjaGVieXNoZXYnCnA2CnNWd2VpZ2h0cwpwNwpTJ3VuaWZvcm0nCnA4CnNWbGVhZl9zaXplCnA5Ckk3CnMu,0.7205128205128205,0.086493437519051,0.7368421052631579,2018-01-31 15:10:42.486974,2018-01-31 15:10:42.567315,complete, +6,1,23,73.61.20.41,models/pollution_1-4b96d570.model,metrics/pollution_1-4b96d570.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTEKc1ZtZXRyaWMKcDIKUydtYW5oYXR0YW4nCnAzCnNWd2VpZ2h0cwpwNApTJ2Rpc3RhbmNlJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZhbGdvcml0aG0KcDcKUydicnV0ZScKcDgKcy4=,0.75,0.07745966692414838,0.7368421052631579,2018-01-31 15:10:42.611947,2018-01-31 15:10:42.693034,complete, +7,1,13,73.61.20.41,models/pollution_1-459fe4e9.model,metrics/pollution_1-459fe4e9.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE4CnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2JhbGxfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydtaW5rb3dza2knCnA2CnNWcApwNwpJMwpzVndlaWdodHMKcDgKUydkaXN0YW5jZScKcDkKc1ZsZWFmX3NpemUKcDEwCkkxOQpzLg==,0.722121212121212,0.051027999660101374,0.7777777777777777,2018-01-31 15:10:42.744834,2018-01-31 15:10:42.827119,complete, 
+8,1,20,73.61.20.41,models/pollution_1-1fb0b201.model,metrics/pollution_1-1fb0b201.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTExCnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2tkX3RyZWUnCnA0CnNWbWV0cmljCnA1ClMnY2hlYnlzaGV2JwpwNgpzVndlaWdodHMKcDcKUydkaXN0YW5jZScKcDgKc1ZsZWFmX3NpemUKcDkKSTMzCnMu,0.6851370851370852,0.21575723604589317,0.7200000000000001,2018-01-31 15:10:42.866167,2018-01-31 15:10:42.947328,complete, +9,1,16,73.61.20.41,models/pollution_1-e17d7730.model,metrics/pollution_1-e17d7730.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE3CnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2JhbGxfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydjaGVieXNoZXYnCnA2CnNWd2VpZ2h0cwpwNwpTJ2Rpc3RhbmNlJwpwOApzVmxlYWZfc2l6ZQpwOQpJMzAKcy4=,0.5714862914862915,0.18713964321313295,0.6666666666666667,2018-01-31 15:10:43.002671,2018-01-31 15:10:43.092187,complete, +10,1,7,73.61.20.41,models/pollution_1-f693dd2a.model,metrics/pollution_1-f693dd2a.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE1CnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2tkX3RyZWUnCnA0CnNWbWV0cmljCnA1ClMnbWFuaGF0dGFuJwpwNgpzVndlaWdodHMKcDcKUyd1bmlmb3JtJwpwOApzVmxlYWZfc2l6ZQpwOQpJMjIKcy4=,0.7492063492063492,0.14561707861815507,0.7058823529411764,2018-01-31 15:10:43.130922,2018-01-31 15:10:43.211920,complete, +11,1,30,73.61.20.41,models/pollution_1-13a3b057.model,metrics/pollution_1-13a3b057.metric,KGRwMApWQwpwMQpGMC43MjU2MTYyNDAyODIwMjU5CnNWdG9sCnAyCkYwLjAxMDk4ODQ5MDMwMjI4ODg5MgpzVmZpdF9pbnRlcmNlcHQKcDMKSTAxCnNWcGVuYWx0eQpwNApTJ2wyJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZkdWFsCnA3CkkwMApzVmNsYXNzX3dlaWdodApwOApTJ2JhbGFuY2VkJwpwOQpzLg==,0.8373737373737373,0.07172072832267952,0.823529411764706,2018-01-31 15:10:43.256881,2018-01-31 15:10:43.324868,complete, 
+12,1,1,73.61.20.41,models/pollution_1-2ddb55d6.model,metrics/pollution_1-2ddb55d6.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE3CnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2JhbGxfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydtaW5rb3dza2knCnA2CnNWcApwNwpJMwpzVndlaWdodHMKcDgKUyd1bmlmb3JtJwpwOQpzVmxlYWZfc2l6ZQpwMTAKSTUKcy4=,0.7245021645021645,0.04303788029830044,0.625,2018-01-31 15:10:43.361773,2018-01-31 15:10:43.444917,complete, +13,1,14,73.61.20.41,models/pollution_1-1776da12.model,metrics/pollution_1-1776da12.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTEKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMnYmFsbF90cmVlJwpwNApzVm1ldHJpYwpwNQpTJ2V1Y2xpZGVhbicKcDYKc1Z3ZWlnaHRzCnA3ClMnZGlzdGFuY2UnCnA4CnNWbGVhZl9zaXplCnA5CkkzNApzLg==,0.7014285714285714,0.22234373095249965,0.7272727272727272,2018-01-31 15:10:43.480730,2018-01-31 15:10:43.549801,complete, +14,1,2,73.61.20.41,models/pollution_1-27439619.model,metrics/pollution_1-27439619.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTExCnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2JhbGxfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydldWNsaWRlYW4nCnA2CnNWd2VpZ2h0cwpwNwpTJ3VuaWZvcm0nCnA4CnNWbGVhZl9zaXplCnA5CkkyNApzLg==,0.6484126984126984,0.10670398062386226,0.7000000000000001,2018-01-31 15:10:43.582526,2018-01-31 15:10:43.656973,complete, +15,1,5,73.61.20.41,models/pollution_1-e3e71de0.model,metrics/pollution_1-e3e71de0.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE2CnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2tkX3RyZWUnCnA0CnNWbWV0cmljCnA1ClMnbWlua293c2tpJwpwNgpzVnAKcDcKSTIKc1Z3ZWlnaHRzCnA4ClMndW5pZm9ybScKcDkKc1ZsZWFmX3NpemUKcDEwCkkyNApzLg==,0.4,0.0,0.8695652173913044,2018-01-31 15:10:43.691405,2018-01-31 15:10:43.761447,complete, 
+16,1,29,73.61.20.41,models/pollution_1-45690f93.model,metrics/pollution_1-45690f93.metric,KGRwMApWQwpwMQpGMjU0NTAuNTU1MTc2Nzg0Nzk3CnNWdG9sCnAyCkYxNzQuNDYxNzQ3NjM1NDczMgpzVmZpdF9pbnRlcmNlcHQKcDMKSTAxCnNWcGVuYWx0eQpwNApTJ2wyJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZkdWFsCnA3CkkwMQpzVmNsYXNzX3dlaWdodApwOApTJ2JhbGFuY2VkJwpwOQpzLg==,0.7832323232323232,0.0783244594417489,0.4615384615384615,2018-01-31 15:10:43.792402,2018-01-31 15:10:43.867868,complete, +17,1,15,73.61.20.41,models/pollution_1-9a3eec03.model,metrics/pollution_1-9a3eec03.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE4CnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2JhbGxfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydtYW5oYXR0YW4nCnA2CnNWd2VpZ2h0cwpwNwpTJ2Rpc3RhbmNlJwpwOApzVmxlYWZfc2l6ZQpwOQpJMTAKcy4=,0.6476190476190475,0.038095238095238036,0.625,2018-01-31 15:10:43.912668,2018-01-31 15:10:43.986640,complete, +18,1,25,73.61.20.41,models/pollution_1-2286b746.model,metrics/pollution_1-2286b746.metric,KGRwMApWbWF4X2ZlYXR1cmVzCnAxCkYwLjk4NzM2NTcxNTcyNjU4OTgKc1ZtaW5fc2FtcGxlc19zcGxpdApwMgpJMgpzVmNyaXRlcmlvbgpwMwpTJ2VudHJvcHknCnA0CnNWbWF4X2RlcHRoCnA1CkkxMApzVm1pbl9zYW1wbGVzX2xlYWYKcDYKSTIKcy4=,0.7975180375180375,0.14519269130133422,0.8,2018-01-31 15:10:44.026241,2018-01-31 15:10:44.107650,complete, +19,1,10,73.61.20.41,models/pollution_1-8acbfd97.model,metrics/pollution_1-8acbfd97.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTQKc1ZtZXRyaWMKcDIKUydldWNsaWRlYW4nCnAzCnNWd2VpZ2h0cwpwNApTJ3VuaWZvcm0nCnA1CnNWX3NjYWxlCnA2CkkwMQpzVmFsZ29yaXRobQpwNwpTJ2JydXRlJwpwOApzLg==,0.40777777777777774,0.36910970633219875,0.7000000000000001,2018-01-31 15:10:44.152282,2018-01-31 15:10:44.230245,complete, +20,1,3,73.61.20.41,models/pollution_1-66692934.model,metrics/pollution_1-66692934.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTUKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMnYmFsbF90cmVlJwpwNApzVm1ldHJpYwpwNQpTJ21hbmhhdHRhbicKcDYKc1Z3ZWlnaHRzCnA3ClMndW5pZm9ybScKcDgKc1ZsZWFmX3NpemUKcDkKSTQyCnMu,0.6777777777777777,0.17441259440309853,0.7058823529411765,2018-01-31 15:10:44.264599,2018-01-31 
15:10:44.352854,complete, +21,1,4,73.61.20.41,models/pollution_1-6b2c072e.model,metrics/pollution_1-6b2c072e.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTIKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMnYmFsbF90cmVlJwpwNApzVm1ldHJpYwpwNQpTJ2NoZWJ5c2hldicKcDYKc1Z3ZWlnaHRzCnA3ClMndW5pZm9ybScKcDgKc1ZsZWFmX3NpemUKcDkKSTMxCnMu,0.5647619047619047,0.30801242723110733,0.782608695652174,2018-01-31 15:10:44.389775,2018-01-31 15:10:44.457795,complete, +22,1,18,73.61.20.41,models/pollution_1-810da9dd.model,metrics/pollution_1-810da9dd.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE2CnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2tkX3RyZWUnCnA0CnNWbWV0cmljCnA1ClMnZXVjbGlkZWFuJwpwNgpzVndlaWdodHMKcDcKUydkaXN0YW5jZScKcDgKc1ZsZWFmX3NpemUKcDkKSTE2CnMu,0.7520346320346321,0.11552470205509219,0.7368421052631577,2018-01-31 15:10:44.489631,2018-01-31 15:10:44.581245,complete, +23,1,30,73.61.20.41,models/pollution_1-719dc64d.model,metrics/pollution_1-719dc64d.metric,KGRwMApWQwpwMQpGNS42NTU0NzMzNzQ4Mjg1NjNlLTA1CnNWdG9sCnAyCkY3Ny44NDExNjUyNzEwMDM0MwpzVmZpdF9pbnRlcmNlcHQKcDMKSTAxCnNWcGVuYWx0eQpwNApTJ2wyJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZkdWFsCnA3CkkwMApzVmNsYXNzX3dlaWdodApwOApTJ2JhbGFuY2VkJwpwOQpzLg==,0.0,0.0,0.0,2018-01-31 15:10:44.623382,2018-01-31 15:10:44.690789,complete, +24,1,10,73.61.20.41,models/pollution_1-55929d3b.model,metrics/pollution_1-55929d3b.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTEwCnNWbWV0cmljCnAyClMnZXVjbGlkZWFuJwpwMwpzVndlaWdodHMKcDQKUyd1bmlmb3JtJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZhbGdvcml0aG0KcDcKUydicnV0ZScKcDgKcy4=,0.592063492063492,0.18412698412698414,0.8181818181818182,2018-01-31 15:10:44.724598,2018-01-31 15:10:44.796547,complete, +25,1,25,73.61.20.41,models/pollution_1-a8e88328.model,metrics/pollution_1-a8e88328.metric,KGRwMApWbWF4X2ZlYXR1cmVzCnAxCkYwLjM0OTY3MTk3ODE3NDMzOQpzVm1pbl9zYW1wbGVzX3NwbGl0CnAyCkk0CnNWY3JpdGVyaW9uCnAzClMnZW50cm9weScKcDQKc1ZtYXhfZGVwdGgKcDUKSTQKc1ZtaW5fc2FtcGxlc19sZWFmCnA2CkkyCnMu,0.7992063492063493,0.16950716284714276,0.631578947368421,2018-01-31 15:10:44.835636,2018-01-31 
15:10:44.902800,complete, +26,1,23,73.61.20.41,models/pollution_1-7b09d947.model,metrics/pollution_1-7b09d947.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE0CnNWbWV0cmljCnAyClMnbWFuaGF0dGFuJwpwMwpzVndlaWdodHMKcDQKUydkaXN0YW5jZScKcDUKc1Zfc2NhbGUKcDYKSTAxCnNWYWxnb3JpdGhtCnA3ClMnYnJ1dGUnCnA4CnMu,0.6333333333333333,0.18708286933869703,0.75,2018-01-31 15:10:44.937240,2018-01-31 15:10:45.010489,complete, +27,1,25,73.61.20.41,models/pollution_1-8dc91fd9.model,metrics/pollution_1-8dc91fd9.metric,KGRwMApWbWF4X2ZlYXR1cmVzCnAxCkYwLjgyNTk2NTc4MDM3NTg0MDMKc1ZtaW5fc2FtcGxlc19zcGxpdApwMgpJMgpzVmNyaXRlcmlvbgpwMwpTJ2VudHJvcHknCnA0CnNWbWF4X2RlcHRoCnA1CkkxMApzVm1pbl9zYW1wbGVzX2xlYWYKcDYKSTIKcy4=,0.6638461538461539,0.17106311817879374,0.7000000000000001,2018-01-31 15:10:45.077253,2018-01-31 15:10:45.166802,complete, +28,1,24,73.61.20.41,models/pollution_1-0894a096.model,metrics/pollution_1-0894a096.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTExCnNWbWV0cmljCnAyClMnY2hlYnlzaGV2JwpwMwpzVndlaWdodHMKcDQKUydkaXN0YW5jZScKcDUKc1Zfc2NhbGUKcDYKSTAxCnNWYWxnb3JpdGhtCnA3ClMnYnJ1dGUnCnA4CnMu,0.6324242424242424,0.07023246197035168,0.7499999999999999,2018-01-31 15:10:45.236138,2018-01-31 15:10:45.346548,complete, +29,1,17,73.61.20.41,models/pollution_1-7969297a.model,metrics/pollution_1-7969297a.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTIKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMna2RfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydtaW5rb3dza2knCnA2CnNWcApwNwpJMgpzVndlaWdodHMKcDgKUydkaXN0YW5jZScKcDkKc1ZsZWFmX3NpemUKcDEwCkk3CnMu,0.5424242424242424,0.2813697964437201,0.7000000000000001,2018-01-31 15:10:45.408537,2018-01-31 15:10:45.498787,complete, +30,1,4,73.61.20.41,models/pollution_1-5a257c3d.model,metrics/pollution_1-5a257c3d.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE2CnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2JhbGxfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydjaGVieXNoZXYnCnA2CnNWd2VpZ2h0cwpwNwpTJ3VuaWZvcm0nCnA4CnNWbGVhZl9zaXplCnA5CkkzMQpzLg==,0.6465079365079365,0.12739180589670823,0.6666666666666665,2018-01-31 15:10:45.541195,2018-01-31 
15:10:45.618310,complete, +31,1,9,73.61.20.41,models/pollution_1-b8bdb3ca.model,metrics/pollution_1-b8bdb3ca.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE4CnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2JydXRlJwpwNApzVm1ldHJpYwpwNQpTJ21pbmtvd3NraScKcDYKc1ZwCnA3CkkyCnNWd2VpZ2h0cwpwOApTJ3VuaWZvcm0nCnA5CnMu,0.6958730158730159,0.17464819044524896,0.5,2018-01-31 15:10:45.674121,2018-01-31 15:10:45.755147,complete, +32,1,11,73.61.20.41,models/pollution_1-47a84e42.model,metrics/pollution_1-47a84e42.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTEzCnNWbWV0cmljCnAyClMnbWFuaGF0dGFuJwpwMwpzVndlaWdodHMKcDQKUyd1bmlmb3JtJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZhbGdvcml0aG0KcDcKUydicnV0ZScKcDgKcy4=,0.7222222222222221,0.149071198499986,0.9473684210526316,2018-01-31 15:10:45.798889,2018-01-31 15:10:45.884684,complete, +33,1,26,73.61.20.41,models/pollution_1-0dd18631.model,metrics/pollution_1-0dd18631.metric,KGRwMApWbWF4X2ZlYXR1cmVzCnAxCkYwLjQzMjQxMDI1OTc0MjUwMzI0CnNWbWluX3NhbXBsZXNfc3BsaXQKcDIKSTIKc1Zjcml0ZXJpb24KcDMKUydnaW5pJwpwNApzVm1heF9kZXB0aApwNQpJNgpzVm1pbl9zYW1wbGVzX2xlYWYKcDYKSTIKcy4=,0.5264069264069264,0.1652886190276506,0.6666666666666667,2018-01-31 15:10:45.929087,2018-01-31 15:10:46.005045,complete, +34,1,14,73.61.20.41,models/pollution_1-a0ff69ca.model,metrics/pollution_1-a0ff69ca.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTIKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMnYmFsbF90cmVlJwpwNApzVm1ldHJpYwpwNQpTJ2V1Y2xpZGVhbicKcDYKc1Z3ZWlnaHRzCnA3ClMnZGlzdGFuY2UnCnA4CnNWbGVhZl9zaXplCnA5CkkzMApzLg==,0.7720634920634922,0.10606025659845637,0.4615384615384615,2018-01-31 15:10:46.056669,2018-01-31 15:10:46.151627,complete, +35,1,11,73.61.20.41,models/pollution_1-549f1dd0.model,metrics/pollution_1-549f1dd0.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTMKc1ZtZXRyaWMKcDIKUydtYW5oYXR0YW4nCnAzCnNWd2VpZ2h0cwpwNApTJ3VuaWZvcm0nCnA1CnNWX3NjYWxlCnA2CkkwMQpzVmFsZ29yaXRobQpwNwpTJ2JydXRlJwpwOApzLg==,0.7987878787878788,0.11230288312145914,0.47058823529411764,2018-01-31 15:10:46.204605,2018-01-31 15:10:46.298206,complete, 
+36,1,6,73.61.20.41,models/pollution_1-f55646a6.model,metrics/pollution_1-f55646a6.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE2CnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2tkX3RyZWUnCnA0CnNWbWV0cmljCnA1ClMnZXVjbGlkZWFuJwpwNgpzVndlaWdodHMKcDcKUyd1bmlmb3JtJwpwOApzVmxlYWZfc2l6ZQpwOQpJNDgKcy4=,0.7277777777777776,0.08678055195451845,0.7999999999999999,2018-01-31 15:10:46.344673,2018-01-31 15:10:46.450395,complete, +37,1,13,73.61.20.41,models/pollution_1-88c8656a.model,metrics/pollution_1-88c8656a.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTUKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMnYmFsbF90cmVlJwpwNApzVm1ldHJpYwpwNQpTJ21pbmtvd3NraScKcDYKc1ZwCnA3CkkxCnNWd2VpZ2h0cwpwOApTJ2Rpc3RhbmNlJwpwOQpzVmxlYWZfc2l6ZQpwMTAKSTMwCnMu,0.6832323232323232,0.19003820386080306,0.6666666666666666,2018-01-31 15:10:46.508298,2018-01-31 15:10:46.597652,complete, +38,1,10,73.61.20.41,models/pollution_1-a332cd36.model,metrics/pollution_1-a332cd36.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTMKc1ZtZXRyaWMKcDIKUydldWNsaWRlYW4nCnAzCnNWd2VpZ2h0cwpwNApTJ3VuaWZvcm0nCnA1CnNWX3NjYWxlCnA2CkkwMQpzVmFsZ29yaXRobQpwNwpTJ2JydXRlJwpwOApzLg==,0.838095238095238,0.14937511563198216,0.37499999999999994,2018-01-31 15:10:46.648252,2018-01-31 15:10:46.763842,complete, +39,1,29,73.61.20.41,models/pollution_1-1baf0ce0.model,metrics/pollution_1-1baf0ce0.metric,KGRwMApWQwpwMQpGNTQ3ODAuMzU4NDA5MDQzNzE0CnNWdG9sCnAyCkYwLjA2ODU4MDE2MjY3NDg1MTcyCnNWZml0X2ludGVyY2VwdApwMwpJMDEKc1ZwZW5hbHR5CnA0ClMnbDInCnA1CnNWX3NjYWxlCnA2CkkwMQpzVmR1YWwKcDcKSTAxCnNWY2xhc3Nfd2VpZ2h0CnA4ClMnYmFsYW5jZWQnCnA5CnMu,0.6432323232323232,0.18213485329619372,0.8421052631578948,2018-01-31 15:10:46.826449,2018-01-31 15:10:46.912498,complete, 
+40,1,30,73.61.20.41,models/pollution_1-7f7e146a.model,metrics/pollution_1-7f7e146a.metric,KGRwMApWQwpwMQpGMC4wMDA2NTg0OTM1MjM3NTEwOTQ2CnNWdG9sCnAyCkYxNDM5LjA0MjkyODI1MjAzNApzVmZpdF9pbnRlcmNlcHQKcDMKSTAxCnNWcGVuYWx0eQpwNApTJ2wyJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZkdWFsCnA3CkkwMApzVmNsYXNzX3dlaWdodApwOApTJ2JhbGFuY2VkJwpwOQpzLg==,0.0,0.0,0.0,2018-01-31 15:10:46.959450,2018-01-31 15:10:47.052724,complete, +41,1,8,73.61.20.41,models/pollution_1-90aad463.model,metrics/pollution_1-90aad463.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE0CnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2tkX3RyZWUnCnA0CnNWbWV0cmljCnA1ClMnY2hlYnlzaGV2JwpwNgpzVndlaWdodHMKcDcKUyd1bmlmb3JtJwpwOApzVmxlYWZfc2l6ZQpwOQpJNDQKcy4=,0.6666666666666667,0.10432810619146017,0.6666666666666665,2018-01-31 15:10:47.112370,2018-01-31 15:10:47.196423,complete, +42,1,19,73.61.20.41,models/pollution_1-8a37b43e.model,metrics/pollution_1-8a37b43e.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTEwCnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2tkX3RyZWUnCnA0CnNWbWV0cmljCnA1ClMnbWFuaGF0dGFuJwpwNgpzVndlaWdodHMKcDcKUydkaXN0YW5jZScKcDgKc1ZsZWFmX3NpemUKcDkKSTM0CnMu,0.6896825396825397,0.216833763114522,0.7777777777777777,2018-01-31 15:10:47.256563,2018-01-31 15:10:47.351958,complete, +43,1,13,73.61.20.41,models/pollution_1-b93a6826.model,metrics/pollution_1-b93a6826.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTEzCnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2JhbGxfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydtaW5rb3dza2knCnA2CnNWcApwNwpJMQpzVndlaWdodHMKcDgKUydkaXN0YW5jZScKcDkKc1ZsZWFmX3NpemUKcDEwCkk0CnMu,0.7500000000000001,0.16598500055174642,0.7058823529411764,2018-01-31 15:10:47.400721,2018-01-31 15:10:47.502192,complete, 
+44,1,29,73.61.20.41,models/pollution_1-323113c5.model,metrics/pollution_1-323113c5.metric,KGRwMApWQwpwMQpGMC4xNzI2NTM0NDAyMTM0NjUxOApzVnRvbApwMgpGMC4wMDMyODY3NzAxNzA4NjY2OTYKc1ZmaXRfaW50ZXJjZXB0CnAzCkkwMQpzVnBlbmFsdHkKcDQKUydsMicKcDUKc1Zfc2NhbGUKcDYKSTAxCnNWZHVhbApwNwpJMDEKc1ZjbGFzc193ZWlnaHQKcDgKUydiYWxhbmNlZCcKcDkKcy4=,0.6647619047619048,0.16099464083135576,0.9473684210526316,2018-01-31 15:10:47.576922,2018-01-31 15:10:47.681101,complete, +45,1,26,73.61.20.41,models/pollution_1-d6154f98.model,metrics/pollution_1-d6154f98.metric,KGRwMApWbWF4X2ZlYXR1cmVzCnAxCkYwLjQzODM4MzEwMDExMzI2NDQKc1ZtaW5fc2FtcGxlc19zcGxpdApwMgpJMgpzVmNyaXRlcmlvbgpwMwpTJ2dpbmknCnA0CnNWbWF4X2RlcHRoCnA1Ckk0CnNWbWluX3NhbXBsZXNfbGVhZgpwNgpJMQpzLg==,0.6008658008658009,0.20019293181163966,0.7000000000000001,2018-01-31 15:10:47.724775,2018-01-31 15:10:47.800832,complete, +46,1,21,73.61.20.41,models/pollution_1-b275c5f1.model,metrics/pollution_1-b275c5f1.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTEwCnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2JydXRlJwpwNApzVm1ldHJpYwpwNQpTJ21pbmtvd3NraScKcDYKc1ZwCnA3CkkyCnNWd2VpZ2h0cwpwOApTJ2Rpc3RhbmNlJwpwOQpzLg==,0.6946608946608946,0.19825726563031038,0.588235294117647,2018-01-31 15:10:47.847475,2018-01-31 15:10:47.943389,complete, +47,1,18,73.61.20.41,models/pollution_1-5d0adda4.model,metrics/pollution_1-5d0adda4.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTgKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMna2RfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydldWNsaWRlYW4nCnA2CnNWd2VpZ2h0cwpwNwpTJ2Rpc3RhbmNlJwpwOApzVmxlYWZfc2l6ZQpwOQpJMjcKcy4=,0.7551948051948052,0.1380477330116077,0.5882352941176471,2018-01-31 15:10:47.991484,2018-01-31 15:10:48.106087,complete, 
+48,1,15,73.61.20.41,models/pollution_1-31c0a43b.model,metrics/pollution_1-31c0a43b.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTYKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMnYmFsbF90cmVlJwpwNApzVm1ldHJpYwpwNQpTJ21hbmhhdHRhbicKcDYKc1Z3ZWlnaHRzCnA3ClMnZGlzdGFuY2UnCnA4CnNWbGVhZl9zaXplCnA5CkkzOQpzLg==,0.8033333333333333,0.10974718422102887,0.4615384615384615,2018-01-31 15:10:48.159555,2018-01-31 15:10:48.248298,complete, +49,1,17,73.61.20.41,models/pollution_1-bf78f4ea.model,metrics/pollution_1-bf78f4ea.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTkKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMna2RfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydtaW5rb3dza2knCnA2CnNWcApwNwpJMQpzVndlaWdodHMKcDgKUydkaXN0YW5jZScKcDkKc1ZsZWFmX3NpemUKcDEwCkkyNQpzLg==,0.7082251082251083,0.2387884438494141,0.7368421052631579,2018-01-31 15:10:48.292795,2018-01-31 15:10:48.372511,complete, +50,1,24,73.61.20.41,models/pollution_1-8d84bab8.model,metrics/pollution_1-8d84bab8.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTYKc1ZtZXRyaWMKcDIKUydjaGVieXNoZXYnCnAzCnNWd2VpZ2h0cwpwNApTJ2Rpc3RhbmNlJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZhbGdvcml0aG0KcDcKUydicnV0ZScKcDgKcy4=,0.7134920634920635,0.1539437061105312,0.7272727272727272,2018-01-31 15:10:48.420424,2018-01-31 15:10:48.498325,complete, +51,1,12,73.61.20.41,models/pollution_1-cb2aa5ab.model,metrics/pollution_1-cb2aa5ab.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTMKc1ZtZXRyaWMKcDIKUydjaGVieXNoZXYnCnAzCnNWd2VpZ2h0cwpwNApTJ3VuaWZvcm0nCnA1CnNWX3NjYWxlCnA2CkkwMQpzVmFsZ29yaXRobQpwNwpTJ2JydXRlJwpwOApzLg==,0.694957264957265,0.16368010079512904,0.7058823529411765,2018-01-31 15:10:48.545957,2018-01-31 15:10:48.621582,complete, +52,1,25,73.61.20.41,models/pollution_1-1f791cce.model,metrics/pollution_1-1f791cce.metric,KGRwMApWbWF4X2ZlYXR1cmVzCnAxCkYwLjc1OTczMzk3MzE3NDgzNjMKc1ZtaW5fc2FtcGxlc19zcGxpdApwMgpJMgpzVmNyaXRlcmlvbgpwMwpTJ2VudHJvcHknCnA0CnNWbWF4X2RlcHRoCnA1Ckk5CnNWbWluX3NhbXBsZXNfbGVhZgpwNgpJMQpzLg==,0.6596825396825398,0.11976966219734955,0.761904761904762,2018-01-31 15:10:48.664713,2018-01-31 
15:10:48.735519,complete, +53,1,24,73.61.20.41,models/pollution_1-43517b97.model,metrics/pollution_1-43517b97.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE1CnNWbWV0cmljCnAyClMnY2hlYnlzaGV2JwpwMwpzVndlaWdodHMKcDQKUydkaXN0YW5jZScKcDUKc1Zfc2NhbGUKcDYKSTAxCnNWYWxnb3JpdGhtCnA3ClMnYnJ1dGUnCnA4CnMu,0.6787878787878787,0.1841766230184575,0.7058823529411765,2018-01-31 15:10:48.784416,2018-01-31 15:10:48.861566,complete, +54,1,5,73.61.20.41,models/pollution_1-2e1fc6be.model,metrics/pollution_1-2e1fc6be.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTkKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMna2RfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydtaW5rb3dza2knCnA2CnNWcApwNwpJMwpzVndlaWdodHMKcDgKUyd1bmlmb3JtJwpwOQpzVmxlYWZfc2l6ZQpwMTAKSTQzCnMu,0.5590909090909091,0.29529521733526864,0.8181818181818181,2018-01-31 15:10:48.904240,2018-01-31 15:10:48.988646,complete, +55,1,22,73.61.20.41,models/pollution_1-d7a2e459.model,metrics/pollution_1-d7a2e459.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTYKc1ZtZXRyaWMKcDIKUydldWNsaWRlYW4nCnAzCnNWd2VpZ2h0cwpwNApTJ2Rpc3RhbmNlJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZhbGdvcml0aG0KcDcKUydicnV0ZScKcDgKcy4=,0.7147619047619047,0.12427020519910136,0.7368421052631577,2018-01-31 15:10:49.059042,2018-01-31 15:10:49.137011,complete, +56,1,11,73.61.20.41,models/pollution_1-ddaf6fef.model,metrics/pollution_1-ddaf6fef.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTIwCnNWbWV0cmljCnAyClMnbWFuaGF0dGFuJwpwMwpzVndlaWdodHMKcDQKUyd1bmlmb3JtJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZhbGdvcml0aG0KcDcKUydicnV0ZScKcDgKcy4=,0.8246753246753247,0.06385464815033519,0.6153846153846153,2018-01-31 15:10:49.180012,2018-01-31 15:10:49.253536,complete, +57,1,30,73.61.20.41,models/pollution_1-82e6af5d.model,metrics/pollution_1-82e6af5d.metric,KGRwMApWQwpwMQpGMjI2NDEuNDc3MTI1MTkwMDY0CnNWdG9sCnAyCkY4Njc1OC44Mzg2Mzg1MzAwOApzVmZpdF9pbnRlcmNlcHQKcDMKSTAxCnNWcGVuYWx0eQpwNApTJ2wyJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZkdWFsCnA3CkkwMApzVmNsYXNzX3dlaWdodApwOApTJ2JhbGFuY2VkJwpwOQpzLg==,0.0,0.0,0.0,2018-01-31 15:10:49.304282,2018-01-31 15:10:49.377533,complete, 
+58,1,13,73.61.20.41,models/pollution_1-dbb9e3f2.model,metrics/pollution_1-dbb9e3f2.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTQKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMnYmFsbF90cmVlJwpwNApzVm1ldHJpYwpwNQpTJ21pbmtvd3NraScKcDYKc1ZwCnA3CkkzCnNWd2VpZ2h0cwpwOApTJ2Rpc3RhbmNlJwpwOQpzVmxlYWZfc2l6ZQpwMTAKSTQ5CnMu,0.7468253968253968,0.11794195548101538,0.7368421052631579,2018-01-31 15:10:49.420577,2018-01-31 15:10:49.507597,complete, +59,1,5,73.61.20.41,models/pollution_1-9a867936.model,metrics/pollution_1-9a867936.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE0CnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2tkX3RyZWUnCnA0CnNWbWV0cmljCnA1ClMnbWlua293c2tpJwpwNgpzVnAKcDcKSTIKc1Z3ZWlnaHRzCnA4ClMndW5pZm9ybScKcDkKc1ZsZWFmX3NpemUKcDEwCkkxNQpzLg==,0.7533333333333333,0.04876246279442607,0.7692307692307693,2018-01-31 15:10:49.553230,2018-01-31 15:10:49.642015,complete, +60,1,13,73.61.20.41,models/pollution_1-80cbdc6b.model,metrics/pollution_1-80cbdc6b.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTgKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMnYmFsbF90cmVlJwpwNApzVm1ldHJpYwpwNQpTJ21pbmtvd3NraScKcDYKc1ZwCnA3CkkxCnNWd2VpZ2h0cwpwOApTJ2Rpc3RhbmNlJwpwOQpzVmxlYWZfc2l6ZQpwMTAKSTEKcy4=,0.7838383838383838,0.09680120040978542,0.7368421052631579,2018-01-31 15:10:49.700147,2018-01-31 15:10:49.826067,complete, +61,1,28,73.61.20.41,models/pollution_1-8f7d4df3.model,metrics/pollution_1-8f7d4df3.metric,KGRwMApWQwpwMQpGNDMxLjc3OTIzOTQzMjI5MjUKc1Zfc2NhbGUKcDIKSTAxCnNWZml0X2ludGVyY2VwdApwMwpJMDAKc1ZwZW5hbHR5CnA0ClMnbDEnCnA1CnNWdG9sCnA2CkYwLjMzNjEzNDY2NDM0OTE0MTI0CnNWY2xhc3Nfd2VpZ2h0CnA3ClMnYmFsYW5jZWQnCnA4CnMu,0.8487012987012987,0.10109036184633184,0.4,2018-01-31 15:10:49.885986,2018-01-31 15:10:49.967902,complete, 
+62,1,16,73.61.20.41,models/pollution_1-2bb09298.model,metrics/pollution_1-2bb09298.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTIKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMnYmFsbF90cmVlJwpwNApzVm1ldHJpYwpwNQpTJ2NoZWJ5c2hldicKcDYKc1Z3ZWlnaHRzCnA3ClMnZGlzdGFuY2UnCnA4CnNWbGVhZl9zaXplCnA5CkkxMgpzLg==,0.6725829725829726,0.16554717315637157,0.6956521739130435,2018-01-31 15:10:50.019490,2018-01-31 15:10:50.112344,complete, +63,1,2,73.61.20.41,models/pollution_1-5988dee9.model,metrics/pollution_1-5988dee9.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTgKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMnYmFsbF90cmVlJwpwNApzVm1ldHJpYwpwNQpTJ2V1Y2xpZGVhbicKcDYKc1Z3ZWlnaHRzCnA3ClMndW5pZm9ybScKcDgKc1ZsZWFmX3NpemUKcDkKSTQ3CnMu,0.7214285714285713,0.07514158970504527,0.6666666666666667,2018-01-31 15:10:50.157081,2018-01-31 15:10:50.236602,complete, +64,1,24,73.61.20.41,models/pollution_1-f77d895f.model,metrics/pollution_1-f77d895f.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTUKc1ZtZXRyaWMKcDIKUydjaGVieXNoZXYnCnAzCnNWd2VpZ2h0cwpwNApTJ2Rpc3RhbmNlJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZhbGdvcml0aG0KcDcKUydicnV0ZScKcDgKcy4=,0.7333333333333333,0.03333333333333334,0.6666666666666666,2018-01-31 15:10:50.275531,2018-01-31 15:10:50.348571,complete, +65,1,15,73.61.20.41,models/pollution_1-3e5daa08.model,metrics/pollution_1-3e5daa08.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTIwCnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2JhbGxfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydtYW5oYXR0YW4nCnA2CnNWd2VpZ2h0cwpwNwpTJ2Rpc3RhbmNlJwpwOApzVmxlYWZfc2l6ZQpwOQpJMzUKcy4=,0.7682539682539683,0.1260281475636775,0.8235294117647058,2018-01-31 15:10:50.396185,2018-01-31 15:10:50.472080,complete, +66,1,4,73.61.20.41,models/pollution_1-a161e6fb.model,metrics/pollution_1-a161e6fb.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTQKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMnYmFsbF90cmVlJwpwNApzVm1ldHJpYwpwNQpTJ2NoZWJ5c2hldicKcDYKc1Z3ZWlnaHRzCnA3ClMndW5pZm9ybScKcDgKc1ZsZWFmX3NpemUKcDkKSTIzCnMu,0.580952380952381,0.15818557373212466,0.8421052631578948,2018-01-31 15:10:50.516497,2018-01-31 
15:10:50.594434,complete, +67,1,19,73.61.20.41,models/pollution_1-a1bbf8b6.model,metrics/pollution_1-a1bbf8b6.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTcKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMna2RfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydtYW5oYXR0YW4nCnA2CnNWd2VpZ2h0cwpwNwpTJ2Rpc3RhbmNlJwpwOApzVmxlYWZfc2l6ZQpwOQpJNwpzLg==,0.6659673659673659,0.17497367430474442,0.6666666666666666,2018-01-31 15:10:50.638616,2018-01-31 15:10:50.713906,complete, +68,1,4,73.61.20.41,models/pollution_1-dfe673b6.model,metrics/pollution_1-dfe673b6.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE2CnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2JhbGxfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydjaGVieXNoZXYnCnA2CnNWd2VpZ2h0cwpwNwpTJ3VuaWZvcm0nCnA4CnNWbGVhZl9zaXplCnA5CkkxOQpzLg==,0.5976190476190476,0.14372750735987208,0.631578947368421,2018-01-31 15:10:50.756027,2018-01-31 15:10:50.840416,complete, +69,1,1,73.61.20.41,models/pollution_1-25f5afb5.model,metrics/pollution_1-25f5afb5.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE3CnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2JhbGxfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydtaW5rb3dza2knCnA2CnNWcApwNwpJMgpzVndlaWdodHMKcDgKUyd1bmlmb3JtJwpwOQpzVmxlYWZfc2l6ZQpwMTAKSTIxCnMu,0.43555555555555553,0.24408053720437298,0.8181818181818182,2018-01-31 15:10:50.884746,2018-01-31 15:10:50.960873,complete, +70,1,23,73.61.20.41,models/pollution_1-3787c1a2.model,metrics/pollution_1-3787c1a2.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTIKc1ZtZXRyaWMKcDIKUydtYW5oYXR0YW4nCnAzCnNWd2VpZ2h0cwpwNApTJ2Rpc3RhbmNlJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZhbGdvcml0aG0KcDcKUydicnV0ZScKcDgKcy4=,0.6887878787878787,0.1036593984678931,0.6666666666666665,2018-01-31 15:10:51.003748,2018-01-31 15:10:51.098612,complete, 
+71,1,28,73.61.20.41,models/pollution_1-206e3e3d.model,metrics/pollution_1-206e3e3d.metric,KGRwMApWQwpwMQpGNDY2NzguOTQ1OTg4NjkzMDcKc1Zfc2NhbGUKcDIKSTAxCnNWZml0X2ludGVyY2VwdApwMwpJMDAKc1ZwZW5hbHR5CnA0ClMnbDEnCnA1CnNWdG9sCnA2CkYwLjAzNjcyNTU2MTkxNDA5MTU1NgpzVmNsYXNzX3dlaWdodApwNwpTJ2JhbGFuY2VkJwpwOApzLg==,0.7266666666666667,0.052281290471193786,0.7272727272727273,2018-01-31 15:10:51.147103,2018-01-31 15:10:51.224142,complete, +72,1,22,73.61.20.41,models/pollution_1-d3bb6d5f.model,metrics/pollution_1-d3bb6d5f.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTIwCnNWbWV0cmljCnAyClMnZXVjbGlkZWFuJwpwMwpzVndlaWdodHMKcDQKUydkaXN0YW5jZScKcDUKc1Zfc2NhbGUKcDYKSTAxCnNWYWxnb3JpdGhtCnA3ClMnYnJ1dGUnCnA4CnMu,0.7526340326340326,0.05058874423364487,0.7368421052631579,2018-01-31 15:10:51.268736,2018-01-31 15:10:51.344309,complete, +73,1,22,73.61.20.41,models/pollution_1-a00ea548.model,metrics/pollution_1-a00ea548.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE5CnNWbWV0cmljCnAyClMnZXVjbGlkZWFuJwpwMwpzVndlaWdodHMKcDQKUydkaXN0YW5jZScKcDUKc1Zfc2NhbGUKcDYKSTAxCnNWYWxnb3JpdGhtCnA3ClMnYnJ1dGUnCnA4CnMu,0.8078144078144078,0.06018607194412083,0.6363636363636364,2018-01-31 15:10:51.387336,2018-01-31 15:10:51.465324,complete, +74,1,29,73.61.20.41,models/pollution_1-9f631353.model,metrics/pollution_1-9f631353.metric,KGRwMApWQwpwMQpGNDgwLjQyNTU3MzYxODE2MTkzCnNWdG9sCnAyCkYwLjAwMDE0OTY4NjA2NTU2NzAxNTIzCnNWZml0X2ludGVyY2VwdApwMwpJMDEKc1ZwZW5hbHR5CnA0ClMnbDInCnA1CnNWX3NjYWxlCnA2CkkwMQpzVmR1YWwKcDcKSTAxCnNWY2xhc3Nfd2VpZ2h0CnA4ClMnYmFsYW5jZWQnCnA5CnMu,0.7223809523809523,0.11747147094193024,0.888888888888889,2018-01-31 15:10:51.512322,2018-01-31 15:10:51.594859,complete, 
+75,1,31,73.61.20.41,models/pollution_1-4cf8dda0.model,metrics/pollution_1-4cf8dda0.metric,KGRwMApWQwpwMQpGMS45NTU0ODY4MTc1MjcwMTczCnNWdG9sCnAyCkYzMzkuMjA3MjE4MDk0MjcwNgpzVmZpdF9pbnRlcmNlcHQKcDMKSTAwCnNWcGVuYWx0eQpwNApTJ2wyJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZkdWFsCnA3CkkwMQpzVmNsYXNzX3dlaWdodApwOApTJ2JhbGFuY2VkJwpwOQpzLg==,0.7295238095238095,0.10700626213476679,0.7058823529411764,2018-01-31 15:10:51.652268,2018-01-31 15:10:51.726574,complete, +76,1,8,73.61.20.41,models/pollution_1-bc655260.model,metrics/pollution_1-bc655260.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTExCnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2tkX3RyZWUnCnA0CnNWbWV0cmljCnA1ClMnY2hlYnlzaGV2JwpwNgpzVndlaWdodHMKcDcKUyd1bmlmb3JtJwpwOApzVmxlYWZfc2l6ZQpwOQpJMTQKcy4=,0.7753846153846153,0.012307692307692308,0.4347826086956522,2018-01-31 15:10:51.775689,2018-01-31 15:10:51.880542,complete, +77,1,11,73.61.20.41,models/pollution_1-3ae1ca62.model,metrics/pollution_1-3ae1ca62.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTUKc1ZtZXRyaWMKcDIKUydtYW5oYXR0YW4nCnAzCnNWd2VpZ2h0cwpwNApTJ3VuaWZvcm0nCnA1CnNWX3NjYWxlCnA2CkkwMQpzVmFsZ29yaXRobQpwNwpTJ2JydXRlJwpwOApzLg==,0.7611111111111111,0.22879178091082222,0.75,2018-01-31 15:10:51.978634,2018-01-31 15:10:52.066726,complete, +78,1,29,73.61.20.41,models/pollution_1-964eedc9.model,metrics/pollution_1-964eedc9.metric,KGRwMApWQwpwMQpGNS42NjIyODgyNzE1NTg1MjgKc1Z0b2wKcDIKRjAuMDIwMTQ2NjU1OTYxNTM3NTQ3CnNWZml0X2ludGVyY2VwdApwMwpJMDEKc1ZwZW5hbHR5CnA0ClMnbDInCnA1CnNWX3NjYWxlCnA2CkkwMQpzVmR1YWwKcDcKSTAxCnNWY2xhc3Nfd2VpZ2h0CnA4ClMnYmFsYW5jZWQnCnA5CnMu,0.6585281385281385,0.06407768752395457,0.7058823529411765,2018-01-31 15:10:52.116925,2018-01-31 15:10:52.207219,complete, +79,1,27,73.61.20.41,models/pollution_1-53eddbf0.model,metrics/pollution_1-53eddbf0.metric,KGRwMApWQwpwMQpGNy41NzQzNTIwMTA2NTIxMjUKc1Zfc2NhbGUKcDIKSTAxCnNWZml0X2ludGVyY2VwdApwMwpJMDEKc1ZwZW5hbHR5CnA0ClMnbDEnCnA1CnNWdG9sCnA2CkY1Ni42NTM0NTM5MjkzNjAyMQpzVmNsYXNzX3dlaWdodApwNwpTJ2JhbGFuY2VkJwpwOApzLg==,0.0,0.0,0.0,2018-01-31 
15:10:52.256251,2018-01-31 15:10:52.337445,complete, +80,1,11,73.61.20.41,models/pollution_1-50627e78.model,metrics/pollution_1-50627e78.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTgKc1ZtZXRyaWMKcDIKUydtYW5oYXR0YW4nCnAzCnNWd2VpZ2h0cwpwNApTJ3VuaWZvcm0nCnA1CnNWX3NjYWxlCnA2CkkwMQpzVmFsZ29yaXRobQpwNwpTJ2JydXRlJwpwOApzLg==,0.692063492063492,0.10509806576029747,0.7368421052631579,2018-01-31 15:10:52.386969,2018-01-31 15:10:52.467854,complete, +81,1,7,73.61.20.41,models/pollution_1-b11d1cef.model,metrics/pollution_1-b11d1cef.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE4CnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2tkX3RyZWUnCnA0CnNWbWV0cmljCnA1ClMnbWFuaGF0dGFuJwpwNgpzVndlaWdodHMKcDcKUyd1bmlmb3JtJwpwOApzVmxlYWZfc2l6ZQpwOQpJMQpzLg==,0.5815873015873014,0.21420833405927153,0.6666666666666667,2018-01-31 15:10:52.522562,2018-01-31 15:10:52.609222,complete, +82,1,7,73.61.20.41,models/pollution_1-702ab532.model,metrics/pollution_1-702ab532.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTEKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMna2RfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydtYW5oYXR0YW4nCnA2CnNWd2VpZ2h0cwpwNwpTJ3VuaWZvcm0nCnA4CnNWbGVhZl9zaXplCnA5CkkyMgpzLg==,0.8484848484848484,0.09124517002534317,0.5333333333333333,2018-01-31 15:10:52.659474,2018-01-31 15:10:52.748826,complete, +83,1,17,73.61.20.41,models/pollution_1-79952b4f.model,metrics/pollution_1-79952b4f.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE2CnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2tkX3RyZWUnCnA0CnNWbWV0cmljCnA1ClMnbWlua293c2tpJwpwNgpzVnAKcDcKSTIKc1Z3ZWlnaHRzCnA4ClMnZGlzdGFuY2UnCnA5CnNWbGVhZl9zaXplCnAxMApJMjUKcy4=,0.7880952380952381,0.12697420596165127,0.5555555555555556,2018-01-31 15:10:52.803469,2018-01-31 15:10:52.894003,complete, 
+84,1,21,73.61.20.41,models/pollution_1-ade889a8.model,metrics/pollution_1-ade889a8.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTgKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMnYnJ1dGUnCnA0CnNWbWV0cmljCnA1ClMnbWlua293c2tpJwpwNgpzVnAKcDcKSTIKc1Z3ZWlnaHRzCnA4ClMnZGlzdGFuY2UnCnA5CnMu,0.6377777777777778,0.20093608098590784,0.7777777777777777,2018-01-31 15:10:52.939578,2018-01-31 15:10:53.027512,complete, +85,1,31,73.61.20.41,models/pollution_1-cdf16169.model,metrics/pollution_1-cdf16169.metric,KGRwMApWQwpwMQpGMzIyNi4wOTQ4NTgwMjU0NzYKc1Z0b2wKcDIKRjAuMTE5NTQ4MjQyNDEyNDIxNzIKc1ZmaXRfaW50ZXJjZXB0CnAzCkkwMApzVnBlbmFsdHkKcDQKUydsMicKcDUKc1Zfc2NhbGUKcDYKSTAxCnNWZHVhbApwNwpJMDEKc1ZjbGFzc193ZWlnaHQKcDgKUydiYWxhbmNlZCcKcDkKcy4=,0.6754545454545454,0.1097555510748114,0.5,2018-01-31 15:10:53.086161,2018-01-31 15:10:53.171613,complete, +86,1,3,73.61.20.41,models/pollution_1-70aa649a.model,metrics/pollution_1-70aa649a.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTEwCnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2JhbGxfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydtYW5oYXR0YW4nCnA2CnNWd2VpZ2h0cwpwNwpTJ3VuaWZvcm0nCnA4CnNWbGVhZl9zaXplCnA5Ckk1MApzLg==,0.7451515151515152,0.1068988519799782,0.7692307692307692,2018-01-31 15:10:53.217010,2018-01-31 15:10:53.304091,complete, +87,1,21,73.61.20.41,models/pollution_1-92b2a9c1.model,metrics/pollution_1-92b2a9c1.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTcKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMnYnJ1dGUnCnA0CnNWbWV0cmljCnA1ClMnbWlua293c2tpJwpwNgpzVnAKcDcKSTEKc1Z3ZWlnaHRzCnA4ClMnZGlzdGFuY2UnCnA5CnMu,0.8088888888888889,0.08146801235477051,0.8750000000000001,2018-01-31 15:10:53.350789,2018-01-31 15:10:53.427794,complete, +88,1,24,73.61.20.41,models/pollution_1-c7a0d6aa.model,metrics/pollution_1-c7a0d6aa.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE0CnNWbWV0cmljCnAyClMnY2hlYnlzaGV2JwpwMwpzVndlaWdodHMKcDQKUydkaXN0YW5jZScKcDUKc1Zfc2NhbGUKcDYKSTAxCnNWYWxnb3JpdGhtCnA3ClMnYnJ1dGUnCnA4CnMu,0.7988888888888889,0.08494006601607293,0.5333333333333333,2018-01-31 15:10:53.483275,2018-01-31 
15:10:53.584837,complete, +89,1,5,73.61.20.41,models/pollution_1-730e0e56.model,metrics/pollution_1-730e0e56.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE5CnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2tkX3RyZWUnCnA0CnNWbWV0cmljCnA1ClMnbWlua293c2tpJwpwNgpzVnAKcDcKSTMKc1Z3ZWlnaHRzCnA4ClMndW5pZm9ybScKcDkKc1ZsZWFmX3NpemUKcDEwCkkyNgpzLg==,0.8065656565656566,0.1210394392859909,0.5882352941176471,2018-01-31 15:10:53.640959,2018-01-31 15:10:53.729591,complete, +90,1,20,73.61.20.41,models/pollution_1-64fb025d.model,metrics/pollution_1-64fb025d.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTQKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMna2RfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydjaGVieXNoZXYnCnA2CnNWd2VpZ2h0cwpwNwpTJ2Rpc3RhbmNlJwpwOApzVmxlYWZfc2l6ZQpwOQpJMjQKcy4=,0.7468975468975468,0.1635789643584663,0.5333333333333333,2018-01-31 15:10:53.775294,2018-01-31 15:10:53.857626,complete, +91,1,18,73.61.20.41,models/pollution_1-753d356d.model,metrics/pollution_1-753d356d.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTYKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMna2RfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydldWNsaWRlYW4nCnA2CnNWd2VpZ2h0cwpwNwpTJ2Rpc3RhbmNlJwpwOApzVmxlYWZfc2l6ZQpwOQpJMjUKcy4=,0.7214285714285713,0.07514158970504527,0.7777777777777777,2018-01-31 15:10:53.903687,2018-01-31 15:10:53.983427,complete, +92,1,1,73.61.20.41,models/pollution_1-f2c19f0e.model,metrics/pollution_1-f2c19f0e.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE4CnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2JhbGxfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydtaW5rb3dza2knCnA2CnNWcApwNwpJMgpzVndlaWdodHMKcDgKUyd1bmlmb3JtJwpwOQpzVmxlYWZfc2l6ZQpwMTAKSTMyCnMu,0.7351515151515151,0.18902485248682932,0.7058823529411764,2018-01-31 15:10:54.029486,2018-01-31 15:10:54.113980,complete, 
+93,1,21,73.61.20.41,models/pollution_1-b3f84df8.model,metrics/pollution_1-b3f84df8.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE0CnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2JydXRlJwpwNApzVm1ldHJpYwpwNQpTJ21pbmtvd3NraScKcDYKc1ZwCnA3CkkxCnNWd2VpZ2h0cwpwOApTJ2Rpc3RhbmNlJwpwOQpzLg==,0.7666666666666666,0.2,0.5714285714285714,2018-01-31 15:10:54.163037,2018-01-31 15:10:54.242401,complete, +94,1,3,73.61.20.41,models/pollution_1-82809f26.model,metrics/pollution_1-82809f26.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTcKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMnYmFsbF90cmVlJwpwNApzVm1ldHJpYwpwNQpTJ21hbmhhdHRhbicKcDYKc1Z3ZWlnaHRzCnA3ClMndW5pZm9ybScKcDgKc1ZsZWFmX3NpemUKcDkKSTEyCnMu,0.7620634920634922,0.10530929622307925,0.75,2018-01-31 15:10:54.287678,2018-01-31 15:10:54.364309,complete, +95,1,5,73.61.20.41,models/pollution_1-a0c7b0b5.model,metrics/pollution_1-a0c7b0b5.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTIKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMna2RfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydtaW5rb3dza2knCnA2CnNWcApwNwpJMgpzVndlaWdodHMKcDgKUyd1bmlmb3JtJwpwOQpzVmxlYWZfc2l6ZQpwMTAKSTMwCnMu,0.6444444444444446,0.2061293089618784,0.75,2018-01-31 15:10:54.407206,2018-01-31 15:10:54.489261,complete, +96,1,3,73.61.20.41,models/pollution_1-4fa1e3bd.model,metrics/pollution_1-4fa1e3bd.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTE2CnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2JhbGxfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydtYW5oYXR0YW4nCnA2CnNWd2VpZ2h0cwpwNwpTJ3VuaWZvcm0nCnA4CnNWbGVhZl9zaXplCnA5CkkzMQpzLg==,0.6599999999999999,0.23108440016582682,0.5333333333333333,2018-01-31 15:10:54.539222,2018-01-31 15:10:54.621229,complete, +97,1,21,73.61.20.41,models/pollution_1-febb546c.model,metrics/pollution_1-febb546c.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTEKc1Zfc2NhbGUKcDIKSTAxCnNWYWxnb3JpdGhtCnAzClMnYnJ1dGUnCnA0CnNWbWV0cmljCnA1ClMnbWlua293c2tpJwpwNgpzVnAKcDcKSTIKc1Z3ZWlnaHRzCnA4ClMnZGlzdGFuY2UnCnA5CnMu,0.7761904761904762,0.07276527042826729,0.7272727272727272,2018-01-31 15:10:54.674706,2018-01-31 15:10:54.756012,complete, 
+98,1,11,73.61.20.41,models/pollution_1-057275b5.model,metrics/pollution_1-057275b5.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTYKc1ZtZXRyaWMKcDIKUydtYW5oYXR0YW4nCnAzCnNWd2VpZ2h0cwpwNApTJ3VuaWZvcm0nCnA1CnNWX3NjYWxlCnA2CkkwMQpzVmFsZ29yaXRobQpwNwpTJ2JydXRlJwpwOApzLg==,0.7784126984126984,0.1897382530917763,0.625,2018-01-31 15:10:54.815088,2018-01-31 15:10:54.907991,complete, +99,1,5,73.61.20.41,models/pollution_1-37836509.model,metrics/pollution_1-37836509.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTExCnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2tkX3RyZWUnCnA0CnNWbWV0cmljCnA1ClMnbWlua293c2tpJwpwNgpzVnAKcDcKSTMKc1Z3ZWlnaHRzCnA4ClMndW5pZm9ybScKcDkKc1ZsZWFmX3NpemUKcDEwCkkxNwpzLg==,0.6823310023310023,0.20069647965226298,0.761904761904762,2018-01-31 15:10:54.961437,2018-01-31 15:10:55.057085,complete, +100,1,15,73.61.20.41,models/pollution_1-306065b8.model,metrics/pollution_1-306065b8.metric,KGRwMApWbl9uZWlnaGJvcnMKcDEKSTEwCnNWX3NjYWxlCnAyCkkwMQpzVmFsZ29yaXRobQpwMwpTJ2JhbGxfdHJlZScKcDQKc1ZtZXRyaWMKcDUKUydtYW5oYXR0YW4nCnA2CnNWd2VpZ2h0cwpwNwpTJ2Rpc3RhbmNlJwpwOApzVmxlYWZfc2l6ZQpwOQpJMzMKcy4=,0.6757142857142857,0.16779482125744025,0.8695652173913044,2018-01-31 15:10:55.107116,2018-01-31 15:10:55.192777,complete, +101,2,40,73.61.20.1,models/pollution_1-e63d583f.model,metrics/pollution_1-e63d583f.metric,KGRwMApWQwpwMQpGMC4wMDIxMTMxMzQ5Mjk3MzE3NDkKc1Z0b2wKcDIKRjQ5NzcuNjE3NDkwMzM2Njg1NQpzVmZpdF9pbnRlcmNlcHQKcDMKSTAwCnNWcGVuYWx0eQpwNApTJ2wyJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZkdWFsCnA3CkkwMApzVmNsYXNzX3dlaWdodApwOApTJ2JhbGFuY2VkJwpwOQpzLg==,0.0,0.0,0.0,2018-01-31 15:30:45.094949,2018-01-31 15:30:45.212737,complete, +102,2,40,73.61.20.1,models/pollution_1-61972bb2.model,metrics/pollution_1-61972bb2.metric,KGRwMApWQwpwMQpGOTc2NzAuMTM2MTQyODgxCnNWdG9sCnAyCkYyNzguMDcxMzU5MDU2MTA3MTYKc1ZmaXRfaW50ZXJjZXB0CnAzCkkwMApzVnBlbmFsdHkKcDQKUydsMicKcDUKc1Zfc2NhbGUKcDYKSTAxCnNWZHVhbApwNwpJMDAKc1ZjbGFzc193ZWlnaHQKcDgKUydiYWxhbmNlZCcKcDkKcy4=,0.0,0.0,0.0,2018-01-31 15:30:45.255995,2018-01-31 15:30:45.363135,complete, 
+103,2,36,73.61.20.1,models/pollution_1-c86727f2.model,metrics/pollution_1-c86727f2.metric,KGRwMApWQwpwMQpGMzY2LjMxMDA2MTE1Mjg4MDczCnNWX3NjYWxlCnAyCkkwMQpzVmZpdF9pbnRlcmNlcHQKcDMKSTAwCnNWcGVuYWx0eQpwNApTJ2wxJwpwNQpzVnRvbApwNgpGMjUuNTk4NTIzNDM4ODQzMTQ1CnNWY2xhc3Nfd2VpZ2h0CnA3ClMnYmFsYW5jZWQnCnA4CnMu,0.0,0.0,0.0,2018-01-31 15:30:45.418209,2018-01-31 15:30:45.510494,complete, +104,2,40,73.61.20.1,models/pollution_1-a0bd7d35.model,metrics/pollution_1-a0bd7d35.metric,KGRwMApWQwpwMQpGMTQxODQuNDQyNTAyNDc1MzM4CnNWdG9sCnAyCkYyLjEyMTc3NDEzNjg5MjUyOQpzVmZpdF9pbnRlcmNlcHQKcDMKSTAwCnNWcGVuYWx0eQpwNApTJ2wyJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZkdWFsCnA3CkkwMApzVmNsYXNzX3dlaWdodApwOApTJ2JhbGFuY2VkJwpwOQpzLg==,0.7561038961038962,0.10909755700214088,0.7058823529411765,2018-01-31 15:30:45.555835,2018-01-31 15:30:45.648646,complete, +105,2,39,73.61.20.1,models/pollution_1-a7dce8a4.model,metrics/pollution_1-a7dce8a4.metric,KGRwMApWQwpwMQpGMTMuNjM5NzIwNjk1ODAzOTYKc1Z0b2wKcDIKRjEyLjQwMTMxMjYyMzQ0MzM1CnNWZml0X2ludGVyY2VwdApwMwpJMDAKc1ZwZW5hbHR5CnA0ClMnbDInCnA1CnNWX3NjYWxlCnA2CkkwMQpzVmR1YWwKcDcKSTAxCnNWY2xhc3Nfd2VpZ2h0CnA4ClMnYmFsYW5jZWQnCnA5CnMu,0.573015873015873,0.3149263952366166,0.5882352941176471,2018-01-31 15:30:45.692114,2018-01-31 15:30:45.782035,complete, +106,2,36,73.61.20.1,models/pollution_1-93b98ce3.model,metrics/pollution_1-93b98ce3.metric,KGRwMApWQwpwMQpGMC4yNjY0ODk5MjcxOTE0NzQ0NgpzVl9zY2FsZQpwMgpJMDEKc1ZmaXRfaW50ZXJjZXB0CnAzCkkwMApzVnBlbmFsdHkKcDQKUydsMScKcDUKc1Z0b2wKcDYKRjEwOS41OTUwNzcxMjYxOTIwMQpzVmNsYXNzX3dlaWdodApwNwpTJ2JhbGFuY2VkJwpwOApzLg==,0.0,0.0,0.0,2018-01-31 15:30:45.819107,2018-01-31 15:30:45.907020,complete, +107,2,40,73.61.20.1,models/pollution_1-c798f10e.model,metrics/pollution_1-c798f10e.metric,KGRwMApWQwpwMQpGMi41NzY3MDY4ODYyMzU1NDk2CnNWdG9sCnAyCkYzODEyLjI4MTI0MzU0OTg0NjMKc1ZmaXRfaW50ZXJjZXB0CnAzCkkwMApzVnBlbmFsdHkKcDQKUydsMicKcDUKc1Zfc2NhbGUKcDYKSTAxCnNWZHVhbApwNwpJMDAKc1ZjbGFzc193ZWlnaHQKcDgKUydiYWxhbmNlZCcKcDkKcy4=,0.0,0.0,0.0,2018-01-31 
15:30:45.951932,2018-01-31 15:30:46.030495,complete, +108,2,40,73.61.20.1,models/pollution_1-17f4fff8.model,metrics/pollution_1-17f4fff8.metric,KGRwMApWQwpwMQpGMC41MzU1OTc0MjYwMzQwMTMyCnNWdG9sCnAyCkY3LjAwODkwMDIzNDY0NDU1OQpzVmZpdF9pbnRlcmNlcHQKcDMKSTAwCnNWcGVuYWx0eQpwNApTJ2wyJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZkdWFsCnA3CkkwMApzVmNsYXNzX3dlaWdodApwOApTJ2JhbGFuY2VkJwpwOQpzLg==,0.0,0.0,0.0,2018-01-31 15:30:46.068631,2018-01-31 15:30:46.147907,complete, +109,2,38,73.61.20.1,models/pollution_1-af06681a.model,metrics/pollution_1-af06681a.metric,KGRwMApWQwpwMQpGMi40ODAyODA0NTU2MjU3NzkyZS0wNQpzVnRvbApwMgpGMS4wNTkxOTAyNjQ2MjczMDc1CnNWZml0X2ludGVyY2VwdApwMwpJMDEKc1ZwZW5hbHR5CnA0ClMnbDInCnA1CnNWX3NjYWxlCnA2CkkwMQpzVmR1YWwKcDcKSTAwCnNWY2xhc3Nfd2VpZ2h0CnA4ClMnYmFsYW5jZWQnCnA5CnMu,0.780952380952381,0.182201204473419,0.588235294117647,2018-01-31 15:30:46.206129,2018-01-31 15:30:46.290464,complete, +110,2,35,73.61.20.1,,,KGRwMApWQwpwMQpGMC4wMDQ2NzMyODEwMTQ3OTE5MDgKc1Zfc2NhbGUKcDIKSTAxCnNWZml0X2ludGVyY2VwdApwMwpJMDEKc1ZwZW5hbHR5CnA0ClMnbDEnCnA1CnNWdG9sCnA2CkYwLjA3NjYxMTc2OTUzNTM3OTIyCnNWY2xhc3Nfd2VpZ2h0CnA3ClMnYmFsYW5jZWQnCnA4CnMu,,,,2018-01-31 15:30:46.350814,,running, +111,2,37,73.61.20.1,models/pollution_1-d6103da8.model,metrics/pollution_1-d6103da8.metric,KGRwMApWQwpwMQpGMC4wMTE5MDg2MTY3Nzc4MDc3NjYKc1Z0b2wKcDIKRjQ5MDIwLjI3NjAxMTYwMTUxCnNWZml0X2ludGVyY2VwdApwMwpJMDEKc1ZwZW5hbHR5CnA0ClMnbDInCnA1CnNWX3NjYWxlCnA2CkkwMQpzVmR1YWwKcDcKSTAxCnNWY2xhc3Nfd2VpZ2h0CnA4ClMnYmFsYW5jZWQnCnA5CnMu,0.7144444444444444,0.09935595069381407,0.6666666666666665,2018-01-31 15:30:56.739465,2018-01-31 15:30:56.836628,complete, +112,2,38,73.61.20.1,models/pollution_1-6fd7b306.model,metrics/pollution_1-6fd7b306.metric,KGRwMApWQwpwMQpGMC4wMTU3MDcxOTQ4MTk2ODkyNTgKc1Z0b2wKcDIKRjQuMzQ0MjQ4NTU0MTI2OTg2CnNWZml0X2ludGVyY2VwdApwMwpJMDEKc1ZwZW5hbHR5CnA0ClMnbDInCnA1CnNWX3NjYWxlCnA2CkkwMQpzVmR1YWwKcDcKSTAwCnNWY2xhc3Nfd2VpZ2h0CnA4ClMnYmFsYW5jZWQnCnA5CnMu,0.0,0.0,0.0,2018-01-31 15:30:56.879862,2018-01-31 
15:30:56.962056,complete, +113,2,37,73.61.20.1,models/pollution_1-227d5c78.model,metrics/pollution_1-227d5c78.metric,KGRwMApWQwpwMQpGMC4wMTc2Nzk4MzMyMjgxODc2OTcKc1Z0b2wKcDIKRjE5LjU2OTM5NjAyNjk1NTQzCnNWZml0X2ludGVyY2VwdApwMwpJMDEKc1ZwZW5hbHR5CnA0ClMnbDInCnA1CnNWX3NjYWxlCnA2CkkwMQpzVmR1YWwKcDcKSTAxCnNWY2xhc3Nfd2VpZ2h0CnA4ClMnYmFsYW5jZWQnCnA5CnMu,0.722857142857143,0.14613384974336482,0.823529411764706,2018-01-31 15:30:57.010340,2018-01-31 15:30:57.118264,complete, +114,2,39,73.61.20.1,models/pollution_1-5c5ff34c.model,metrics/pollution_1-5c5ff34c.metric,KGRwMApWQwpwMQpGMTUuNDI1NzQzNDY3NDg2ODcxCnNWdG9sCnAyCkY0MzUzLjE3ODMzNTY4MDQyMgpzVmZpdF9pbnRlcmNlcHQKcDMKSTAwCnNWcGVuYWx0eQpwNApTJ2wyJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZkdWFsCnA3CkkwMQpzVmNsYXNzX3dlaWdodApwOApTJ2JhbGFuY2VkJwpwOQpzLg==,0.8692063492063493,0.07378203960042636,0.4210526315789474,2018-01-31 15:30:57.157569,2018-01-31 15:30:57.238131,complete, +115,2,35,73.61.20.1,models/pollution_1-eff23fad.model,metrics/pollution_1-eff23fad.metric,KGRwMApWQwpwMQpGMC4wOTkzMzY3MDkwMzczMTgwNQpzVl9zY2FsZQpwMgpJMDEKc1ZmaXRfaW50ZXJjZXB0CnAzCkkwMQpzVnBlbmFsdHkKcDQKUydsMScKcDUKc1Z0b2wKcDYKRjAuMDc4MjIxMTY4MTkzODY1MzUKc1ZjbGFzc193ZWlnaHQKcDcKUydiYWxhbmNlZCcKcDgKcy4=,0.26,0.332264954516723,0.75,2018-01-31 15:30:57.273225,2018-01-31 15:30:57.356413,complete, +116,2,40,73.61.20.1,models/pollution_1-aabb7fc3.model,metrics/pollution_1-aabb7fc3.metric,KGRwMApWQwpwMQpGMjAzLjYwNDIxMzg2ODE0MjQ3CnNWdG9sCnAyCkYxLjA5MjY5Mjk3NDk0MDYwMDcKc1ZmaXRfaW50ZXJjZXB0CnAzCkkwMApzVnBlbmFsdHkKcDQKUydsMicKcDUKc1Zfc2NhbGUKcDYKSTAxCnNWZHVhbApwNwpJMDAKc1ZjbGFzc193ZWlnaHQKcDgKUydiYWxhbmNlZCcKcDkKcy4=,0.7455555555555555,0.18082117214611873,0.8181818181818182,2018-01-31 15:30:57.395670,2018-01-31 15:30:57.482071,complete, 
+117,2,39,73.61.20.1,models/pollution_1-c9bf9ea8.model,metrics/pollution_1-c9bf9ea8.metric,KGRwMApWQwpwMQpGMTcuNjU0NjQ0NzIyNTk4MzY2CnNWdG9sCnAyCkYwLjEwNDM3MDQ2OTkyMjY2NTczCnNWZml0X2ludGVyY2VwdApwMwpJMDAKc1ZwZW5hbHR5CnA0ClMnbDInCnA1CnNWX3NjYWxlCnA2CkkwMQpzVmR1YWwKcDcKSTAxCnNWY2xhc3Nfd2VpZ2h0CnA4ClMnYmFsYW5jZWQnCnA5CnMu,0.700952380952381,0.16583951966795693,0.75,2018-01-31 15:30:57.523304,2018-01-31 15:30:57.605148,complete, +118,2,36,73.61.20.1,models/pollution_1-ba4416f0.model,metrics/pollution_1-ba4416f0.metric,KGRwMApWQwpwMQpGMTU5MzkuNDM5NDY3OTA4ODA0CnNWX3NjYWxlCnAyCkkwMQpzVmZpdF9pbnRlcmNlcHQKcDMKSTAwCnNWcGVuYWx0eQpwNApTJ2wxJwpwNQpzVnRvbApwNgpGNTQ1Ljk3MDE0OTgwNDQ3NjQKc1ZjbGFzc193ZWlnaHQKcDcKUydiYWxhbmNlZCcKcDgKcy4=,0.0,0.0,0.0,2018-01-31 15:30:57.647078,2018-01-31 15:30:57.731458,complete, +119,2,40,73.61.20.1,models/pollution_1-2cf51dd6.model,metrics/pollution_1-2cf51dd6.metric,KGRwMApWQwpwMQpGMC4wOTgyNzkwNzU1MzIyMDkwOApzVnRvbApwMgpGMC40Njg4MDQ2NDU3MjMwMDMwNwpzVmZpdF9pbnRlcmNlcHQKcDMKSTAwCnNWcGVuYWx0eQpwNApTJ2wyJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZkdWFsCnA3CkkwMApzVmNsYXNzX3dlaWdodApwOApTJ2JhbGFuY2VkJwpwOQpzLg==,0.6787878787878788,0.21005977744236534,0.9,2018-01-31 15:30:57.774132,2018-01-31 15:30:57.855378,complete, +120,2,34,73.61.20.1,models/pollution_1-2a2b97e6.model,metrics/pollution_1-2a2b97e6.metric,KGRwMApWbWF4X2ZlYXR1cmVzCnAxCkYwLjYwMjQ4NTc4NDE2OTc4OApzVm1pbl9zYW1wbGVzX3NwbGl0CnAyCkkzCnNWY3JpdGVyaW9uCnAzClMnZ2luaScKcDQKc1ZtYXhfZGVwdGgKcDUKSTcKc1ZtaW5fc2FtcGxlc19sZWFmCnA2CkkzCnMu,0.6933333333333334,0.11716995427984529,0.7500000000000001,2018-01-31 15:30:57.904087,2018-01-31 15:30:57.995673,complete, +121,2,35,73.61.20.1,models/pollution_1-29f4018a.model,metrics/pollution_1-29f4018a.metric,KGRwMApWQwpwMQpGMC4wMDAxMjA5MDQ3MDU0OTIwNjg3NwpzVl9zY2FsZQpwMgpJMDEKc1ZmaXRfaW50ZXJjZXB0CnAzCkkwMQpzVnBlbmFsdHkKcDQKUydsMScKcDUKc1Z0b2wKcDYKRjQ2NjcxLjgwNDMxNDA3NjcxCnNWY2xhc3Nfd2VpZ2h0CnA3ClMnYmFsYW5jZWQnCnA4CnMu,0.0,0.0,0.0,2018-01-31 15:30:58.041241,2018-01-31 
15:30:58.127445,complete, +122,2,36,73.61.20.1,models/pollution_1-4fe5d3d1.model,metrics/pollution_1-4fe5d3d1.metric,KGRwMApWQwpwMQpGMC4wMDAzNDE4Mzc3Mjc4MDk0NTgwNQpzVl9zY2FsZQpwMgpJMDEKc1ZmaXRfaW50ZXJjZXB0CnAzCkkwMApzVnBlbmFsdHkKcDQKUydsMScKcDUKc1Z0b2wKcDYKRjAuMDAwMTMwOTg1MjQzODM5MTczOTIKc1ZjbGFzc193ZWlnaHQKcDcKUydiYWxhbmNlZCcKcDgKcy4=,0.0,0.0,0.0,2018-01-31 15:30:58.169125,2018-01-31 15:30:58.252901,complete, +123,2,37,73.61.20.1,,,KGRwMApWQwpwMQpGMC4wNTMwMDM5NTgxOTc0MzEzOTQKc1Z0b2wKcDIKRjE5LjU3NjU0MzYzMzU5OTQwMwpzVmZpdF9pbnRlcmNlcHQKcDMKSTAxCnNWcGVuYWx0eQpwNApTJ2wyJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZkdWFsCnA3CkkwMQpzVmNsYXNzX3dlaWdodApwOApTJ2JhbGFuY2VkJwpwOQpzLg==,,,,2018-01-31 15:30:58.297211,,running, +124,2,37,localhost,models/pollution_1-a94a103e.model,metrics/pollution_1-a94a103e.metric,KGRwMApWQwpwMQpGMS4xODE0NjA5Mjg2NDAzMjM3CnNWdG9sCnAyCkYxNC40OTk4OTYxODQ3MDI1MzIKc1ZmaXRfaW50ZXJjZXB0CnAzCkkwMQpzVnBlbmFsdHkKcDQKUydsMicKcDUKc1Zfc2NhbGUKcDYKSTAxCnNWZHVhbApwNwpJMDEKc1ZjbGFzc193ZWlnaHQKcDgKUydiYWxhbmNlZCcKcDkKcy4=,0.6433333333333333,0.2012737218593404,0.7499999999999999,2018-01-31 15:37:39.749518,2018-01-31 15:37:39.865153,complete, +125,2,33,localhost,models/pollution_1-a532b070.model,metrics/pollution_1-a532b070.metric,KGRwMApWbWF4X2ZlYXR1cmVzCnAxCkYwLjE3NDYzMTg1ODUwMzU2MjI4CnNWbWluX3NhbXBsZXNfc3BsaXQKcDIKSTMKc1Zjcml0ZXJpb24KcDMKUydlbnRyb3B5JwpwNApzVm1heF9kZXB0aApwNQpJMwpzVm1pbl9zYW1wbGVzX2xlYWYKcDYKSTMKcy4=,0.5857142857142857,0.2101991281366023,0.7272727272727273,2018-01-31 15:37:39.911510,2018-01-31 15:37:39.991485,complete, +126,2,35,localhost,models/pollution_1-51a457d2.model,metrics/pollution_1-51a457d2.metric,KGRwMApWQwpwMQpGNy41MDM5NzgzNDEyODk1MTQKc1Zfc2NhbGUKcDIKSTAxCnNWZml0X2ludGVyY2VwdApwMwpJMDEKc1ZwZW5hbHR5CnA0ClMnbDEnCnA1CnNWdG9sCnA2CkYyMzcuNDAwODU5NjExNDc4NwpzVmNsYXNzX3dlaWdodApwNwpTJ2JhbGFuY2VkJwpwOApzLg==,0.0,0.0,0.0,2018-01-31 15:37:40.044548,2018-01-31 15:37:40.136608,complete, 
+127,2,34,localhost,models/pollution_1-7f6d4085.model,metrics/pollution_1-7f6d4085.metric,KGRwMApWbWF4X2ZlYXR1cmVzCnAxCkYwLjg4NjQzMDI1MDkzMTA5NDIKc1ZtaW5fc2FtcGxlc19zcGxpdApwMgpJNApzVmNyaXRlcmlvbgpwMwpTJ2dpbmknCnA0CnNWbWF4X2RlcHRoCnA1Ckk5CnNWbWluX3NhbXBsZXNfbGVhZgpwNgpJMQpzLg==,0.7644444444444443,0.08618097524295704,0.5,2018-01-31 15:37:40.180470,2018-01-31 15:37:40.262599,complete, +128,2,33,localhost,models/pollution_1-fef7776a.model,metrics/pollution_1-fef7776a.metric,KGRwMApWbWF4X2ZlYXR1cmVzCnAxCkYwLjk0MzAwNzg5MzY0OTMyMTEKc1ZtaW5fc2FtcGxlc19zcGxpdApwMgpJMwpzVmNyaXRlcmlvbgpwMwpTJ2VudHJvcHknCnA0CnNWbWF4X2RlcHRoCnA1CkkzCnNWbWluX3NhbXBsZXNfbGVhZgpwNgpJMwpzLg==,0.6644444444444444,0.12773429211733262,0.7826086956521738,2018-01-31 15:37:40.314530,2018-01-31 15:37:40.405947,complete, +129,2,34,localhost,models/pollution_1-5bd6b5d2.model,metrics/pollution_1-5bd6b5d2.metric,KGRwMApWbWF4X2ZlYXR1cmVzCnAxCkYwLjQ0MTg2NDA1MzcxMDE3NDcKc1ZtaW5fc2FtcGxlc19zcGxpdApwMgpJMgpzVmNyaXRlcmlvbgpwMwpTJ2dpbmknCnA0CnNWbWF4X2RlcHRoCnA1Ckk5CnNWbWluX3NhbXBsZXNfbGVhZgpwNgpJMQpzLg==,0.7551515151515151,0.10919860004417176,0.761904761904762,2018-01-31 15:37:40.451174,2018-01-31 15:37:40.532247,complete, +130,2,39,localhost,models/pollution_1-3427f8b0.model,metrics/pollution_1-3427f8b0.metric,KGRwMApWQwpwMQpGMjcuMTg0NzA1MzU4NjQwNDMyCnNWdG9sCnAyCkYwLjAwNTEwMTU0NTM5Mzc4MjUwNwpzVmZpdF9pbnRlcmNlcHQKcDMKSTAwCnNWcGVuYWx0eQpwNApTJ2wyJwpwNQpzVl9zY2FsZQpwNgpJMDEKc1ZkdWFsCnA3CkkwMQpzVmNsYXNzX3dlaWdodApwOApTJ2JhbGFuY2VkJwpwOQpzLg==,0.7666666666666666,0.12247448713915896,0.7142857142857143,2018-01-31 15:37:40.572025,2018-01-31 15:37:40.670665,complete, +131,2,36,localhost,models/pollution_1-3e877204.model,metrics/pollution_1-3e877204.metric,KGRwMApWQwpwMQpGMTQxOS4wMjg4NDk5ODUzMjcKc1Zfc2NhbGUKcDIKSTAxCnNWZml0X2ludGVyY2VwdApwMwpJMDAKc1ZwZW5hbHR5CnA0ClMnbDEnCnA1CnNWdG9sCnA2CkY0MzQ4OS43MzMzMjY5OTkyNzQKc1ZjbGFzc193ZWlnaHQKcDcKUydiYWxhbmNlZCcKcDgKcy4=,0.0,0.0,0.0,2018-01-31 15:37:40.715102,2018-01-31 
15:37:40.798759,complete, +132,2,35,localhost,models/pollution_1-63d65260.model,metrics/pollution_1-63d65260.metric,KGRwMApWQwpwMQpGMS4zMzA5NDY5Mjc4MjMwNTgxCnNWX3NjYWxlCnAyCkkwMQpzVmZpdF9pbnRlcmNlcHQKcDMKSTAxCnNWcGVuYWx0eQpwNApTJ2wxJwpwNQpzVnRvbApwNgpGMC41NTUwMjAxNTk4MzcyMjIxCnNWY2xhc3Nfd2VpZ2h0CnA3ClMnYmFsYW5jZWQnCnA4CnMu,0.738095238095238,0.0702280057321554,0.8571428571428571,2018-01-31 15:37:40.841971,2018-01-31 15:37:40.925258,complete, +133,2,36,localhost,models/pollution_1-f2d0ad04.model,metrics/pollution_1-f2d0ad04.metric,KGRwMApWQwpwMQpGMC4xMjYyNTQzOTQ4OTMxMTk2CnNWX3NjYWxlCnAyCkkwMQpzVmZpdF9pbnRlcmNlcHQKcDMKSTAwCnNWcGVuYWx0eQpwNApTJ2wxJwpwNQpzVnRvbApwNgpGMC4wMDAxMzI5OTE4NTg1NzI1NjIxNwpzVmNsYXNzX3dlaWdodApwNwpTJ2JhbGFuY2VkJwpwOApzLg==,0.7775324675324675,0.11609227403407643,0.5,2018-01-31 15:37:40.970800,2018-01-31 15:37:41.056580,complete, diff --git a/atm/data/modelhub/test/dataruns.csv b/atm/data/modelhub/test/dataruns.csv new file mode 100644 index 0000000..8c2a4bc --- /dev/null +++ b/atm/data/modelhub/test/dataruns.csv @@ -0,0 +1,3 @@ +id,dataset_id,description,priority,selector,k_window,tuner,gridding,r_minimum,budget_type,budget,deadline,metric,score_target,start_time,end_time,status +1,1,uniform__uniform,1,uniform,3,uniform,0,2,classifier,100,,f1,cv_judgment_metric,2018-01-31 15:09:41.912935,2018-01-31 15:10:55.234515,complete +2,1,uniform__uniform,1,uniform,3,uniform,0,2,classifier,100,,f1,cv_judgment_metric,2018-01-31 15:30:44.770449,,running diff --git a/atm/data/modelhub/test/datasets.csv b/atm/data/modelhub/test/datasets.csv new file mode 100644 index 0000000..39c2439 --- /dev/null +++ b/atm/data/modelhub/test/datasets.csv @@ -0,0 +1,2 @@ +id,name,class_column,train_path,test_path,description,n_examples,k_classes,d_features,majority,size_kb +1,pollution_1,class,/home/bcyphers/work/fl/atm/atm/data/test/pollution_1.csv,,,60,2,15,0.5166666666666667,7 diff --git a/atm/data/modelhub/test/hyperpartitions.csv b/atm/data/modelhub/test/hyperpartitions.csv 
new file mode 100644 index 0000000..7de38c8 --- /dev/null +++ b/atm/data/modelhub/test/hyperpartitions.csv @@ -0,0 +1,41 @@ +id,datarun_id,method,categorical_hyperparameters_64,tunable_hyperparameters_64,constant_hyperparameters_64,status +1,1,knn,KGxwMAooVndlaWdodHMKcDEKUyd1bmlmb3JtJwpwMgp0cDMKYShWYWxnb3JpdGhtCnA0ClMnYmFsbF90cmVlJwpwNQp0cDYKYShWbWV0cmljCnA3ClMnbWlua293c2tpJwpwOAp0cDkKYS4=,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphKFZsZWFmX3NpemUKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkkxCmFJNTAKYXNnMTAKVmludApwMTgKc2J0cDE5CmEoVnAKcDIwCmcyCihnMwpnNApOdHAyMQpScDIyCihkcDIzCmc4CihscDI0CkkxCmFJMwphc2cxMApWaW50CnAyNQpzYnRwMjYKYS4=,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete +2,1,knn,KGxwMAooVndlaWdodHMKcDEKUyd1bmlmb3JtJwpwMgp0cDMKYShWYWxnb3JpdGhtCnA0ClMnYmFsbF90cmVlJwpwNQp0cDYKYShWbWV0cmljCnA3ClMnZXVjbGlkZWFuJwpwOAp0cDkKYS4=,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphKFZsZWFmX3NpemUKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkkxCmFJNTAKYXNnMTAKVmludApwMTgKc2J0cDE5CmEu,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete 
+3,1,knn,KGxwMAooVndlaWdodHMKcDEKUyd1bmlmb3JtJwpwMgp0cDMKYShWYWxnb3JpdGhtCnA0ClMnYmFsbF90cmVlJwpwNQp0cDYKYShWbWV0cmljCnA3ClMnbWFuaGF0dGFuJwpwOAp0cDkKYS4=,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphKFZsZWFmX3NpemUKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkkxCmFJNTAKYXNnMTAKVmludApwMTgKc2J0cDE5CmEu,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete +4,1,knn,KGxwMAooVndlaWdodHMKcDEKUyd1bmlmb3JtJwpwMgp0cDMKYShWYWxnb3JpdGhtCnA0ClMnYmFsbF90cmVlJwpwNQp0cDYKYShWbWV0cmljCnA3ClMnY2hlYnlzaGV2JwpwOAp0cDkKYS4=,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphKFZsZWFmX3NpemUKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkkxCmFJNTAKYXNnMTAKVmludApwMTgKc2J0cDE5CmEu,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete +5,1,knn,KGxwMAooVndlaWdodHMKcDEKUyd1bmlmb3JtJwpwMgp0cDMKYShWYWxnb3JpdGhtCnA0ClMna2RfdHJlZScKcDUKdHA2CmEoVm1ldHJpYwpwNwpTJ21pbmtvd3NraScKcDgKdHA5CmEu,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphKFZsZWFmX3NpemUKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkkxCmFJNTAKYXNnMTAKVmludApwMTgKc2J0cDE5CmEoVnAKcDIwCmcyCihnMwpnNApOdHAyMQpScDIyCihkcDIzCmc4CihscDI0CkkxCmFJMwphc2cxMApWaW50CnAyNQpzYnRwMjYKYS4=,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete 
+6,1,knn,KGxwMAooVndlaWdodHMKcDEKUyd1bmlmb3JtJwpwMgp0cDMKYShWYWxnb3JpdGhtCnA0ClMna2RfdHJlZScKcDUKdHA2CmEoVm1ldHJpYwpwNwpTJ2V1Y2xpZGVhbicKcDgKdHA5CmEu,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphKFZsZWFmX3NpemUKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkkxCmFJNTAKYXNnMTAKVmludApwMTgKc2J0cDE5CmEu,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete +7,1,knn,KGxwMAooVndlaWdodHMKcDEKUyd1bmlmb3JtJwpwMgp0cDMKYShWYWxnb3JpdGhtCnA0ClMna2RfdHJlZScKcDUKdHA2CmEoVm1ldHJpYwpwNwpTJ21hbmhhdHRhbicKcDgKdHA5CmEu,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphKFZsZWFmX3NpemUKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkkxCmFJNTAKYXNnMTAKVmludApwMTgKc2J0cDE5CmEu,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete +8,1,knn,KGxwMAooVndlaWdodHMKcDEKUyd1bmlmb3JtJwpwMgp0cDMKYShWYWxnb3JpdGhtCnA0ClMna2RfdHJlZScKcDUKdHA2CmEoVm1ldHJpYwpwNwpTJ2NoZWJ5c2hldicKcDgKdHA5CmEu,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphKFZsZWFmX3NpemUKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkkxCmFJNTAKYXNnMTAKVmludApwMTgKc2J0cDE5CmEu,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete 
+9,1,knn,KGxwMAooVndlaWdodHMKcDEKUyd1bmlmb3JtJwpwMgp0cDMKYShWYWxnb3JpdGhtCnA0ClMnYnJ1dGUnCnA1CnRwNgphKFZtZXRyaWMKcDcKUydtaW5rb3dza2knCnA4CnRwOQphLg==,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphKFZwCnAxMwpnMgooZzMKZzQKTnRwMTQKUnAxNQooZHAxNgpnOAoobHAxNwpJMQphSTMKYXNnMTAKVmludApwMTgKc2J0cDE5CmEu,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete +10,1,knn,KGxwMAooVndlaWdodHMKcDEKUyd1bmlmb3JtJwpwMgp0cDMKYShWYWxnb3JpdGhtCnA0ClMnYnJ1dGUnCnA1CnRwNgphKFZtZXRyaWMKcDcKUydldWNsaWRlYW4nCnA4CnRwOQphLg==,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphLg==,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete +11,1,knn,KGxwMAooVndlaWdodHMKcDEKUyd1bmlmb3JtJwpwMgp0cDMKYShWYWxnb3JpdGhtCnA0ClMnYnJ1dGUnCnA1CnRwNgphKFZtZXRyaWMKcDcKUydtYW5oYXR0YW4nCnA4CnRwOQphLg==,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphLg==,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete +12,1,knn,KGxwMAooVndlaWdodHMKcDEKUyd1bmlmb3JtJwpwMgp0cDMKYShWYWxnb3JpdGhtCnA0ClMnYnJ1dGUnCnA1CnRwNgphKFZtZXRyaWMKcDcKUydjaGVieXNoZXYnCnA4CnRwOQphLg==,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphLg==,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete 
+13,1,knn,KGxwMAooVndlaWdodHMKcDEKUydkaXN0YW5jZScKcDIKdHAzCmEoVmFsZ29yaXRobQpwNApTJ2JhbGxfdHJlZScKcDUKdHA2CmEoVm1ldHJpYwpwNwpTJ21pbmtvd3NraScKcDgKdHA5CmEu,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphKFZsZWFmX3NpemUKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkkxCmFJNTAKYXNnMTAKVmludApwMTgKc2J0cDE5CmEoVnAKcDIwCmcyCihnMwpnNApOdHAyMQpScDIyCihkcDIzCmc4CihscDI0CkkxCmFJMwphc2cxMApWaW50CnAyNQpzYnRwMjYKYS4=,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete +14,1,knn,KGxwMAooVndlaWdodHMKcDEKUydkaXN0YW5jZScKcDIKdHAzCmEoVmFsZ29yaXRobQpwNApTJ2JhbGxfdHJlZScKcDUKdHA2CmEoVm1ldHJpYwpwNwpTJ2V1Y2xpZGVhbicKcDgKdHA5CmEu,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphKFZsZWFmX3NpemUKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkkxCmFJNTAKYXNnMTAKVmludApwMTgKc2J0cDE5CmEu,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete +15,1,knn,KGxwMAooVndlaWdodHMKcDEKUydkaXN0YW5jZScKcDIKdHAzCmEoVmFsZ29yaXRobQpwNApTJ2JhbGxfdHJlZScKcDUKdHA2CmEoVm1ldHJpYwpwNwpTJ21hbmhhdHRhbicKcDgKdHA5CmEu,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphKFZsZWFmX3NpemUKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkkxCmFJNTAKYXNnMTAKVmludApwMTgKc2J0cDE5CmEu,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete 
+16,1,knn,KGxwMAooVndlaWdodHMKcDEKUydkaXN0YW5jZScKcDIKdHAzCmEoVmFsZ29yaXRobQpwNApTJ2JhbGxfdHJlZScKcDUKdHA2CmEoVm1ldHJpYwpwNwpTJ2NoZWJ5c2hldicKcDgKdHA5CmEu,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphKFZsZWFmX3NpemUKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkkxCmFJNTAKYXNnMTAKVmludApwMTgKc2J0cDE5CmEu,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete +17,1,knn,KGxwMAooVndlaWdodHMKcDEKUydkaXN0YW5jZScKcDIKdHAzCmEoVmFsZ29yaXRobQpwNApTJ2tkX3RyZWUnCnA1CnRwNgphKFZtZXRyaWMKcDcKUydtaW5rb3dza2knCnA4CnRwOQphLg==,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphKFZsZWFmX3NpemUKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkkxCmFJNTAKYXNnMTAKVmludApwMTgKc2J0cDE5CmEoVnAKcDIwCmcyCihnMwpnNApOdHAyMQpScDIyCihkcDIzCmc4CihscDI0CkkxCmFJMwphc2cxMApWaW50CnAyNQpzYnRwMjYKYS4=,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete +18,1,knn,KGxwMAooVndlaWdodHMKcDEKUydkaXN0YW5jZScKcDIKdHAzCmEoVmFsZ29yaXRobQpwNApTJ2tkX3RyZWUnCnA1CnRwNgphKFZtZXRyaWMKcDcKUydldWNsaWRlYW4nCnA4CnRwOQphLg==,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphKFZsZWFmX3NpemUKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkkxCmFJNTAKYXNnMTAKVmludApwMTgKc2J0cDE5CmEu,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete 
+19,1,knn,KGxwMAooVndlaWdodHMKcDEKUydkaXN0YW5jZScKcDIKdHAzCmEoVmFsZ29yaXRobQpwNApTJ2tkX3RyZWUnCnA1CnRwNgphKFZtZXRyaWMKcDcKUydtYW5oYXR0YW4nCnA4CnRwOQphLg==,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphKFZsZWFmX3NpemUKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkkxCmFJNTAKYXNnMTAKVmludApwMTgKc2J0cDE5CmEu,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete +20,1,knn,KGxwMAooVndlaWdodHMKcDEKUydkaXN0YW5jZScKcDIKdHAzCmEoVmFsZ29yaXRobQpwNApTJ2tkX3RyZWUnCnA1CnRwNgphKFZtZXRyaWMKcDcKUydjaGVieXNoZXYnCnA4CnRwOQphLg==,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphKFZsZWFmX3NpemUKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkkxCmFJNTAKYXNnMTAKVmludApwMTgKc2J0cDE5CmEu,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete +21,1,knn,KGxwMAooVndlaWdodHMKcDEKUydkaXN0YW5jZScKcDIKdHAzCmEoVmFsZ29yaXRobQpwNApTJ2JydXRlJwpwNQp0cDYKYShWbWV0cmljCnA3ClMnbWlua293c2tpJwpwOAp0cDkKYS4=,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphKFZwCnAxMwpnMgooZzMKZzQKTnRwMTQKUnAxNQooZHAxNgpnOAoobHAxNwpJMQphSTMKYXNnMTAKVmludApwMTgKc2J0cDE5CmEu,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete 
+22,1,knn,KGxwMAooVndlaWdodHMKcDEKUydkaXN0YW5jZScKcDIKdHAzCmEoVmFsZ29yaXRobQpwNApTJ2JydXRlJwpwNQp0cDYKYShWbWV0cmljCnA3ClMnZXVjbGlkZWFuJwpwOAp0cDkKYS4=,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphLg==,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete +23,1,knn,KGxwMAooVndlaWdodHMKcDEKUydkaXN0YW5jZScKcDIKdHAzCmEoVmFsZ29yaXRobQpwNApTJ2JydXRlJwpwNQp0cDYKYShWbWV0cmljCnA3ClMnbWFuaGF0dGFuJwpwOAp0cDkKYS4=,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphLg==,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete +24,1,knn,KGxwMAooVndlaWdodHMKcDEKUydkaXN0YW5jZScKcDIKdHAzCmEoVmFsZ29yaXRobQpwNApTJ2JydXRlJwpwNQp0cDYKYShWbWV0cmljCnA3ClMnY2hlYnlzaGV2JwpwOAp0cDkKYS4=,KGxwMAooVm5fbmVpZ2hib3JzCnAxCmNjb3B5X3JlZwpfcmVjb25zdHJ1Y3RvcgpwMgooY2J0Yi5oeXBlcl9wYXJhbWV0ZXIKSHlwZXJQYXJhbWV0ZXIKcDMKY19fYnVpbHRpbl9fCm9iamVjdApwNApOdHA1ClJwNgooZHA3ClMncmFuZ2UnCnA4CihscDkKSTEKYUkyMAphc1MndHlwZScKcDEwClZpbnQKcDExCnNidHAxMgphLg==,KGxwMAooVl9zY2FsZQpwMQpJMDEKdHAyCmEu,incomplete 
+25,1,dt,KGxwMAooVmNyaXRlcmlvbgpwMQpTJ2VudHJvcHknCnAyCnRwMwphLg==,KGxwMAooVm1heF9mZWF0dXJlcwpwMQpjY29weV9yZWcKX3JlY29uc3RydWN0b3IKcDIKKGNidGIuaHlwZXJfcGFyYW1ldGVyCkh5cGVyUGFyYW1ldGVyCnAzCmNfX2J1aWx0aW5fXwpvYmplY3QKcDQKTnRwNQpScDYKKGRwNwpTJ3JhbmdlJwpwOAoobHA5CkYwLjEKYUYxLjAKYXNTJ3R5cGUnCnAxMApWZmxvYXQKcDExCnNidHAxMgphKFZtYXhfZGVwdGgKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkkyCmFJMTAKYXNnMTAKVmludApwMTgKc2J0cDE5CmEoVm1pbl9zYW1wbGVzX3NwbGl0CnAyMApnMgooZzMKZzQKTnRwMjEKUnAyMgooZHAyMwpnOAoobHAyNApJMgphSTQKYXNnMTAKVmludApwMjUKc2J0cDI2CmEoVm1pbl9zYW1wbGVzX2xlYWYKcDI3CmcyCihnMwpnNApOdHAyOApScDI5CihkcDMwCmc4CihscDMxCkkxCmFJMwphc2cxMApWaW50CnAzMgpzYnRwMzMKYS4=,KGxwMAou,incomplete +26,1,dt,KGxwMAooVmNyaXRlcmlvbgpwMQpTJ2dpbmknCnAyCnRwMwphLg==,KGxwMAooVm1heF9mZWF0dXJlcwpwMQpjY29weV9yZWcKX3JlY29uc3RydWN0b3IKcDIKKGNidGIuaHlwZXJfcGFyYW1ldGVyCkh5cGVyUGFyYW1ldGVyCnAzCmNfX2J1aWx0aW5fXwpvYmplY3QKcDQKTnRwNQpScDYKKGRwNwpTJ3JhbmdlJwpwOAoobHA5CkYwLjEKYUYxLjAKYXNTJ3R5cGUnCnAxMApWZmxvYXQKcDExCnNidHAxMgphKFZtYXhfZGVwdGgKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkkyCmFJMTAKYXNnMTAKVmludApwMTgKc2J0cDE5CmEoVm1pbl9zYW1wbGVzX3NwbGl0CnAyMApnMgooZzMKZzQKTnRwMjEKUnAyMgooZHAyMwpnOAoobHAyNApJMgphSTQKYXNnMTAKVmludApwMjUKc2J0cDI2CmEoVm1pbl9zYW1wbGVzX2xlYWYKcDI3CmcyCihnMwpnNApOdHAyOApScDI5CihkcDMwCmc4CihscDMxCkkxCmFJMwphc2cxMApWaW50CnAzMgpzYnRwMzMKYS4=,KGxwMAou,incomplete +27,1,logreg,KGxwMAooVnBlbmFsdHkKcDEKUydsMScKcDIKdHAzCmEoVmZpdF9pbnRlcmNlcHQKcDQKSTAxCnRwNQphLg==,KGxwMAooVkMKcDEKY2NvcHlfcmVnCl9yZWNvbnN0cnVjdG9yCnAyCihjYnRiLmh5cGVyX3BhcmFtZXRlcgpIeXBlclBhcmFtZXRlcgpwMwpjX19idWlsdGluX18Kb2JqZWN0CnA0Ck50cDUKUnA2CihkcDcKUydyYW5nZScKcDgKKGxwOQpGMWUtMDUKYUYxMDAwMDAuMAphc1MndHlwZScKcDEwClZmbG9hdF9leHAKcDExCnNidHAxMgphKFZ0b2wKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkYxZS0wNQphRjEwMDAwMC4wCmFzZzEwClZmbG9hdF9leHAKcDE4CnNidHAxOQphLg==,KGxwMAooVmNsYXNzX3dlaWdodApwMQpTJ2JhbGFuY2VkJwpwMgp0cDMKYShWX3NjYWxlCnA0CkkwMQp0cDUKYS4=,incomplete 
+28,1,logreg,KGxwMAooVnBlbmFsdHkKcDEKUydsMScKcDIKdHAzCmEoVmZpdF9pbnRlcmNlcHQKcDQKSTAwCnRwNQphLg==,KGxwMAooVkMKcDEKY2NvcHlfcmVnCl9yZWNvbnN0cnVjdG9yCnAyCihjYnRiLmh5cGVyX3BhcmFtZXRlcgpIeXBlclBhcmFtZXRlcgpwMwpjX19idWlsdGluX18Kb2JqZWN0CnA0Ck50cDUKUnA2CihkcDcKUydyYW5nZScKcDgKKGxwOQpGMWUtMDUKYUYxMDAwMDAuMAphc1MndHlwZScKcDEwClZmbG9hdF9leHAKcDExCnNidHAxMgphKFZ0b2wKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkYxZS0wNQphRjEwMDAwMC4wCmFzZzEwClZmbG9hdF9leHAKcDE4CnNidHAxOQphLg==,KGxwMAooVmNsYXNzX3dlaWdodApwMQpTJ2JhbGFuY2VkJwpwMgp0cDMKYShWX3NjYWxlCnA0CkkwMQp0cDUKYS4=,incomplete +29,1,logreg,KGxwMAooVnBlbmFsdHkKcDEKUydsMicKcDIKdHAzCmEoVmZpdF9pbnRlcmNlcHQKcDQKSTAxCnRwNQphKFZkdWFsCnA2CkkwMQp0cDcKYS4=,KGxwMAooVkMKcDEKY2NvcHlfcmVnCl9yZWNvbnN0cnVjdG9yCnAyCihjYnRiLmh5cGVyX3BhcmFtZXRlcgpIeXBlclBhcmFtZXRlcgpwMwpjX19idWlsdGluX18Kb2JqZWN0CnA0Ck50cDUKUnA2CihkcDcKUydyYW5nZScKcDgKKGxwOQpGMWUtMDUKYUYxMDAwMDAuMAphc1MndHlwZScKcDEwClZmbG9hdF9leHAKcDExCnNidHAxMgphKFZ0b2wKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkYxZS0wNQphRjEwMDAwMC4wCmFzZzEwClZmbG9hdF9leHAKcDE4CnNidHAxOQphLg==,KGxwMAooVmNsYXNzX3dlaWdodApwMQpTJ2JhbGFuY2VkJwpwMgp0cDMKYShWX3NjYWxlCnA0CkkwMQp0cDUKYS4=,incomplete +30,1,logreg,KGxwMAooVnBlbmFsdHkKcDEKUydsMicKcDIKdHAzCmEoVmZpdF9pbnRlcmNlcHQKcDQKSTAxCnRwNQphKFZkdWFsCnA2CkkwMAp0cDcKYS4=,KGxwMAooVkMKcDEKY2NvcHlfcmVnCl9yZWNvbnN0cnVjdG9yCnAyCihjYnRiLmh5cGVyX3BhcmFtZXRlcgpIeXBlclBhcmFtZXRlcgpwMwpjX19idWlsdGluX18Kb2JqZWN0CnA0Ck50cDUKUnA2CihkcDcKUydyYW5nZScKcDgKKGxwOQpGMWUtMDUKYUYxMDAwMDAuMAphc1MndHlwZScKcDEwClZmbG9hdF9leHAKcDExCnNidHAxMgphKFZ0b2wKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkYxZS0wNQphRjEwMDAwMC4wCmFzZzEwClZmbG9hdF9leHAKcDE4CnNidHAxOQphLg==,KGxwMAooVmNsYXNzX3dlaWdodApwMQpTJ2JhbGFuY2VkJwpwMgp0cDMKYShWX3NjYWxlCnA0CkkwMQp0cDUKYS4=,incomplete 
+31,1,logreg,KGxwMAooVnBlbmFsdHkKcDEKUydsMicKcDIKdHAzCmEoVmZpdF9pbnRlcmNlcHQKcDQKSTAwCnRwNQphKFZkdWFsCnA2CkkwMQp0cDcKYS4=,KGxwMAooVkMKcDEKY2NvcHlfcmVnCl9yZWNvbnN0cnVjdG9yCnAyCihjYnRiLmh5cGVyX3BhcmFtZXRlcgpIeXBlclBhcmFtZXRlcgpwMwpjX19idWlsdGluX18Kb2JqZWN0CnA0Ck50cDUKUnA2CihkcDcKUydyYW5nZScKcDgKKGxwOQpGMWUtMDUKYUYxMDAwMDAuMAphc1MndHlwZScKcDEwClZmbG9hdF9leHAKcDExCnNidHAxMgphKFZ0b2wKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkYxZS0wNQphRjEwMDAwMC4wCmFzZzEwClZmbG9hdF9leHAKcDE4CnNidHAxOQphLg==,KGxwMAooVmNsYXNzX3dlaWdodApwMQpTJ2JhbGFuY2VkJwpwMgp0cDMKYShWX3NjYWxlCnA0CkkwMQp0cDUKYS4=,incomplete +32,1,logreg,KGxwMAooVnBlbmFsdHkKcDEKUydsMicKcDIKdHAzCmEoVmZpdF9pbnRlcmNlcHQKcDQKSTAwCnRwNQphKFZkdWFsCnA2CkkwMAp0cDcKYS4=,KGxwMAooVkMKcDEKY2NvcHlfcmVnCl9yZWNvbnN0cnVjdG9yCnAyCihjYnRiLmh5cGVyX3BhcmFtZXRlcgpIeXBlclBhcmFtZXRlcgpwMwpjX19idWlsdGluX18Kb2JqZWN0CnA0Ck50cDUKUnA2CihkcDcKUydyYW5nZScKcDgKKGxwOQpGMWUtMDUKYUYxMDAwMDAuMAphc1MndHlwZScKcDEwClZmbG9hdF9leHAKcDExCnNidHAxMgphKFZ0b2wKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkYxZS0wNQphRjEwMDAwMC4wCmFzZzEwClZmbG9hdF9leHAKcDE4CnNidHAxOQphLg==,KGxwMAooVmNsYXNzX3dlaWdodApwMQpTJ2JhbGFuY2VkJwpwMgp0cDMKYShWX3NjYWxlCnA0CkkwMQp0cDUKYS4=,incomplete +33,2,dt,KGxwMAooVmNyaXRlcmlvbgpwMQpTJ2VudHJvcHknCnAyCnRwMwphLg==,KGxwMAooVm1heF9mZWF0dXJlcwpwMQpjY29weV9yZWcKX3JlY29uc3RydWN0b3IKcDIKKGNidGIuaHlwZXJfcGFyYW1ldGVyCkh5cGVyUGFyYW1ldGVyCnAzCmNfX2J1aWx0aW5fXwpvYmplY3QKcDQKTnRwNQpScDYKKGRwNwpTJ3JhbmdlJwpwOAoobHA5CkYwLjEKYUYxLjAKYXNTJ3R5cGUnCnAxMApWZmxvYXQKcDExCnNidHAxMgphKFZtYXhfZGVwdGgKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkkyCmFJMTAKYXNnMTAKVmludApwMTgKc2J0cDE5CmEoVm1pbl9zYW1wbGVzX3NwbGl0CnAyMApnMgooZzMKZzQKTnRwMjEKUnAyMgooZHAyMwpnOAoobHAyNApJMgphSTQKYXNnMTAKVmludApwMjUKc2J0cDI2CmEoVm1pbl9zYW1wbGVzX2xlYWYKcDI3CmcyCihnMwpnNApOdHAyOApScDI5CihkcDMwCmc4CihscDMxCkkxCmFJMwphc2cxMApWaW50CnAzMgpzYnRwMzMKYS4=,KGxwMAou,incomplete 
+34,2,dt,KGxwMAooVmNyaXRlcmlvbgpwMQpTJ2dpbmknCnAyCnRwMwphLg==,KGxwMAooVm1heF9mZWF0dXJlcwpwMQpjY29weV9yZWcKX3JlY29uc3RydWN0b3IKcDIKKGNidGIuaHlwZXJfcGFyYW1ldGVyCkh5cGVyUGFyYW1ldGVyCnAzCmNfX2J1aWx0aW5fXwpvYmplY3QKcDQKTnRwNQpScDYKKGRwNwpTJ3JhbmdlJwpwOAoobHA5CkYwLjEKYUYxLjAKYXNTJ3R5cGUnCnAxMApWZmxvYXQKcDExCnNidHAxMgphKFZtYXhfZGVwdGgKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkkyCmFJMTAKYXNnMTAKVmludApwMTgKc2J0cDE5CmEoVm1pbl9zYW1wbGVzX3NwbGl0CnAyMApnMgooZzMKZzQKTnRwMjEKUnAyMgooZHAyMwpnOAoobHAyNApJMgphSTQKYXNnMTAKVmludApwMjUKc2J0cDI2CmEoVm1pbl9zYW1wbGVzX2xlYWYKcDI3CmcyCihnMwpnNApOdHAyOApScDI5CihkcDMwCmc4CihscDMxCkkxCmFJMwphc2cxMApWaW50CnAzMgpzYnRwMzMKYS4=,KGxwMAou,incomplete +35,2,logreg,KGxwMAooVnBlbmFsdHkKcDEKUydsMScKcDIKdHAzCmEoVmZpdF9pbnRlcmNlcHQKcDQKSTAxCnRwNQphLg==,KGxwMAooVkMKcDEKY2NvcHlfcmVnCl9yZWNvbnN0cnVjdG9yCnAyCihjYnRiLmh5cGVyX3BhcmFtZXRlcgpIeXBlclBhcmFtZXRlcgpwMwpjX19idWlsdGluX18Kb2JqZWN0CnA0Ck50cDUKUnA2CihkcDcKUydyYW5nZScKcDgKKGxwOQpGMWUtMDUKYUYxMDAwMDAuMAphc1MndHlwZScKcDEwClZmbG9hdF9leHAKcDExCnNidHAxMgphKFZ0b2wKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkYxZS0wNQphRjEwMDAwMC4wCmFzZzEwClZmbG9hdF9leHAKcDE4CnNidHAxOQphLg==,KGxwMAooVmNsYXNzX3dlaWdodApwMQpTJ2JhbGFuY2VkJwpwMgp0cDMKYShWX3NjYWxlCnA0CkkwMQp0cDUKYS4=,incomplete +36,2,logreg,KGxwMAooVnBlbmFsdHkKcDEKUydsMScKcDIKdHAzCmEoVmZpdF9pbnRlcmNlcHQKcDQKSTAwCnRwNQphLg==,KGxwMAooVkMKcDEKY2NvcHlfcmVnCl9yZWNvbnN0cnVjdG9yCnAyCihjYnRiLmh5cGVyX3BhcmFtZXRlcgpIeXBlclBhcmFtZXRlcgpwMwpjX19idWlsdGluX18Kb2JqZWN0CnA0Ck50cDUKUnA2CihkcDcKUydyYW5nZScKcDgKKGxwOQpGMWUtMDUKYUYxMDAwMDAuMAphc1MndHlwZScKcDEwClZmbG9hdF9leHAKcDExCnNidHAxMgphKFZ0b2wKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkYxZS0wNQphRjEwMDAwMC4wCmFzZzEwClZmbG9hdF9leHAKcDE4CnNidHAxOQphLg==,KGxwMAooVmNsYXNzX3dlaWdodApwMQpTJ2JhbGFuY2VkJwpwMgp0cDMKYShWX3NjYWxlCnA0CkkwMQp0cDUKYS4=,incomplete 
+37,2,logreg,KGxwMAooVnBlbmFsdHkKcDEKUydsMicKcDIKdHAzCmEoVmZpdF9pbnRlcmNlcHQKcDQKSTAxCnRwNQphKFZkdWFsCnA2CkkwMQp0cDcKYS4=,KGxwMAooVkMKcDEKY2NvcHlfcmVnCl9yZWNvbnN0cnVjdG9yCnAyCihjYnRiLmh5cGVyX3BhcmFtZXRlcgpIeXBlclBhcmFtZXRlcgpwMwpjX19idWlsdGluX18Kb2JqZWN0CnA0Ck50cDUKUnA2CihkcDcKUydyYW5nZScKcDgKKGxwOQpGMWUtMDUKYUYxMDAwMDAuMAphc1MndHlwZScKcDEwClZmbG9hdF9leHAKcDExCnNidHAxMgphKFZ0b2wKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkYxZS0wNQphRjEwMDAwMC4wCmFzZzEwClZmbG9hdF9leHAKcDE4CnNidHAxOQphLg==,KGxwMAooVmNsYXNzX3dlaWdodApwMQpTJ2JhbGFuY2VkJwpwMgp0cDMKYShWX3NjYWxlCnA0CkkwMQp0cDUKYS4=,incomplete +38,2,logreg,KGxwMAooVnBlbmFsdHkKcDEKUydsMicKcDIKdHAzCmEoVmZpdF9pbnRlcmNlcHQKcDQKSTAxCnRwNQphKFZkdWFsCnA2CkkwMAp0cDcKYS4=,KGxwMAooVkMKcDEKY2NvcHlfcmVnCl9yZWNvbnN0cnVjdG9yCnAyCihjYnRiLmh5cGVyX3BhcmFtZXRlcgpIeXBlclBhcmFtZXRlcgpwMwpjX19idWlsdGluX18Kb2JqZWN0CnA0Ck50cDUKUnA2CihkcDcKUydyYW5nZScKcDgKKGxwOQpGMWUtMDUKYUYxMDAwMDAuMAphc1MndHlwZScKcDEwClZmbG9hdF9leHAKcDExCnNidHAxMgphKFZ0b2wKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkYxZS0wNQphRjEwMDAwMC4wCmFzZzEwClZmbG9hdF9leHAKcDE4CnNidHAxOQphLg==,KGxwMAooVmNsYXNzX3dlaWdodApwMQpTJ2JhbGFuY2VkJwpwMgp0cDMKYShWX3NjYWxlCnA0CkkwMQp0cDUKYS4=,incomplete +39,2,logreg,KGxwMAooVnBlbmFsdHkKcDEKUydsMicKcDIKdHAzCmEoVmZpdF9pbnRlcmNlcHQKcDQKSTAwCnRwNQphKFZkdWFsCnA2CkkwMQp0cDcKYS4=,KGxwMAooVkMKcDEKY2NvcHlfcmVnCl9yZWNvbnN0cnVjdG9yCnAyCihjYnRiLmh5cGVyX3BhcmFtZXRlcgpIeXBlclBhcmFtZXRlcgpwMwpjX19idWlsdGluX18Kb2JqZWN0CnA0Ck50cDUKUnA2CihkcDcKUydyYW5nZScKcDgKKGxwOQpGMWUtMDUKYUYxMDAwMDAuMAphc1MndHlwZScKcDEwClZmbG9hdF9leHAKcDExCnNidHAxMgphKFZ0b2wKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkYxZS0wNQphRjEwMDAwMC4wCmFzZzEwClZmbG9hdF9leHAKcDE4CnNidHAxOQphLg==,KGxwMAooVmNsYXNzX3dlaWdodApwMQpTJ2JhbGFuY2VkJwpwMgp0cDMKYShWX3NjYWxlCnA0CkkwMQp0cDUKYS4=,incomplete 
+40,2,logreg,KGxwMAooVnBlbmFsdHkKcDEKUydsMicKcDIKdHAzCmEoVmZpdF9pbnRlcmNlcHQKcDQKSTAwCnRwNQphKFZkdWFsCnA2CkkwMAp0cDcKYS4=,KGxwMAooVkMKcDEKY2NvcHlfcmVnCl9yZWNvbnN0cnVjdG9yCnAyCihjYnRiLmh5cGVyX3BhcmFtZXRlcgpIeXBlclBhcmFtZXRlcgpwMwpjX19idWlsdGluX18Kb2JqZWN0CnA0Ck50cDUKUnA2CihkcDcKUydyYW5nZScKcDgKKGxwOQpGMWUtMDUKYUYxMDAwMDAuMAphc1MndHlwZScKcDEwClZmbG9hdF9leHAKcDExCnNidHAxMgphKFZ0b2wKcDEzCmcyCihnMwpnNApOdHAxNApScDE1CihkcDE2Cmc4CihscDE3CkYxZS0wNQphRjEwMDAwMC4wCmFzZzEwClZmbG9hdF9leHAKcDE4CnNidHAxOQphLg==,KGxwMAooVmNsYXNzX3dlaWdodApwMQpTJ2JhbGFuY2VkJwpwMgp0cDMKYShWX3NjYWxlCnA0CkkwMQp0cDUKYS4=,incomplete diff --git a/atm/database.py b/atm/database.py index 2257f26..a8ad891 100644 --- a/atm/database.py +++ b/atm/database.py @@ -303,7 +303,7 @@ def to_csv(self, path): """ for table in ['datasets', 'dataruns', 'hyperpartitions', 'classifiers']: df = pd.read_sql('SELECT * FROM %s' % table, self.session.bind) - df.to_csv(os.path.join(path, '%s.csv' % table)) + df.to_csv(os.path.join(path, '%s.csv' % table), index=False) @try_with_session(commit=True) def from_csv(self, path): diff --git a/atm/tests/unit_tests/test_worker.py b/atm/tests/unit_tests/test_worker.py index 7bcbef9..153763f 100644 --- a/atm/tests/unit_tests/test_worker.py +++ b/atm/tests/unit_tests/test_worker.py @@ -1,10 +1,13 @@ +import os import pytest +from atm import PROJECT_ROOT from atm.worker import Worker from btb.tuning import GCP from btb.selection import HierarchicalByAlgorithm +DB_CACHE_PATH = os.path.join(PROJECT_ROOT, 'data/modelhub/test/') DB_PATH = '/tmp/atm.db' DT_PARAMS = {'criterion': 'gini', 'max_features': 0.5, 'max_depth': 3, @@ -14,7 +17,9 @@ @pytest.fixture def db(): os.remove(DB_PATH) - return Database(dialect='sqlite', database=DB_PATH) + db = Database(dialect='sqlite', database=DB_PATH) + # load cached ModelHub state + db.read_csv(DB_CACHE_PATH) @pytest.fixture @@ -23,8 +28,8 @@ def dataset(db): @pytest.fixture -def datarun(): - return db.get_datarun(1) +def datarun(db): + return 
db.get_datarun(2) @pytest.fixture @@ -34,7 +39,12 @@ def model(datarun): label_column=datarun.label_column) -def get_worker(db, dataset, **kwargs): +@pytest.fixture +def worker(db, datarun): + return Worker(db, datarun) + + +def get_new_worker(db, dataset, **kwargs): kwargs['methods'] = kwargs.get('methods', ['logreg', 'dt']) run_conf = RunConfig(**kwargs) datarun = create_datarun(db, dataset, run_conf) @@ -42,8 +52,8 @@ def get_worker(db, dataset, **kwargs): def test_load_selector_and_tuner(db, dataset): - worker = get_worker(db, dataset, selector='hieralg', k_window=7, - tuner='gcp', r_minimum=7, gridding=3) + worker = get_new_worker(db, dataset, selector='hieralg', k_window=7, + tuner='gcp', r_minimum=7, gridding=3) assert type(worker.selector) == HierarchicalByAlgorithm assert len(worker.selector.choices) == 6 assert worker.selector.k == 7 @@ -54,18 +64,17 @@ def test_load_selector_and_tuner(db, dataset): def test_load_custom_selector_and_tuner(db, dataset): tuner_path = './mytuner.py' selector_path = './myselector.py' - worker = get_worker(db, dataset, selector=selector_path + ':MySelector', - tuner=tuner_path + ':MyTuner') + worker = get_new_worker(db, dataset, selector=selector_path + ':MySelector', + tuner=tuner_path + ':MyTuner') assert isinstance(worker.selector, CustomSelector) assert issubclass(worker.Tuner, CustomTuner) -def test_select_and_tune(): +def test_select_and_tune(worker): """ This won't test that BTB is working correctly, just that the ATM-BTB connection is working. """ - worker = get_worker(db, dataset, selector='BestK', k_window=5) part = worker.select_hyperpartition() params = worker.tune_hyperparameters(part) From e8a6e3622c791e69722b53b744b8d6c80c209ac6 Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Thu, 1 Feb 2018 14:16:24 -0500 Subject: [PATCH 23/36] fix to_csv and from_csv functions... 
ugh --- atm/database.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/atm/database.py b/atm/database.py index a8ad891..b368642 100644 --- a/atm/database.py +++ b/atm/database.py @@ -5,10 +5,12 @@ from operator import attrgetter from sqlalchemy import (Column, DateTime, Enum, ForeignKey, Integer, MetaData, - Numeric, String, Text, and_, create_engine, func) + Numeric, String, Text, and_, create_engine, func, + inspect) from sqlalchemy.engine.url import URL from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import relationship, sessionmaker +from sqlalchemy.orm.properties import ColumnProperty from .constants import * from .utilities import * @@ -311,9 +313,30 @@ def from_csv(self, path): Load a snapshot of the ModelHub database from a set of CSVs in the given directory. """ - for table in ['datasets', 'dataruns', 'hyperpartitions', 'classifiers']: - df = pd.read_csv(os.path.join(path, '%s.csv' % table)) + for model, table in [(self.Dataset, 'dataset'), + (self.Datarun, 'datarun'), + (self.Hyperpartition, 'hyperpartition'), + (self.Classifier, 'classifier')]: + df = pd.read_csv(os.path.join(path, '%ss.csv' % table)) + + # parse datetime columns. This is necessary because SQLAlchemy can't + # interpret strings as datetimes on its own. 
+ # yes, this is the easiest way to do it + for c in inspect(model).attrs: + if type(c) != ColumnProperty: + continue + col = c.columns[0] + if type(col.type) == DateTime: + df[c.key] = pd.to_datetime(df[c.key], + infer_datetime_format=True) + for _, r in df.iterrows(): + # replace NaN and NaT with None + for k, v in r.items(): + if pd.isnull(v): + r[k] = None + + # insert the row into the database create_func = getattr(self, 'create_%s' % table) create_func(**r) From d68d9dd038da51396b3bf33e624c7d3b511849ad Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Thu, 1 Feb 2018 17:37:05 -0500 Subject: [PATCH 24/36] some worker tests run, others don't --- atm/tests/unit_tests/test_enter_data.py | 17 +-- atm/tests/unit_tests/test_worker.py | 140 +++++++++++++++++++----- atm/worker.py | 10 +- 3 files changed, 130 insertions(+), 37 deletions(-) diff --git a/atm/tests/unit_tests/test_enter_data.py b/atm/tests/unit_tests/test_enter_data.py index 7e6f845..8da3900 100644 --- a/atm/tests/unit_tests/test_enter_data.py +++ b/atm/tests/unit_tests/test_enter_data.py @@ -10,6 +10,7 @@ DB_PATH = '/tmp/atm.db' +DB_CACHE_PATH = os.path.join(PROJECT_ROOT, 'data/modelhub/test/') DATA_URL = 'https://s3.amazonaws.com/mit-dai-delphi-datastore/downloaded/' BASELINE_PATH = os.path.join(PROJECT_ROOT, 'data/baselines/best_so_far/') BASELINE_URL = 'https://s3.amazonaws.com/mit-dai-delphi-datastore/best_so_far/' @@ -33,17 +34,19 @@ @pytest.fixture def db(): - return Database(dialect='sqlite', database=DB_PATH) + if os.path.exists(DB_PATH): + os.remove(DB_PATH) + db = Database(dialect='sqlite', database=DB_PATH) + # load cached ModelHub state. This database snapshot has one dataset + # (pollution_1.csv) and two dataruns, one complete and one with 33/100 + # classifiers finished. 
+ db.from_csv(DB_CACHE_PATH) + return db @pytest.fixture def dataset(db): - ds = db.get_dataset(1) - if ds: - return ds - else: - data_path = os.path.join(PROJECT_ROOT, 'data/test/pollution_1.csv') - return create_dataset(db, 'class', data_path) + return db.get_dataset(1) def test_create_dataset(db): diff --git a/atm/tests/unit_tests/test_worker.py b/atm/tests/unit_tests/test_worker.py index 153763f..356dff0 100644 --- a/atm/tests/unit_tests/test_worker.py +++ b/atm/tests/unit_tests/test_worker.py @@ -1,14 +1,27 @@ +import datetime +import mock +import numpy as np import os import pytest +import random +from mock import patch, Mock from atm import PROJECT_ROOT +from atm.config import RunConfig +from atm.constants import TIME_FMT, METRICS_BINARY +from atm.database import ClassifierStatus, Database, db_session +from atm.enter_data import create_datarun +from atm.model import Model +from atm.utilities import get_local_data_path from atm.worker import Worker -from btb.tuning import GCP -from btb.selection import HierarchicalByAlgorithm +from btb.tuning import GP, GPEi, Tuner +from btb.selection import BestKReward, HierarchicalByAlgorithm, Selector DB_CACHE_PATH = os.path.join(PROJECT_ROOT, 'data/modelhub/test/') DB_PATH = '/tmp/atm.db' +METRIC_DIR = '/tmp/metrics/' +MODEL_DIR = '/tmp/models/' DT_PARAMS = {'criterion': 'gini', 'max_features': 0.5, 'max_depth': 3, 'min_samples_split': 2, 'min_samples_leaf': 1} @@ -18,25 +31,32 @@ def db(): os.remove(DB_PATH) db = Database(dialect='sqlite', database=DB_PATH) - # load cached ModelHub state - db.read_csv(DB_CACHE_PATH) + # load cached ModelHub state. This database snapshot has one dataset + # (pollution_1.csv) and two dataruns, one complete and one with 33/100 + # classifiers finished. 
+ db.from_csv(DB_CACHE_PATH) + return db @pytest.fixture def dataset(db): + # return the only dataset return db.get_dataset(1) @pytest.fixture def datarun(db): + # return the unfinished datarun return db.get_datarun(2) @pytest.fixture -def model(datarun): - return Model(method=dt, params=DT_PARAMS, - judgment_metric='cv_judgment_metric', - label_column=datarun.label_column) +def model(dataset): + model = Model(method='dt', params=DT_PARAMS, + judgment_metric='cv_judgment_metric', + class_column=dataset.class_column) + train_path, _ = get_local_data_path(dataset.train_path) + model.train_test(train_path=train_path) @pytest.fixture @@ -53,48 +73,118 @@ def get_new_worker(db, dataset, **kwargs): def test_load_selector_and_tuner(db, dataset): worker = get_new_worker(db, dataset, selector='hieralg', k_window=7, - tuner='gcp', r_minimum=7, gridding=3) + tuner='gp', r_minimum=7, gridding=3) assert type(worker.selector) == HierarchicalByAlgorithm assert len(worker.selector.choices) == 6 assert worker.selector.k == 7 assert worker.selector.by_algorithm['logreg'] == 4 - assert worker.Tuner == GCP + assert worker.Tuner == GP def test_load_custom_selector_and_tuner(db, dataset): - tuner_path = './mytuner.py' - selector_path = './myselector.py' + tuner_path = os.path.join(PROJECT_ROOT, 'tests/utilities/mytuner.py') + selector_path = os.path.join(PROJECT_ROOT, 'tests/utilities/myselector.py') worker = get_new_worker(db, dataset, selector=selector_path + ':MySelector', tuner=tuner_path + ':MyTuner') - assert isinstance(worker.selector, CustomSelector) - assert issubclass(worker.Tuner, CustomTuner) + assert isinstance(worker.selector, Selector) + assert issubclass(worker.Tuner, Tuner) -def test_select_and_tune(worker): +def test_select_hyperpartition(worker): """ This won't test that BTB is working correctly, just that the ATM-BTB connection is working. 
""" - part = worker.select_hyperpartition() - params = worker.tune_hyperparameters(part) + worker.db.get_hyperpartitions = Mock(return_value=[Mock(id=1)]) + clf_mock = Mock(hyperpartition_id=1, cv_judgment_metric=0.5) + worker.db.get_classifiers = Mock(return_value=[clf_mock]) + worker.selector.select = Mock(return_value=1) + hp = worker.select_hyperpartition() + + worker.selector.select.assert_called_with({1: [0.5]}) + assert hp.id == 1 + + +def test_tune_hyperparameters(worker): + """ + This won't test that BTB is working correctly, just that the ATM-BTB + connection is working. + """ + hp = worker.db.get_hyperpartition(1) + clfs = worker.db.get_classifiers(hyperpartition_id=1) + + mock_tuner = Mock() + mock_tuner.fit = Mock() + mock_tuner.propose = Mock(return_value=[]) + + worker.Tuner = mock_tuner + params = worker.tune_hyperparameters(hp) + + mock_tuner.assert_called_with(tunables=hp.tunables, + gridding=worker.datarun.gridding, + r_minimum=worker.datarun.r_minimum) + mock_turner.fit.assert_called() + mock_turner.propose.assert_called() + + + worker.selector.select.assert_called_with({1: [0.5]}) + assert hp.id == 1 -def test_tune_hyperparameters(): - pass def test_test_classifier(db, dataset): - worker = get_worker(db, dataset, save_files=True) + metric = 'roc_auc' + worker = get_new_worker(db, dataset, metric=metric, score_target='mu_sigma') + model, metrics = worker.test_classifier(method='dt', params=DT_PARAMS) + judge_mets = [m[metric] for m in metrics['cv']] -def test_save_classifier(db, dataset, model): - worker = get_worker(db, dataset, save_files=True) - worker.save_classifier(1, ) + assert type(model) == Model + assert model.judgment_metric == metric + assert model.cv_judgment_metric == np.mean(judge_mets) + assert model.cv_judgment_metric_stdev == np.std(judge_mets) -def test_is_datarun_finished(): - pass +def test_save_classifier(db, datarun, model): + worker = Worker(db, datarun, model_dir=MODEL_DIR, metric_dir=METRIC_DIR) + hp = 
db.get_hyperpartitions(datarun_id=worker.datarun.id)[0] + classifier = worker.db.start_classifier(hyperpartition_id=hp.id, + datarun_id=worker.datarun.id, + host='localhost', + hyperparameter_values=DT_PARAMS) + metrics = {'cv': [{k: random.random() for k in METRICS_BINARY} + for i in range(5)], + 'test': {k: random.random() for k in METRICS_BINARY}} + + worker.db.complete_classifier = Mock() + worker.save_classifier(classifier.id, model, metrics) + worker.db.complete_classifier.assert_called() + + with db_session(worker.db): + clf = db.get_classifier(classifier.id) + + loaded = load_model(clf, MODEL_DIR) + assert type(loaded) == Model + assert loaded.method == model.method + assert loaded.random_state == model.random_state + + assert load_metrics(clf, METRIC_DIR) == metrics def test_run_classifier(): pass + + +def test_is_datarun_finished(db, dataset, datarun): + r1 = db.get_datarun(1) + worker = Worker(db, r1) + assert worker.is_datarun_finished() + + r2 = db.get_datarun(2) + worker = Worker(db, r2) + assert not worker.is_datarun_finished() + + deadline = (datetime.datetime.now() - datetime.timedelta(seconds=1)).strftime(TIME_FMT) + worker = get_new_worker(db, dataset, deadline=deadline) + assert worker.is_datarun_finished() diff --git a/atm/worker.py b/atm/worker.py index 5d8701a..7d96bbd 100755 --- a/atm/worker.py +++ b/atm/worker.py @@ -179,14 +179,14 @@ def tune_hyperparameters(self, hyperpartition): # Get previously-used parameters: every classifier should either be # completed or have thrown an error all_clfs = self.db.get_classifiers(hyperpartition_id=hyperpartition.id) - classifiers = [l for l in all_clfs - if l.status == ClassifierStatus.COMPLETE] + classifiers = [c for c in all_clfs + if c.status == ClassifierStatus.COMPLETE] # Extract parameters and scores as numpy arrays from classifiers - X = params_to_vectors([l.hyperparameter_values for l in classifiers], + X = params_to_vectors([c.hyperparameter_values for c in classifiers], tunables) - y = 
np.array([float(getattr(l, self.datarun.score_target)) - for l in classifiers]) + y = np.array([float(getattr(c, self.datarun.score_target)) + for c in classifiers]) # Initialize the tuner and propose a new set of parameters # this has to be initialized with information from the hyperpartition, so we From 85e14d564f62708d11d902f179873d636861585f Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Thu, 1 Feb 2018 17:37:38 -0500 Subject: [PATCH 25/36] add custom selector and tuner for testing --- atm/tests/utilities/myselector.py | 8 ++++++++ atm/tests/utilities/mytuner.py | 12 ++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 atm/tests/utilities/myselector.py create mode 100644 atm/tests/utilities/mytuner.py diff --git a/atm/tests/utilities/myselector.py b/atm/tests/utilities/myselector.py new file mode 100644 index 0000000..17ce1ed --- /dev/null +++ b/atm/tests/utilities/myselector.py @@ -0,0 +1,8 @@ +from btb.selection import Selector +import random + + +class MySelector(Selector): + def select(self, choice_scores): + """ Select a choice uniformly at random. """ + return self.choices[random.randint(0, len(self.choices) - 1)] diff --git a/atm/tests/utilities/mytuner.py b/atm/tests/utilities/mytuner.py new file mode 100644 index 0000000..e1b0b97 --- /dev/null +++ b/atm/tests/utilities/mytuner.py @@ -0,0 +1,12 @@ +from btb.tuning import Tuner + + +class MyTuner(Tuner): + """ + Very bare_bones tuner that returns a random set of parameters each time. + """ + def propose(self): + """ + Generate and return a random set of parameters. + """ + return self.create_candidates(1)[0, :] From 4656ff8b20f29b14db1eccbbd05cd056e73da55f Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 2 Feb 2018 14:57:10 -0500 Subject: [PATCH 26/36] tests run!! 
--- atm/tests/unit_tests/test_worker.py | 159 ++++++++++++++++++++-------- atm/worker.py | 17 ++- 2 files changed, 126 insertions(+), 50 deletions(-) diff --git a/atm/tests/unit_tests/test_worker.py b/atm/tests/unit_tests/test_worker.py index 356dff0..ef94bdb 100644 --- a/atm/tests/unit_tests/test_worker.py +++ b/atm/tests/unit_tests/test_worker.py @@ -4,28 +4,54 @@ import os import pytest import random -from mock import patch, Mock +from mock import patch, Mock, ANY +import atm from atm import PROJECT_ROOT -from atm.config import RunConfig +from atm.config import RunConfig, SQLConfig from atm.constants import TIME_FMT, METRICS_BINARY from atm.database import ClassifierStatus, Database, db_session -from atm.enter_data import create_datarun +from atm.enter_data import enter_data from atm.model import Model -from atm.utilities import get_local_data_path -from atm.worker import Worker +from atm.utilities import get_local_data_path, load_model, load_metrics +from atm.worker import ClassifierError, Worker from btb.tuning import GP, GPEi, Tuner -from btb.selection import BestKReward, HierarchicalByAlgorithm, Selector +from btb.selection import BestKReward, BestKVelocity, Selector DB_CACHE_PATH = os.path.join(PROJECT_ROOT, 'data/modelhub/test/') DB_PATH = '/tmp/atm.db' METRIC_DIR = '/tmp/metrics/' MODEL_DIR = '/tmp/models/' +DATASET_ID = 1 +DATARUN_ID = 2 +HYPERPART_ID = 34 DT_PARAMS = {'criterion': 'gini', 'max_features': 0.5, 'max_depth': 3, 'min_samples_split': 2, 'min_samples_leaf': 1} +# helper class to allow fuzzy arg matching +class StringWith(object): + def __init__(self, match): + self.match = match + + def __eq__(self, other): + return self.match in other + + +# helper class to allow incomplete object matching +class ObjWithAttrs(object): + def __init__(self, **kwargs): + self.attrs = kwargs + for k, v in kwargs.items(): + setattr(self, k, v) + + def __eq__(self, other): + return all([getattr(other, k) == v for k, v in self.attrs.items()]) + + def 
__repr__(self): + return '<%s>' % ', '.join(['%s=%s' % i for i in self.attrs.items()]) + @pytest.fixture def db(): @@ -41,22 +67,36 @@ def db(): @pytest.fixture def dataset(db): # return the only dataset - return db.get_dataset(1) + return db.get_dataset(DATASET_ID) @pytest.fixture def datarun(db): # return the unfinished datarun - return db.get_datarun(2) + return db.get_datarun(DATARUN_ID) + + +@pytest.fixture +def hyperpartition(db): + # return a decision tree hyperpartition matching the static params above + return db.get_hyperpartition(HYPERPART_ID) @pytest.fixture def model(dataset): model = Model(method='dt', params=DT_PARAMS, - judgment_metric='cv_judgment_metric', + judgment_metric='roc_auc', class_column=dataset.class_column) train_path, _ = get_local_data_path(dataset.train_path) model.train_test(train_path=train_path) + return model + + +@pytest.fixture +def metrics(): + cv_mets = [{k: random.random() for k in METRICS_BINARY} for i in range(5)] + test_mets = {k: random.random() for k in METRICS_BINARY} + return {'cv': cv_mets, 'test': test_mets} @pytest.fixture @@ -64,27 +104,28 @@ def worker(db, datarun): return Worker(db, datarun) -def get_new_worker(db, dataset, **kwargs): +def get_new_worker(**kwargs): kwargs['methods'] = kwargs.get('methods', ['logreg', 'dt']) + sql_conf = SQLConfig(database=DB_PATH) run_conf = RunConfig(**kwargs) - datarun = create_datarun(db, dataset, run_conf) + run_id = enter_data(sql_conf, run_conf) + db = Database(**vars(sql_conf)) + datarun = db.get_datarun(run_id) return Worker(db, datarun) def test_load_selector_and_tuner(db, dataset): - worker = get_new_worker(db, dataset, selector='hieralg', k_window=7, - tuner='gp', r_minimum=7, gridding=3) - assert type(worker.selector) == HierarchicalByAlgorithm - assert len(worker.selector.choices) == 6 + worker = get_new_worker(selector='bestkvel', k_window=7, tuner='gp') + assert type(worker.selector) == BestKVelocity + assert len(worker.selector.choices) == 8 assert 
worker.selector.k == 7 - assert worker.selector.by_algorithm['logreg'] == 4 assert worker.Tuner == GP def test_load_custom_selector_and_tuner(db, dataset): tuner_path = os.path.join(PROJECT_ROOT, 'tests/utilities/mytuner.py') selector_path = os.path.join(PROJECT_ROOT, 'tests/utilities/myselector.py') - worker = get_new_worker(db, dataset, selector=selector_path + ':MySelector', + worker = get_new_worker(selector=selector_path + ':MySelector', tuner=tuner_path + ':MyTuner') assert isinstance(worker.selector, Selector) assert issubclass(worker.Tuner, Tuner) @@ -106,36 +147,30 @@ def test_select_hyperpartition(worker): assert hp.id == 1 -def test_tune_hyperparameters(worker): +def test_tune_hyperparameters(worker, hyperpartition): """ This won't test that BTB is working correctly, just that the ATM-BTB connection is working. """ - hp = worker.db.get_hyperpartition(1) - clfs = worker.db.get_classifiers(hyperpartition_id=1) - mock_tuner = Mock() - mock_tuner.fit = Mock() - mock_tuner.propose = Mock(return_value=[]) - - worker.Tuner = mock_tuner - params = worker.tune_hyperparameters(hp) + worker.Tuner = Mock(return_value=mock_tuner) - mock_tuner.assert_called_with(tunables=hp.tunables, - gridding=worker.datarun.gridding, - r_minimum=worker.datarun.r_minimum) - mock_turner.fit.assert_called() - mock_turner.propose.assert_called() - - - worker.selector.select.assert_called_with({1: [0.5]}) - assert hp.id == 1 + with patch('atm.worker.vector_to_params') as vtp_mock: + params = worker.tune_hyperparameters(hyperpartition) + vtp_mock.assert_called() + approximate_tunables = [(k, ObjWithAttrs(range=v.range)) + for k, v in hyperpartition.tunables] + worker.Tuner.assert_called_with(tunables=approximate_tunables, + gridding=worker.datarun.gridding, + r_minimum=worker.datarun.r_minimum) + mock_tuner.fit.assert_called() + mock_tuner.propose.assert_called() def test_test_classifier(db, dataset): metric = 'roc_auc' - worker = get_new_worker(db, dataset, metric=metric, 
score_target='mu_sigma') + worker = get_new_worker(metric=metric, score_target='mu_sigma') model, metrics = worker.test_classifier(method='dt', params=DT_PARAMS) judge_mets = [m[metric] for m in metrics['cv']] @@ -146,16 +181,13 @@ def test_test_classifier(db, dataset): assert model.cv_judgment_metric_stdev == np.std(judge_mets) -def test_save_classifier(db, datarun, model): +def test_save_classifier(db, datarun, model, metrics): worker = Worker(db, datarun, model_dir=MODEL_DIR, metric_dir=METRIC_DIR) hp = db.get_hyperpartitions(datarun_id=worker.datarun.id)[0] classifier = worker.db.start_classifier(hyperpartition_id=hp.id, datarun_id=worker.datarun.id, host='localhost', hyperparameter_values=DT_PARAMS) - metrics = {'cv': [{k: random.random() for k in METRICS_BINARY} - for i in range(5)], - 'test': {k: random.random() for k in METRICS_BINARY}} worker.db.complete_classifier = Mock() worker.save_classifier(classifier.id, model, metrics) @@ -172,10 +204,6 @@ def test_save_classifier(db, datarun, model): assert load_metrics(clf, METRIC_DIR) == metrics -def test_run_classifier(): - pass - - def test_is_datarun_finished(db, dataset, datarun): r1 = db.get_datarun(1) worker = Worker(db, r1) @@ -186,5 +214,46 @@ def test_is_datarun_finished(db, dataset, datarun): assert not worker.is_datarun_finished() deadline = (datetime.datetime.now() - datetime.timedelta(seconds=1)).strftime(TIME_FMT) - worker = get_new_worker(db, dataset, deadline=deadline) + worker = get_new_worker(deadline=deadline) assert worker.is_datarun_finished() + + +def test_run_classifier(worker, hyperpartition, model, metrics): + worker.select_hyperpartition = Mock(return_value=hyperpartition) + worker.tune_hyperparameters = Mock(return_value=DT_PARAMS) + worker.test_classifier = Mock(return_value=(model, metrics)) + worker.save_classifier = Mock() + worker.db = Mock() + + # make sure the function shorts out if the datarun is finished + worker.is_datarun_finished = Mock(return_value=True) + 
worker.run_classifier() + assert not worker.select_hyperpartition.called + assert not worker.tune_hyperparameters.called + + # make sure things run smoothly: hyperparameters are chosen and a classifier + # is created, tested, and saved. + worker.is_datarun_finished = Mock(return_value=False) + worker.run_classifier() + worker.select_hyperpartition.assert_called_once() + worker.tune_hyperparameters.assert_called_once_with(hyperpartition) + worker.db.start_classifier.assert_called_once_with(hyperpartition_id=hyperpartition.id, + datarun_id=worker.datarun.id, + host=ANY, + hyperparameter_values=DT_PARAMS) + worker.test_classifier.assert_called_once_with(hyperpartition.method, DT_PARAMS) + worker.save_classifier.assert_called_once_with(ANY, model, metrics) + + # make sure hyperpartition specification works + hp_id = hyperpartition.id + 1 + worker.db.get_hyperpartition = lambda i: ObjWithAttrs(id=i, method='dt', + datarun_id=worker.datarun.id) + worker.run_classifier(hyperpartition_id=hp_id) + worker.tune_hyperparameters.assert_called_with(ObjWithAttrs(id=hp_id)) + + # make sure error handling works correctly + worker.test_classifier.side_effect = ValueError('qwerty') + with pytest.raises(ClassifierError): + worker.run_classifier() + worker.db.mark_classifier_errored.assert_called_with( + ANY, error_message=StringWith('qwerty')) diff --git a/atm/worker.py b/atm/worker.py index 7d96bbd..801d4fd 100755 --- a/atm/worker.py +++ b/atm/worker.py @@ -56,8 +56,9 @@ class ClassifierError(Exception): class Worker(object): def __init__(self, database, datarun, save_files=True, cloud_mode=False, - aws_config=None, model_dir=DEFAULT_MODEL_DIR, - metric_dir=DEFAULT_METRIC_DIR, verbose_metrics=False): + aws_config=None, public_ip='localhost', + model_dir=DEFAULT_MODEL_DIR, metric_dir=DEFAULT_METRIC_DIR, + verbose_metrics=False): """ database: Database object with connection information datarun: Datarun ORM object to work on. 
@@ -70,6 +71,7 @@ def __init__(self, database, datarun, save_files=True, cloud_mode=False, self.save_files = save_files self.cloud_mode = cloud_mode self.aws_config = aws_config + self.public_ip = public_ip self.verbose_metrics = verbose_metrics self.model_dir = model_dir @@ -374,6 +376,10 @@ def run_classifier(self, hyperpartition_id=None): _log('Choosing hyperparameters...') if hyperpartition_id is not None: hyperpartition = self.db.get_hyperpartition(hyperpartition_id) + if hyperpartition.datarun_id != self.datarun.id: + _log('Hyperpartition %d is not a part of datarun %d' % + (hyperpartition_id, self.datarun.id)) + return else: # use the multi-arm bandit to choose which hyperpartition to use next hyperpartition = self.select_hyperpartition() @@ -397,7 +403,7 @@ def run_classifier(self, hyperpartition_id=None): _log('Creating classifier...') classifier = self.db.start_classifier(hyperpartition_id=hyperpartition.id, datarun_id=self.datarun.id, - host=get_public_ip(), + host=self.public_ip, hyperparameter_values=params) try: @@ -439,6 +445,7 @@ def work(db, datarun_ids=None, save_files=False, choose_randomly=True, complete. 
""" start_time = datetime.datetime.now() + public_ip = get_public_ip() ensure_directory(log_dir) # name log file after the local hostname @@ -478,8 +485,8 @@ def work(db, datarun_ids=None, save_files=False, choose_randomly=True, # actual work happens here worker = Worker(db, run, save_files=save_files, cloud_mode=cloud_mode, aws_config=aws_config, - model_dir=model_dir, metric_dir=metric_dir, - verbose_metrics=verbose_metrics) + public_ip=public_ip, model_dir=model_dir, + metric_dir=metric_dir, verbose_metrics=verbose_metrics) try: worker.run_classifier() except ClassifierError: From dc41420bda7a927ae42886b803baa61e811df075 Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 2 Feb 2018 15:13:37 -0500 Subject: [PATCH 27/36] yes pipe into pip install in makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0dfe8cd..bb91277 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,6 @@ test: lint installdeps: pip install --upgrade pip - pip install -e . --process-dependency-links + yes | pip install -e . --process-dependency-links pip install -r requirements-dev.txt From 43d0e346307d7bcee9efb434ba7714177671a39b Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 2 Feb 2018 15:17:39 -0500 Subject: [PATCH 28/36] make pip install quiet, to make circleCI logs legibile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bb91277..21f58dd 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,6 @@ test: lint installdeps: pip install --upgrade pip - yes | pip install -e . --process-dependency-links + yes | pip install -e . 
--process-dependency-links --quiet pip install -r requirements-dev.txt From c86fe9e85c9dd6ffb5b87dd2b79b1a3bf358621e Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 2 Feb 2018 15:29:35 -0500 Subject: [PATCH 29/36] add github to known_hosts --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 21f58dd..740a40b 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,7 @@ test: lint installdeps: pip install --upgrade pip - yes | pip install -e . --process-dependency-links --quiet + ssh-keyscan -H github.com > /etc/ssh/ssh_known_hosts + pip install -e . --process-dependency-links --quiet pip install -r requirements-dev.txt From 69bd26af136a058f8e9178453f6ad01c89c2a51a Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 2 Feb 2018 15:40:09 -0500 Subject: [PATCH 30/36] fix flake8 errors --- atm/database.py | 2 +- atm/worker.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/atm/database.py b/atm/database.py index b368642..83fcc83 100644 --- a/atm/database.py +++ b/atm/database.py @@ -554,7 +554,7 @@ def create_classifier(self, **kwargs): @try_with_session(commit=True) def start_classifier(self, hyperpartition_id, datarun_id, host, - hyperparameter_values): + hyperparameter_values): """ Save a new, fully qualified classifier object to the database. 
Returns: the ID of the newly-created classifier diff --git a/atm/worker.py b/atm/worker.py index 801d4fd..94766de 100755 --- a/atm/worker.py +++ b/atm/worker.py @@ -40,6 +40,7 @@ # TODO: use python's logging module instead of this LOG_FILE = None + def _log(msg, stdout=True): if LOG_FILE: with open(LOG_FILE, 'a') as lf: From ea7b9efcd5bbbba94ee4462da6698ac2b32c8d9a Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 2 Feb 2018 15:47:15 -0500 Subject: [PATCH 31/36] fix isort --- atm/database.py | 2 +- atm/tests/unit_tests/test_enter_data.py | 10 +++++----- atm/tests/unit_tests/test_method.py | 3 ++- atm/tests/unit_tests/test_worker.py | 13 +++++++------ atm/tests/utilities/myselector.py | 3 ++- 5 files changed, 17 insertions(+), 14 deletions(-) diff --git a/atm/database.py b/atm/database.py index 83fcc83..652a37d 100644 --- a/atm/database.py +++ b/atm/database.py @@ -1,9 +1,9 @@ from __future__ import absolute_import, print_function -import pandas as pd from datetime import datetime from operator import attrgetter +import pandas as pd from sqlalchemy import (Column, DateTime, Enum, ForeignKey, Integer, MetaData, Numeric, String, Text, and_, create_engine, func, inspect) diff --git a/atm/tests/unit_tests/test_enter_data.py b/atm/tests/unit_tests/test_enter_data.py index 8da3900..36d3931 100644 --- a/atm/tests/unit_tests/test_enter_data.py +++ b/atm/tests/unit_tests/test_enter_data.py @@ -1,14 +1,14 @@ -import os import json +import os + import pytest -from atm import constants, PROJECT_ROOT -from atm.config import SQLConfig, RunConfig +from atm import PROJECT_ROOT, constants +from atm.config import RunConfig, SQLConfig from atm.database import Database, db_session -from atm.enter_data import enter_data, create_dataset, create_datarun +from atm.enter_data import create_datarun, create_dataset, enter_data from atm.utilities import get_local_data_path - DB_PATH = '/tmp/atm.db' DB_CACHE_PATH = os.path.join(PROJECT_ROOT, 'data/modelhub/test/') DATA_URL = 
'https://s3.amazonaws.com/mit-dai-delphi-datastore/downloaded/' diff --git a/atm/tests/unit_tests/test_method.py b/atm/tests/unit_tests/test_method.py index dd9d3a1..92e2bce 100644 --- a/atm/tests/unit_tests/test_method.py +++ b/atm/tests/unit_tests/test_method.py @@ -1,7 +1,8 @@ #!/usr/bin/python2.7 -import pytest import json +import pytest + from atm.method import Method diff --git a/atm/tests/unit_tests/test_worker.py b/atm/tests/unit_tests/test_worker.py index ef94bdb..a875b3e 100644 --- a/atm/tests/unit_tests/test_worker.py +++ b/atm/tests/unit_tests/test_worker.py @@ -1,23 +1,24 @@ import datetime +import os +import random + import mock import numpy as np -import os import pytest -import random -from mock import patch, Mock, ANY +from mock import ANY, Mock, patch import atm from atm import PROJECT_ROOT from atm.config import RunConfig, SQLConfig -from atm.constants import TIME_FMT, METRICS_BINARY +from atm.constants import METRICS_BINARY, TIME_FMT from atm.database import ClassifierStatus, Database, db_session from atm.enter_data import enter_data from atm.model import Model -from atm.utilities import get_local_data_path, load_model, load_metrics +from atm.utilities import get_local_data_path, load_metrics, load_model from atm.worker import ClassifierError, Worker -from btb.tuning import GP, GPEi, Tuner from btb.selection import BestKReward, BestKVelocity, Selector +from btb.tuning import GP, GPEi, Tuner DB_CACHE_PATH = os.path.join(PROJECT_ROOT, 'data/modelhub/test/') DB_PATH = '/tmp/atm.db' diff --git a/atm/tests/utilities/myselector.py b/atm/tests/utilities/myselector.py index 17ce1ed..87a1b12 100644 --- a/atm/tests/utilities/myselector.py +++ b/atm/tests/utilities/myselector.py @@ -1,6 +1,7 @@ -from btb.selection import Selector import random +from btb.selection import Selector + class MySelector(Selector): def select(self, choice_scores): From 48438869924a659bf992903eb57019fe1f4a804a Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 2 Feb 2018 
15:49:13 -0500 Subject: [PATCH 32/36] quiet the other pip install --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 740a40b..6808ecd 100644 --- a/Makefile +++ b/Makefile @@ -16,5 +16,5 @@ installdeps: pip install --upgrade pip ssh-keyscan -H github.com > /etc/ssh/ssh_known_hosts pip install -e . --process-dependency-links --quiet - pip install -r requirements-dev.txt + pip install -r requirements-dev.txt --quiet From 32e383c631f5e80be56da1437d284621883a6ade Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 2 Feb 2018 15:55:49 -0500 Subject: [PATCH 33/36] move dataset to http --- atm/data/modelhub/test/datasets.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atm/data/modelhub/test/datasets.csv b/atm/data/modelhub/test/datasets.csv index 39c2439..c1b0204 100644 --- a/atm/data/modelhub/test/datasets.csv +++ b/atm/data/modelhub/test/datasets.csv @@ -1,2 +1,2 @@ id,name,class_column,train_path,test_path,description,n_examples,k_classes,d_features,majority,size_kb -1,pollution_1,class,/home/bcyphers/work/fl/atm/atm/data/test/pollution_1.csv,,,60,2,15,0.5166666666666667,7 +1,pollution_1,class,https://s3.amazonaws.com/mit-dai-delphi-datastore/downloaded/pollution_1.csv,,,60,2,15,0.5166666666666667,7 From 2deec35ecafc11b56e587fde27a566860171d6ec Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 2 Feb 2018 16:06:40 -0500 Subject: [PATCH 34/36] download data before trying to train/test --- atm/tests/unit_tests/test_worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atm/tests/unit_tests/test_worker.py b/atm/tests/unit_tests/test_worker.py index a875b3e..7d35ee8 100644 --- a/atm/tests/unit_tests/test_worker.py +++ b/atm/tests/unit_tests/test_worker.py @@ -85,10 +85,10 @@ def hyperpartition(db): @pytest.fixture def model(dataset): + train_path, _ = download_data(dataset.train_path) model = Model(method='dt', params=DT_PARAMS, judgment_metric='roc_auc', 
class_column=dataset.class_column) - train_path, _ = get_local_data_path(dataset.train_path) model.train_test(train_path=train_path) return model From 17e3716ff4d77cbacc77c9ba72cb4a269b45c408 Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 2 Feb 2018 16:12:48 -0500 Subject: [PATCH 35/36] import download_data --- atm/tests/unit_tests/test_worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atm/tests/unit_tests/test_worker.py b/atm/tests/unit_tests/test_worker.py index 7d35ee8..587e6c4 100644 --- a/atm/tests/unit_tests/test_worker.py +++ b/atm/tests/unit_tests/test_worker.py @@ -14,7 +14,7 @@ from atm.database import ClassifierStatus, Database, db_session from atm.enter_data import enter_data from atm.model import Model -from atm.utilities import get_local_data_path, load_metrics, load_model +from atm.utilities import download_data, load_metrics, load_model from atm.worker import ClassifierError, Worker from btb.selection import BestKReward, BestKVelocity, Selector From 814d0de3c270cc595971ef44f09171bf956589c8 Mon Sep 17 00:00:00 2001 From: Bennett Cyphers Date: Fri, 2 Feb 2018 16:24:22 -0500 Subject: [PATCH 36/36] update paths --- Makefile | 2 +- README.md | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 6808ecd..da3c5c3 100644 --- a/Makefile +++ b/Makefile @@ -13,8 +13,8 @@ test: lint python $(TEST_CMD) installdeps: - pip install --upgrade pip ssh-keyscan -H github.com > /etc/ssh/ssh_known_hosts + pip install --upgrade pip pip install -e . --process-dependency-links --quiet pip install -r requirements-dev.txt --quiet diff --git a/README.md b/README.md index 4b953b2..4211520 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ Below we will give a quick tutorial of how to run atm on your desktop. We will u ``` $ python atm/enter_data.py ``` - This command will create a ``datarun``. In ATM, a *datarun* is a single logical machine learning task. 
If you run the above command without any arguments, it will use the default settings found in the `config/templates/\*.yaml` files to create a new SQLite3 database at `./atm.db`, create a new `dataset` instance which refers to the data above, and create a `datarun` instance which points to that dataset. More about what is stored in this database and what is it used for can be found [here](https://cyphe.rs/static/atm.pdf). + This command will create a ``datarun``. In ATM, a *datarun* is a single logical machine learning task. If you run the above command without any arguments, it will use the default settings found in the `atm/config/templates/\*.yaml` files to create a new SQLite3 database at `./atm.db`, create a new `dataset` instance which refers to the data above, and create a `datarun` instance which points to that dataset. More about what is stored in this database and what it is used for can be found [here](https://cyphe.rs/static/atm.pdf). The command should produce a lot of output, the end of which looks something like this: @@ -119,7 +119,7 @@ AND that's it! You can break out of the worker with Ctrl+C and restart it with t ## Customizing ATM's configuration and using your own data -ATM's default configuration is fully controlled by the yaml files in ``conig/templates/``. Our documentation will cover the configuration in more detail, but this section provides a brief overview of how to specify the most important values. +ATM's default configuration is fully controlled by the yaml files in ``atm/config/templates/``. Our documentation will cover the configuration in more detail, but this section provides a brief overview of how to specify the most important values. ### Running ATM on your own data If you want to use the system for your own dataset, convert your data to a csv file similar to the example shown above. The format is: @@ -141,9 +141,10 @@ That means there are two ways to pass configuration to the command. 
Saving configuration as YAML files is an easy way to save complicated setups or share them with team members. - You should start with the templates provided in `config/templates` and modify them to suit your own needs. + You should start with the templates provided in `atm/config/templates` and modify them to suit your own needs. ``` - $ cp config/templates/*.yaml config/ + $ mkdir config + $ cp atm/config/templates/*.yaml config/ $ vim config/*.yaml ```