diff --git a/datasets/covtype.py b/datasets/covtype.py
index 059b7a7..6d8f0e0 100644
--- a/datasets/covtype.py
+++ b/datasets/covtype.py
@@ -3,6 +3,7 @@
 with safe_import_context() as import_ctx:
     from sklearn.datasets import fetch_covtype
+    from sklearn.preprocessing import StandardScaler
 
 
 class Dataset(BaseDataset):
@@ -11,9 +12,18 @@ class Dataset(BaseDataset):
     install_cmd = 'conda'
     requirements = ['pip:scikit-learn']
 
+    parameters = {
+        'standardized': [False, True]
+    }
+
     def get_data(self):
         X, y = fetch_covtype(return_X_y=True)
         y[y != 2] = -1
         y[y == 2] = 1  # try to separate class 2 from the other 6 classes.
+
+        if self.standardized:
+            scaler = StandardScaler()
+            X = scaler.fit_transform(X)
         data = dict(X=X, y=y)
+
         return X.shape[1], data
diff --git a/solvers/chop.py b/solvers/chop.py
index 3594aa2..60c5a44 100644
--- a/solvers/chop.py
+++ b/solvers/chop.py
@@ -13,16 +13,17 @@ class Solver(BaseSolver):
     name = 'chop'
 
     install_cmd = 'conda'
-    requirements = ['pip:https://github.com/openopt/chop/archive/master.zip']
+    requirements = ['pip:https://github.com/openopt/chop/archive/master.zip',
+                    'pip:scikit-learn']
 
     parameters = {
         'solver': ['pgd'],
-        'line_search': [False, True],
+        'line_search': [True, False],
         'stochastic': [False, True],
         'batch_size': ['full', 1],
-        'momentum': [0., 0.7],
+        'momentum': [0., 0.9],
         'device': ['cpu', 'cuda']
-        }
+    }
 
     def skip(self, X, y, lmbd):
         if self.device == 'cuda' and not torch.cuda.is_available():
@@ -59,23 +60,32 @@ def set_objective(self, X, y, lmbd):
         device = torch.device(self.device)
 
-        self.X = torch.tensor(X).to(device)
-        self.y = torch.tensor(y > 0, dtype=torch.float64).to(device)
+        self.X = torch.tensor(X, dtype=torch.float32, device=device)
+        self.y = torch.tensor(y, dtype=torch.float32, device=device)
 
         _, n_features = X.shape
         self.x0 = torch.zeros(n_features,
                               dtype=self.X.dtype,
                               device=self.X.device)
-        self.criterion = torch.nn.BCEWithLogitsLoss()
+
+        # prepare loader for stochastic methods
+        if self.stochastic:
+            dataset = TensorDataset(self.X, self.y)
+            self.loader = DataLoader(dataset, batch_size=self.batch_size)
+
+        def logloss(x, data=self.X, target=self.y):
+            y_X_x = target * (data @ x.flatten())
+            l2 = 0.5 * x.pow(2).sum()
+            loss = torch.log1p(torch.exp(-y_X_x)).sum() + self.lmbd * l2
+            return loss
+
+        self.objective = logloss
 
     def run_stochastic(self, n_iter):
-        # prepare dataset
-        dataset = TensorDataset(self.X, self.y)
-        loader = DataLoader(dataset, batch_size=self.batch_size)
-
         # prepare opt variable
-        x = self.x0.clone().detach().flatten()
+        x = self.x0.clone().detach()
         x.requires_grad_(True)
 
         if self.solver == 'pgd':
@@ -86,7 +96,6 @@
             raise NotImplementedError
 
         # Optimization loop
-        counter = 0
 
         alpha = self.lmbd / self.X.size(0)
 
@@ -98,24 +107,29 @@ def loglossderiv(p, y):
                 return -y
             return -y / (1. + np.exp(z))
 
-        def optimal_step_size(t):
-            """From sklearn, from an idea by Leon Bottou"""
+        def initial_step_size():
             p = np.sqrt(1. / np.sqrt(alpha))
             eta0 = p / max(1, loglossderiv(-p, 1))
             t0 = 1. / (alpha * eta0)
+            return t0
 
-            return 1. / (alpha * (t0 + t - 1.))
+        t0 = initial_step_size()
 
-        while counter < n_iter:
+        def optimal_step_size(t):
+            """From sklearn, from an idea by Leon Bottou"""
+            return 1. / (alpha * (t0 + t - 1.))
 
-            for data, target in loader:
+        counter = 0
+        stop = False
+        while not stop:
+            for data, target in self.loader:
                 counter += 1
+                if counter == n_iter:
+                    stop = True
+                    break
                 optimizer.lr = optimal_step_size(counter)
 
-                optimizer.zero_grad()
-                pred = data @ x
-                loss = self.criterion(pred, target)
-                loss += .5 * alpha * (x ** 2).sum()
+                loss = self.objective(x, data=data, target=target)
                 loss.backward()
                 optimizer.step()
 
@@ -123,20 +137,14 @@ def optimal_step_size(t):
     def run_full_batch(self, n_iter):
         # Set up the problem
+        @chop.utils.closure
+        def objective(x):
+            return self.objective(x, data=self.X, target=self.y)
 
         # chop's full batch optimizers require
         # (batch_size, *shape) shape
         x0 = self.x0.reshape(1, -1)
 
-        @chop.utils.closure
-        def logloss(x):
-
-            alpha = self.lmbd / self.X.size(0)
-            out = chop.utils.bmv(self.X, x)
-            loss = self.criterion(out, self.y)
-            reg = .5 * alpha * (x ** 2).sum()
-            return loss + reg
-
         # Solve the problem
         if self.solver == 'pgd':
             if self.line_search:
@@ -145,8 +153,7 @@ def logloss(x):
             # estimate the step using backtracking line search once
             step = None
 
-        result = chop.optim.minimize_pgd(logloss, x0,
-                                         prox=lambda x, s=None: x,
+        result = chop.optim.minimize_pgd(objective, x0,
                                          step=step,
                                          max_iter=n_iter)
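
Note on the objective change in `set_objective`: the explicit `log1p`-based logistic loss on `{-1, +1}` labels replaces the old `BCEWithLogitsLoss` on `{0, 1}` targets, and at the same time switches from a mean-reduced loss with `alpha = lmbd / n` regularization to a sum-reduced loss with `lmbd` regularization. A minimal standalone sketch (illustrative, not part of the diff; all names below are local to the sketch) checking that the two formulations agree up to the factor `n`:

```python
import torch

torch.manual_seed(0)
n, d = 50, 10
X = torch.randn(n, d)
y = torch.randint(0, 2, (n,)).to(torch.float32) * 2 - 1  # labels in {-1, +1}
x = torch.randn(d)
lmbd = 0.1

# new objective: sum_i log(1 + exp(-y_i <a_i, x>)) + lmbd / 2 * ||x||^2
new_loss = torch.log1p(torch.exp(-y * (X @ x))).sum() + lmbd * 0.5 * x.pow(2).sum()

# old objective: mean BCE on {0, 1} targets + 0.5 * (lmbd / n) * ||x||^2
criterion = torch.nn.BCEWithLogitsLoss()  # default reduction='mean'
old_loss = criterion(X @ x, (y > 0).to(X.dtype)) + 0.5 * (lmbd / n) * x.pow(2).sum()

# the two losses coincide up to the sum-vs-mean scaling factor n
assert torch.allclose(new_loss, n * old_loss, rtol=1e-5)
```

So the minimizer is unchanged; only the scale of the reported objective differs.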
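The step-size refactor in `run_stochastic` keeps sklearn's "optimal" schedule, `eta_t = 1 / (alpha * (t0 + t - 1))`, but computes `t0` once up front instead of on every call. A sketch of the schedule under one plausible definition of `loglossderiv` consistent with the two return statements visible in the hunk context (the clipping branches for large `|z|` are an assumption):

```python
import numpy as np

alpha = 1e-4  # stands in for lmbd / n_samples in the solver

def loglossderiv(p, y):
    # d/dp log(1 + exp(-y * p)); clipping thresholds are assumed
    z = p * y
    if z > 18:
        return -y * np.exp(-z)
    if z < -18:
        return -y
    return -y / (1. + np.exp(z))

# initial_step_size(): pick t0 so the first step has magnitude ~eta0
p = np.sqrt(1. / np.sqrt(alpha))
eta0 = p / max(1, loglossderiv(-p, 1))
t0 = 1. / (alpha * eta0)

# optimal_step_size(t): O(1/t) decay, used to set optimizer.lr each iteration
for t in (1, 10, 100, 1000):
    print(t, 1. / (alpha * (t0 + t - 1.)))
```

With `alpha = 1e-4` this gives `t0 = 1000` and a first step of `eta0 = 10`, decaying like `1/t` thereafter, which is why hoisting `t0` out of the per-iteration function is a pure refactor.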