Fixes for CHOP #14

Draft: wants to merge 7 commits into main
datasets/covtype.py: 10 additions, 0 deletions
@@ -3,6 +3,7 @@

with safe_import_context() as import_ctx:
    from sklearn.datasets import fetch_covtype
    from sklearn.preprocessing import StandardScaler


class Dataset(BaseDataset):
@@ -11,9 +12,18 @@ class Dataset(BaseDataset):
    install_cmd = 'conda'
    requirements = ['pip:scikit-learn']

    parameters = {
        'standardized': [False, True]
    }

    def get_data(self):
        X, y = fetch_covtype(return_X_y=True)
        y[y != 2] = -1
        y[y == 2] = 1  # try to separate class 2 from the other 6 classes.

        if self.standardized:
            scaler = StandardScaler()
            X = scaler.fit_transform(X)
Contributor

I would simplify and always standardize.

Contributor Author

Ok -- only for Covtype? It would make sense to do it for all non-sparse datasets, right?

        data = dict(X=X, y=y)

        return X.shape[1], data
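
Note: outside the benchmark harness, the new `standardized` option amounts to the following. This is a minimal standalone sketch (assuming scikit-learn is installed, and using `np.where` instead of the in-place relabeling above); it is illustrative only and not part of the diff.

```python
# Standalone sketch of what the new `standardized` parameter does, outside
# the benchmark harness. Assumes scikit-learn is installed; fetch_covtype
# downloads the Covertype data on first use.
import numpy as np
from sklearn.datasets import fetch_covtype
from sklearn.preprocessing import StandardScaler

X, y = fetch_covtype(return_X_y=True)
y = np.where(y == 2, 1, -1)  # class 2 vs. the other 6 classes, as in get_data

# standardized=True: rescale each feature to zero mean and unit variance,
# which usually conditions the logistic regression problem better for
# first-order solvers.
X_std = StandardScaler().fit_transform(X)
print(X.std(axis=0)[:3], X_std.std(axis=0)[:3])
```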
solvers/chop.py: 39 additions, 32 deletions
@@ -13,16 +13,17 @@ class Solver(BaseSolver):
    name = 'chop'

    install_cmd = 'conda'
    requirements = ['pip:https://github.com/openopt/chop/archive/master.zip']
    requirements = ['pip:https://github.com/openopt/chop/archive/master.zip',
                    'pip:scikit-learn']
Comment on lines +16 to +17
Contributor

Suggested change
    requirements = ['pip:https://github.com/openopt/chop/archive/master.zip',
                    'pip:scikit-learn']
    requirements = ['pip:https://github.com/openopt/chop/archive/master.zip']

    parameters = {
        'solver': ['pgd'],
        'line_search': [False, True],
        'line_search': [True, False],
        'stochastic': [False, True],
        'batch_size': ['full', 1],
        'momentum': [0., 0.7],
        'momentum': [0., 0.9],
        'device': ['cpu', 'cuda']
    }
    }

    def skip(self, X, y, lmbd):
        if self.device == 'cuda' and not torch.cuda.is_available():
@@ -59,23 +60,32 @@ def set_objective(self, X, y, lmbd):

        device = torch.device(self.device)

        self.X = torch.tensor(X).to(device)
        self.y = torch.tensor(y > 0, dtype=torch.float64).to(device)
        self.X = torch.tensor(X, dtype=torch.float32, device=device)
        self.y = torch.tensor(y, dtype=torch.float32, device=device)

        _, n_features = X.shape

        self.x0 = torch.zeros(n_features,
                              dtype=self.X.dtype,
                              device=self.X.device)
        self.criterion = torch.nn.BCEWithLogitsLoss()

        # prepare loader for stochastic methods
        if self.stochastic:
            dataset = TensorDataset(self.X, self.y)
            self.loader = DataLoader(dataset, batch_size=self.batch_size)

        def logloss(x, data=self.X, target=self.y):
            y_X_x = target * (data @ x.flatten())
            l2 = 0.5 * x.pow(2).sum()
            loss = torch.log1p(torch.exp(-y_X_x)).sum() + self.lmbd * l2
            return loss

        self.objective = logloss

    def run_stochastic(self, n_iter):
        # prepare dataset
        dataset = TensorDataset(self.X, self.y)
        loader = DataLoader(dataset, batch_size=self.batch_size)

        # prepare opt variable
        x = self.x0.clone().detach().flatten()
        x = self.x0.clone().detach()
        x.requires_grad_(True)

        if self.solver == 'pgd':
@@ -86,7 +96,6 @@ def run_stochastic(self, n_iter):
            raise NotImplementedError

        # Optimization loop
        counter = 0

        alpha = self.lmbd / self.X.size(0)

@@ -98,45 +107,44 @@ def loglossderiv(p, y):
                return -y
            return -y / (1. + np.exp(z))

        def optimal_step_size(t):
            """From sklearn, from an idea by Leon Bottou"""
        def initial_step_size():
            p = np.sqrt(1. / np.sqrt(alpha))
            eta0 = p / max(1, loglossderiv(-p, 1))
            t0 = 1. / (alpha * eta0)
            return t0

            return 1. / (alpha * (t0 + t - 1.))
        t0 = initial_step_size()

        while counter < n_iter:
        def optimal_step_size(t):
            """From sklearn, from an idea by Leon Bottou"""
            return 1. / (alpha * (t0 + t - 1.))

            for data, target in loader:
        counter = 0
        stop = False
        while not stop:
            for data, target in self.loader:
                counter += 1
                if counter == n_iter:
                    stop = True
                    break
                optimizer.lr = optimal_step_size(counter)

                optimizer.zero_grad()
                pred = data @ x
                loss = self.criterion(pred, target)
                loss += .5 * alpha * (x ** 2).sum()
                loss = self.objective(x, data=data, target=target)
                loss.backward()
                optimizer.step()

        self.beta = x.detach().clone()

    def run_full_batch(self, n_iter):
        # Set up the problem
        @chop.utils.closure
        def objective(x):
            return self.objective(x, data=self.X, target=self.y)

        # chop's full batch optimizers require
        # (batch_size, *shape) shape
        x0 = self.x0.reshape(1, -1)

        @chop.utils.closure
        def logloss(x):

            alpha = self.lmbd / self.X.size(0)
            out = chop.utils.bmv(self.X, x)
            loss = self.criterion(out, self.y)
            reg = .5 * alpha * (x ** 2).sum()
            return loss + reg

        # Solve the problem
        if self.solver == 'pgd':
            if self.line_search:
@@ -145,8 +153,7 @@ def logloss(x):
                # estimate the step using backtracking line search once
                step = None

            result = chop.optim.minimize_pgd(logloss, x0,
                                             prox=lambda x, s=None: x,
            result = chop.optim.minimize_pgd(objective, x0,
                                             step=step,
                                             max_iter=n_iter)

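Note: for readers unfamiliar with the objective and the step-size schedule used in `run_stochastic`, here is a rough NumPy-only sketch. The `logloss` and `loglossderiv` formulas mirror the closures in the diff above and the scikit-learn heuristic credited to Leon Bottou; the data is synthetic and the sketch is not part of the PR.

```python
# NumPy-only sketch of the solver's objective and step-size schedule; the
# data here is synthetic and only meant to illustrate the formulas above.
import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((200, 5))
y = rng.choice([-1.0, 1.0], size=200)  # labels in {-1, +1}
lmbd = 1.0


def logloss(x, data, target, lmbd):
    # L2-regularized logistic loss, matching the `logloss` closure in
    # set_objective: sum(log1p(exp(-y_i * a_i @ x))) + lmbd * 0.5 * ||x||^2
    y_X_x = target * (data @ x)
    return np.log1p(np.exp(-y_X_x)).sum() + lmbd * 0.5 * (x ** 2).sum()


# Step-size schedule as in scikit-learn's SGD (idea credited to Leon Bottou):
# choose t0 so that the first step has a "typical" magnitude, then decay
# as eta_t = 1 / (alpha * (t0 + t - 1)).
alpha = lmbd / X.shape[0]


def loglossderiv(p, y):
    z = p * y
    if z > 18:
        return np.exp(-z) * -y
    if z < -18:
        return -y
    return -y / (1. + np.exp(z))


p = np.sqrt(1. / np.sqrt(alpha))
eta0 = p / max(1, loglossderiv(-p, 1))
t0 = 1. / (alpha * eta0)


def optimal_step_size(t):
    return 1. / (alpha * (t0 + t - 1.))


print(logloss(np.zeros(5), X, y, lmbd))              # 200 * log(2) at x = 0
print([optimal_step_size(t) for t in (1, 10, 100)])  # decreasing step sizes
```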