diff --git a/hoplite/agile/classifier.py b/hoplite/agile/classifier.py
index f1f4edc..7cb9367 100644
--- a/hoplite/agile/classifier.py
+++ b/hoplite/agile/classifier.py
@@ -43,12 +43,28 @@ def bce_loss(
   y_true = tf.cast(y_true, dtype=logits.dtype)
   log_p = tf.math.log_sigmoid(logits)
   log_not_p = tf.math.log_sigmoid(-logits)
-  raw_bce = -y_true * log_p + (1.0 - y_true) * log_not_p
+  # optax sigmoid_binary_cross_entropy:
+  # -labels * log_p - (1.0 - labels) * log_not_p
+  raw_bce = -y_true * log_p - (1.0 - y_true) * log_not_p
   is_labeled_mask = tf.cast(is_labeled_mask, dtype=logits.dtype)
   weights = (1.0 - is_labeled_mask) * weak_neg_weight + is_labeled_mask
   return tf.reduce_mean(raw_bce * weights)
 
 
+def hinge_loss(
+    y_true: tf.Tensor,
+    logits: tf.Tensor,
+    is_labeled_mask: tf.Tensor,
+    weak_neg_weight: float,
+) -> tf.Tensor:
+  """Weighted SVM hinge loss; labels and mask are cast to logits.dtype."""
+  # Cast like bce_loss so bool/int inputs work, then map {0, 1} to -1/+1.
+  y_true = 2.0 * tf.cast(y_true, dtype=logits.dtype) - 1.0
+  is_labeled_mask = tf.cast(is_labeled_mask, dtype=logits.dtype)
+  weights = (1.0 - is_labeled_mask) * weak_neg_weight + is_labeled_mask
+  return tf.reduce_mean(tf.maximum(0.0, 1.0 - y_true * logits) * weights)
+
+
 def infer(params, embeddings: np.ndarray):
   """Apply the model to embeddings."""
   return np.dot(embeddings, params['beta']) + params['beta_bias']
@@ -105,6 +121,7 @@ def train_linear_classifier(
     learning_rate: float,
     weak_neg_weight: float,
     num_train_steps: int,
+    loss: str = 'bce',
 ):
   """Train a linear classifier."""
   embedding_dim = data_manager.db.embedding_dimension()
@@ -112,12 +129,18 @@ def train_linear_classifier(
   lin_model = get_linear_model(embedding_dim, num_classes)
   optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
   lin_model.compile(optimizer=optimizer, loss='binary_crossentropy')
+  if loss == 'hinge':
+    loss_fn = hinge_loss
+  elif loss == 'bce':
+    loss_fn = bce_loss
+  else:
+    raise ValueError(f'Unknown loss: {loss}')
 
   @tf.function
   def train_step(y_true, embeddings, is_labeled_mask):
     with tf.GradientTape() as tape:
       logits = lin_model(embeddings, training=True)
-      loss = bce_loss(y_true, logits, is_labeled_mask, weak_neg_weight)
+      loss = loss_fn(y_true, logits, is_labeled_mask, weak_neg_weight)
       loss = tf.reduce_mean(loss)
     grads = tape.gradient(loss, lin_model.trainable_variables)
     optimizer.apply_gradients(zip(grads, lin_model.trainable_variables))