fix: fix bugs of prepend bos during eval and sampling (#30)
SmallMelon-L authored Jul 6, 2024
1 parent 0e3d268 commit 504fbb3
Showing 2 changed files with 17 additions and 3 deletions.
src/lm_saes/analysis/sample_feature_activations.py (11 additions, 0 deletions)

@@ -65,8 +65,19 @@ def sample_feature_activations(
         _, cache = model.run_with_cache_until(batch, names_filter=[cfg.sae.hook_point_in, cfg.sae.hook_point_out], until=cfg.sae.hook_point_out)
         activation_in, activation_out = cache[cfg.sae.hook_point_in], cache[cfg.sae.hook_point_out]
 
+        filter_mask = torch.logical_or(
+            batch.eq(model.tokenizer.eos_token_id),
+            batch.eq(model.tokenizer.pad_token_id)
+        )
+        filter_mask = torch.logical_or(
+            filter_mask,
+            batch.eq(model.tokenizer.bos_token_id)
+        )
+
         feature_acts = sae.encode(activation_in, label=activation_out)[..., start_index: end_index]
 
+        feature_acts[filter_mask] = 0
+
         act_times += feature_acts.gt(0.0).sum(dim=[0, 1])
 
         for name in cfg.subsample.keys():
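In plain terms, the sampling-side fix builds a boolean mask over special-token positions (BOS, EOS, PAD) and zeroes their feature activations before they are counted. Below is a minimal sketch of that logic with made-up shapes and token ids; it is an illustration, not code from the repository:

    import torch

    # Illustrative special-token ids (stand-ins for model.tokenizer.*_token_id).
    BOS_ID, EOS_ID, PAD_ID = 1, 2, 0

    batch = torch.tensor([[1, 5, 7, 2, 0, 0]])   # (batch, seq) token ids
    feature_acts = torch.rand(1, 6, 4)           # (batch, seq, d_sae) SAE feature activations

    # Mask every position occupied by a special token, as the commit does.
    filter_mask = torch.logical_or(batch.eq(EOS_ID), batch.eq(PAD_ID))
    filter_mask = torch.logical_or(filter_mask, batch.eq(BOS_ID))

    # Zero those positions so they can never register as active features.
    feature_acts[filter_mask] = 0

    # Per-feature activation counts now exclude BOS/EOS/PAD positions.
    act_times = feature_acts.gt(0.0).sum(dim=[0, 1])
    print(act_times)  # tensor of shape (4,)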
src/lm_saes/evals.py (6 additions, 3 deletions)

@@ -23,6 +23,8 @@ def run_evals(
 ):
     ### Evals
     eval_tokens = activation_store.next_tokens(cfg.act_store.dataset.store_batch_size)
+
+    assert eval_tokens is not None, "Activation store is empty"
 
     # Get Reconstruction Score
     losses_df = recons_loss_batched(

@@ -41,13 +43,15 @@
     # get cache
     _, cache = model.run_with_cache_until(
         eval_tokens,
-        prepend_bos=False,
         names_filter=[cfg.sae.hook_point_in, cfg.sae.hook_point_out],
         until=cfg.sae.hook_point_out,
     )
 
+    filter_mask = torch.logical_and(eval_tokens.ne(model.tokenizer.eos_token_id), eval_tokens.ne(model.tokenizer.pad_token_id))
+    filter_mask = torch.logical_and(filter_mask, eval_tokens.ne(model.tokenizer.bos_token_id))
+
     # get act
-    original_act_in, original_act_out = cache[cfg.sae.hook_point_in], cache[cfg.sae.hook_point_out]
+    original_act_in, original_act_out = cache[cfg.sae.hook_point_in][filter_mask], cache[cfg.sae.hook_point_out][filter_mask]
 
     feature_acts = sae.encode(original_act_in, label=original_act_out)
     reconstructed = sae.decode(feature_acts)

@@ -144,7 +148,6 @@ def get_recons_loss(
 
     _, cache = model.run_with_cache_until(
         batch_tokens,
-        prepend_bos=False,
         names_filter=[cfg.sae.hook_point_in, cfg.sae.hook_point_out],
         until=cfg.sae.hook_point_out,
     )
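The eval-side change takes a different route: rather than zeroing activations, it drops masked positions outright. Boolean-mask indexing on the cached activations collapses the batch and sequence dimensions, so reconstruction metrics are computed only over real-content tokens. A small sketch under assumed shapes and token ids (again, not repository code):

    import torch

    BOS_ID, EOS_ID, PAD_ID = 1, 2, 0               # illustrative ids

    eval_tokens = torch.tensor([[1, 5, 7, 2, 0]])  # (batch, seq) token ids
    act_in = torch.rand(1, 5, 8)                   # (batch, seq, d_model) cached activations

    # Keep only positions that are not EOS, PAD, or BOS.
    keep = torch.logical_and(eval_tokens.ne(EOS_ID), eval_tokens.ne(PAD_ID))
    keep = torch.logical_and(keep, eval_tokens.ne(BOS_ID))

    # Boolean indexing flattens (batch, seq, d_model) into (n_kept, d_model).
    original_act_in = act_in[keep]
    print(original_act_in.shape)                   # torch.Size([2, 8])

Dropping prepend_bos=False from both run_with_cache_until call sites is presumably the other half of the fix: the model reverts to its default BOS handling, and the masks above (plus the zeroing during sampling) keep that BOS token from skewing the statistics.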
