Skip to content

Commit

Permalink
Finishes sherlock code
Browse files Browse the repository at this point in the history
  • Loading branch information
PTNobel committed Dec 2, 2024
1 parent 99e5361 commit 829d305
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 5 deletions.
20 changes: 20 additions & 0 deletions utils/run_parameter_sweep.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
#SBATCH --job-name=adelie_sweep
#SBATCH --output=adelie_sweep/output/slurm-%A_%a.out
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=16GB
#SBATCH --partition=candes

# Slurm batch job: activate the randalo environment and run
# utils/sherlock_script.py, passing it the path of the .npz file to write.
#
# NOTE(review): the --output path above is relative to the submission
# directory; confirm adelie_sweep/output/ exists before submitting.

# Abort on the first failing step so the sweep never starts in a
# half-configured environment (missing module, missing venv, ...).
set -e

BASE_DIR="$HOME/alo/benchmarking/lasso_sweep"
RESULTS_DIR="$BASE_DIR/results"
mkdir -p "$RESULTS_DIR"
DEST_FILE="$RESULTS_DIR/sweep.npz"


# Cluster modules first, then the project virtualenv layered on top.
ml python/3.9.0
ml py-pytorch/2.0.0_py39
. "$HOME/randalo/.venv/bin/activate"


# The script takes exactly one argument: the destination .npz path.
python "$HOME/randalo/utils/sherlock_script.py" "$DEST_FILE"
33 changes: 28 additions & 5 deletions utils/sherlock_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,18 @@
import pandas as pd
import pgenlib as pg

import sys
# The script takes exactly one command-line argument: the path the results
# are written to at the end of the run (read back as sys.argv[-1]).
if len(sys.argv) != 2:
    raise RuntimeError(
        f"usage: {sys.argv[0]} DEST_FILE "
        f"(expected exactly 1 argument, got {len(sys.argv) - 1})"
    )

# Directory holding the input files, and a separate cache directory.
data_dir = "/oak/group/candes/for_parth"
cache_dir = "/scratch/candes/for_parth"

# Phenotype table: tab-separated, with the first column used as the row index.
df = pd.read_csv(os.path.join(data_dir, "master_phe.csv"), sep="\t", index_col=0)
# Every column except the last becomes a dense covariate matrix;
# the last column is used as y.
covars_dense = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy()

# TODO: check this
# NOTE(review): the list starts at 0 -- confirm that a chromosome-0 input
# actually exists before relying on this range.
chromosomes = list(range(23))

X = ad.matrix.concatenate(
[ad.matrix.dense(covars_dense)] +
Expand All @@ -26,17 +31,35 @@
)
print(X.shape)

# Reproducible 90/10 train/test split over the samples.
# The permutation must come from the seeded generator; the module-level
# np.random.permutation ignores `rng` and gives a different split each run.
rng = np.random.default_rng(0xEE364A)
P = rng.permutation(y.shape[-1])
n_train = P.size * 9 // 10
# Despite the names, these are integer index arrays, not boolean masks.
train_mask = P[:n_train]
test_mask = P[n_train:]
# NOTE(review): assumes X supports integer-array row indexing -- confirm
# for the adelie matrix type built above.
X_train = X[train_mask]
y_train = y[train_mask]
X_test = X[test_mask]
y_test = y[test_mask]


# Fit the path on the training split only, so the held-out rows stay
# untouched for the out-of-sample evaluation. Each keyword is passed
# exactly once (Python rejects repeated keyword arguments).
state = ad.grpnet(
    X=X_train,
    glm=ad.glm.gaussian(y_train),
    intercept=False,
)

import randalo as ra
import randalo.adelie_integration as ai
import torch

# Compute MSE on the held-out rows (oos) and on the training rows (ins),
# obtain the ALO sweep from randalo, and save everything in one .npz file.
loss = torch.nn.MSELoss()
# NOTE(review): confirm `state.beta` is the intended attribute and that its
# last axis really counts the solutions being scored.
L = state.beta.shape[-1]
oos = np.empty(L)
ins = np.empty(L)
for i in range(L):
    # NOTE(review): neither right-hand side depends on `i`, so every entry
    # receives the same value -- presumably the i-th coefficient vector
    # should be selected here; confirm the indexing.
    oos[i] = loss(torch.from_numpy(X_test @ state.beta), torch.from_numpy(y_test))
    ins[i] = loss(torch.from_numpy(X_train @ state.beta), torch.from_numpy(y_train))

# NOTE(review): this passes the full `y`; if `state` was fit on the training
# split only, the training targets are presumably what is wanted -- confirm.
ld, alo = ai.get_alo_for_sweep(y, state, loss)

# Destination path is the single command-line argument.
np.savez(sys.argv[-1], lamda=ld, alo=alo, oos=oos, in_sample=ins)

0 comments on commit 829d305

Please sign in to comment.