-
Notifications
You must be signed in to change notification settings - Fork 1
/
data.sh
39 lines (27 loc) · 1.16 KB
/
data.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/bin/bash
# download and process data
#
# Options:
#   -c  clear the datasets folder (after an interactive confirmation) so that
#       all data files are re-processed

mkdir -p data

# Ask for confirmation on stdin and, when the answer is y/Y/yes/Yes, delete
# everything matched by data/datasets/*. Any other answer leaves the folder
# untouched.
clear_datasets() {
  local answer
  read -r -p "Are you sure you want to proceed? [y/n]" answer
  if [[ $answer == "y" || $answer == "Y" || $answer == "yes" || $answer == "Yes" ]]; then
    # -f: when the folder is empty or missing the glob stays unexpanded and a
    #     plain `rm -r` would report an error for the literal pattern.
    # --: stop option parsing so names starting with '-' are treated as files.
    rm -rf -- data/datasets/*
  fi
}

# The leading ':' in the optstring puts getopts in silent mode, so unknown
# flags have to be reported here explicitly or they vanish without a trace.
while getopts ":c" opt; do
  case "$opt" in
    c)
      echo "Option -c selected, which will clear the datasets folder and re-process all data files."
      clear_datasets
      ;;
    \?)
      # Keep going (unknown flags were previously ignored), but say so.
      echo "Warning: ignoring unknown option -$OPTARG" >&2
      ;;
  esac
done
# Fetch the CITE-seq BMMC dataset from GEO unless the decompressed .h5ad file
# is already present under data/raw/.
echo "downloading CITE-seq data if needed..."
CITEDATA=data/raw/GSE194122_openproblems_neurips2021_cite_BMMC_processed.h5ad
CITEDATA_URL=https://ftp.ncbi.nlm.nih.gov/geo/series/GSE194nnn/GSE194122/suppl/GSE194122%5Fopenproblems%5Fneurips2021%5Fcite%5FBMMC%5Fprocessed.h5ad.gz
if ! [[ -f "$CITEDATA" ]]; then
  mkdir -p data/raw
  # Write to an explicit path with -O: with -P, a leftover archive from an
  # interrupted run would make wget save the new download as "*.gz.1" while
  # gzip below decompresses the stale, partial file.
  if ! wget -q -O "${CITEDATA}.gz" "$CITEDATA_URL"; then
    echo "Error: failed to download $CITEDATA_URL" >&2
    # Do not leave a truncated archive behind.
    rm -f -- "${CITEDATA}.gz"
    exit 1
  fi
  if ! gzip -d "${CITEDATA}.gz"; then
    echo "Error: failed to decompress ${CITEDATA}.gz" >&2
    exit 1
  fi
else
  echo "Data already exists, skipping download."
fi
# Run the processing scripts from inside data/scripts/. Abort if the directory
# cannot be entered; otherwise the python commands below would be looked up
# relative to the wrong working directory.
cd data/scripts/ || {
  echo "Error: cannot change directory to data/scripts/" >&2
  exit 1
}

echo "Processing CITE-seq data if needed..."
python process_cite.py

echo "Creating interventional balls data if needed..."
python generate_balls.py \
  --distribution_case intervention \
  --scm_mechanism non_linear \
  --latent_case scm