-
Notifications
You must be signed in to change notification settings - Fork 0
/
call_hdbscan_script.sh
executable file
·43 lines (34 loc) · 1.08 KB
/
call_hdbscan_script.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/usr/bin/env bash

# ---------------------------------------------------------------------------
# Configuration for the HDBSCAN batch run.
# ---------------------------------------------------------------------------

# Clustering script that is invoked once per projection file.
PGM_PATH="hdbscan_clustering.py"

# Text file listing the projections to cluster.
# FOR THE DEMO: this must be a list of the UMAP files generated by the
# previous script. It can be created from a terminal with:
#   ls projections/*.txt > umap_list.txt
PROJ_LIST="umap_list.txt"

# Directories in which results are stored.
OUT_DIR="hdbscan_clusters"
PROB_DIR="hdbscan_probs"
LOG_DIR="hdbscan_logs"

# Flag: does the input file have headers?
HEAD="F"

# Flag: should cluster membership probabilities be returned?
PROBS="T"

# Number of ID columns. Not implemented; kept here as a placeholder for now.
N_ID="0"
# Run the clustering script on every projection named in ${PROJ_LIST}.
#
# Reads:  PGM_PATH, PROJ_LIST, HEAD, PROBS, OUT_DIR, PROB_DIR, LOG_DIR
# Exit:   0 if every run succeeded; 1 if the list is missing, unreadable or
#         empty, or if at least one clustering run failed.

# Treat unset variables as errors so a misspelled setting is caught at once
# instead of being passed to the clustering script as an empty argument.
set -euo pipefail

# Fail loudly when the list is absent; otherwise the loop would silently do
# nothing and the script would still report success.
if [[ ! -r "${PROJ_LIST}" ]]; then
  printf 'Error: projection list "%s" does not exist or is not readable\n' \
    "${PROJ_LIST}" >&2
  exit 1
fi

# Get the list of UMAPs to run, one path per line.
# Reading line by line (rather than word-splitting the output of `cat`) keeps
# paths that contain spaces intact and stops glob characters in a path from
# being expanded. `read` with the default IFS trims surrounding whitespace;
# the `|| [[ -n ... ]]` part picks up a last line without a trailing newline.
declare -a UMAPS=()
while read -r LINE || [[ -n "${LINE}" ]]; do
  [[ -n "${LINE}" ]] || continue   # skip blank lines
  UMAPS+=("${LINE}")
done < "${PROJ_LIST}"

if (( ${#UMAPS[@]} == 0 )); then
  printf 'Error: projection list "%s" contains no file names\n' \
    "${PROJ_LIST}" >&2
  exit 1
fi

# Loop over each UMAP file. Two -min_points values and two -eps values are
# handed to every run; how they are combined into parametrizations is decided
# inside ${PGM_PATH} (presumably every combination is run -- TODO confirm).
N_FAILED=0
for UMAP in "${UMAPS[@]}"; do
  echo "Beginning file ${UMAP}"
  if ! python "${PGM_PATH}" \
    -dset "${UMAP}" \
    -min_points 25 50 \
    -eps 0.3 0.5 \
    -head "${HEAD}" \
    -probs "${PROBS}" \
    -outdir "${OUT_DIR}" \
    -probdir "${PROB_DIR}" \
    -log "${LOG_DIR}"; then
    # Keep going so one bad file does not stop the batch, but remember the
    # failure so the final exit status reflects it.
    printf 'Warning: clustering failed for %s\n' "${UMAP}" >&2
    N_FAILED=$(( N_FAILED + 1 ))
  fi
done

if (( N_FAILED > 0 )); then
  printf 'Error: %d of %d clustering runs failed\n' \
    "${N_FAILED}" "${#UMAPS[@]}" >&2
  exit 1
fi