-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparallel_jobs.sh
72 lines (42 loc) · 1.01 KB
/
parallel_jobs.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# parallel_jobs.sh - run CLEAR.py over every *.sync file of a directory,
# processing each file as 30000-line chunks in parallel.
#
# Usage: parallel_jobs.sh <sync_dir> <ne_file>
#   sync_dir  directory holding <name>.sync files; each needs a matching
#             <name>.sync.pops file next to it
#   ne_file   text file; the first field of line i is passed as --N for the
#             i-th *.sync file (in glob order)
#
# Requirements that follow from how the commands are invoked:
#   - CLEAR.py must be in the current directory (run as `python2 CLEAR.py`)
#   - clear_output.py must be in $out_dir (run as `python clear_output.py`
#     after changing into $out_dir)
#   - GNU parallel, python2 and python must be on PATH
#
# Output: $out_dir/<name>.sync.clear holding the rows of the combined *.tsv
# results whose first column is "2L".

#output directory
out_dir="results"

# Print the command-line synopsis to stderr.
usage() {
  printf 'Usage: %s <sync_dir> <ne_file>\n' "${0##*/}" >&2
}

# Print an error message, prefixed with the script name, to stderr.
err() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
}

#######################################
# Split one .sync file, run CLEAR.py on the chunks and collect the result.
# Globals:   out_dir (read)
# Arguments: $1 - directory containing the .sync file
#            $2 - path of the .sync file
#            $3 - value handed to CLEAR.py as --N
# Returns:   0 on success, 1 as soon as any step fails
#######################################
process_sync() {
  local sync_dir sync ne n pops split_dir chunk
  sync_dir=$1
  sync=$2
  ne=$3

  n=${sync##*/}
  echo "File under process: $n"

  ##keep pops file
  pops=$sync_dir/$n.pops
  if [ ! -f "$pops" ]; then
    err "missing pops file: $pops"
    return 1
  fi

  ##make folder to store split files
  # Start from an empty directory so chunks left behind by an aborted run
  # are never mixed into this file's results.
  split_dir=$sync_dir/splitdir
  rm -rf -- "$split_dir"
  mkdir -p -- "$split_dir" || return 1

  ##split file
  # With an empty name prefix split(1) names the chunks aa, ab, ..., zz.
  split -l 30000 -- "$sync" "$split_dir/" || return 1
  if [ ! -e "$split_dir/aa" ]; then
    err "no data to process in $sync"
    return 1
  fi

  #generate *.pops file for each split file
  # NOTE(review): CLEAR.py presumably looks for <chunk>.pops next to each
  # chunk - confirm against CLEAR.py.
  # The pattern covers every two-letter suffix, not only a?, so inputs with
  # more than 26 chunks are processed completely.
  for chunk in "$split_dir"/[a-z][a-z]; do
    [ -e "$chunk" ] || continue
    cp -- "$pops" "$chunk.pops" || return 1
  done

  ##run in parallel
  parallel 'python2 CLEAR.py --sync {1} --N {2} --out {1}.out' \
    ::: "$split_dir"/[a-z][a-z] ::: "$ne" || return 1
  echo "Process finished for: $n"

  mv -- "$split_dir"/*.out "$out_dir"/ || return 1

  # Post-process inside a subshell so the caller's working directory is
  # untouched, and so the clean-up can never run in the wrong directory
  # when the cd fails.
  (
    cd "$out_dir" || exit 1
    python clear_output.py [a-z][a-z].out || exit 1
    awk '$1 == "2L"' ./*.tsv > "$n.clear" || exit 1
    rm -f -- [a-z][a-z].out ./*.tsv
  ) || return 1

  rm -r -- "$split_dir"
}

#######################################
# Validate the arguments and process every *.sync file in turn.
# Globals:   out_dir (read)
# Arguments: $1 - sync directory, $2 - Ne file
# Outputs:   progress messages on stdout, diagnostics on stderr
# Returns:   0 on success, 2 on bad usage, 1 on any processing error
#######################################
main() {
  local sync_dir ne_file start end runtime counter sync ne

  if [ "$#" -lt 2 ]; then
    usage
    return 2
  fi
  #input files
  sync_dir=$1
  ne_file=$2

  if [ ! -d "$sync_dir" ]; then
    err "not a directory: $sync_dir"
    return 1
  fi
  if [ ! -r "$ne_file" ]; then
    err "cannot read Ne file: $ne_file"
    return 1
  fi
  if [ ! -f CLEAR.py ]; then
    err "CLEAR.py not found in the current directory"
    return 1
  fi
  if [ ! -f "$out_dir/clear_output.py" ]; then
    err "clear_output.py not found in $out_dir/"
    return 1
  fi

  # Taken once, so the runtime printed after each file is the time elapsed
  # since the start of the whole run.
  start=$(date +%s)
  counter=0
  for sync in "$sync_dir"/*.sync; do
    # An unmatched glob leaves the literal pattern behind; skip it.
    [ -e "$sync" ] || continue
    counter=$((counter + 1))

    ne=$(awk -v n="$counter" 'NR == n { print $1; exit }' "$ne_file")
    if [ -z "$ne" ]; then
      err "no Ne value on line $counter of $ne_file (needed for $sync)"
      return 1
    fi

    process_sync "$sync_dir" "$sync" "$ne" || return 1

    end=$(date +%s)
    runtime=$((end - start))
    echo "Runtime was:  $runtime"
  done

  if [ "$counter" -eq 0 ]; then
    err "no *.sync files found in $sync_dir"
    return 1
  fi
}

main "$@"