forked from CMSROMA/Utilities
-
Notifications
You must be signed in to change notification settings - Fork 2
/
mergeOutput.sh
executable file
·168 lines (141 loc) · 5.43 KB
/
mergeOutput.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#!/bin/bash
################
# This script merges the ntuples produced by crab jobs.
# It assumes that all the ntuples are stored in directories whose names end with UNMERGED.
#######
#######################################
# Print the command-line synopsis of this script.
# Arguments: none (the script name is taken from $0)
# Outputs:   the usage text on stdout
#######################################
usage(){
    # "$0" is quoted so that a script path containing whitespace still yields
    # the right name instead of making basename fail.
    echo "$(basename -- "$0") -u UI_WORKING_DIR"
    echo "--------- optional"
    echo " --merged_remote_dir dir: merge and copy the merged file the merged_remote_dir"
    echo " -g, --grep arg: merge files matching the argument"
    echo " --noRemove: do not remove original files"
}
#------------------------------ parsing
#######################################
# Parse the command line into the global settings used by the rest of the
# script.
# Globals written: UI_WORKING_DIR, MERGED_REMOTE_DIR, FILENAME_BASE, NOREMOVE
# Arguments: the script's command-line arguments ("$@")
# Exits: 0 after printing the usage for -h/--help, 1 on an invalid option
#######################################
parse_options(){
    local options
    # options may be followed by one colon to indicate they have a required argument.
    # --ui_working_dir is declared here so that the long form handled in the
    # case statement below is actually accepted by getopt.
    # getopt runs without "-u": it then emits shell-quoted words, so values
    # containing whitespace survive the "eval set --" below.
    if ! options=$(getopt -o hu:g: -l help,ui_working_dir:,merged_remote_dir:,grep:,noRemove -- "$@")
    then
        # something went wrong, getopt will put out an error message for us
        exit 1
    fi
    eval set -- "$options"
    while [ $# -gt 0 ]
    do
        case $1 in
            -h|--help) usage; exit 0;;
            -u|--ui_working_dir) UI_WORKING_DIR=$2; shift;;
            --merged_remote_dir) MERGED_REMOTE_DIR=$2; shift;;
            -g|--grep) FILENAME_BASE=$2; shift;;
            --noRemove) NOREMOVE=y;;
            # --ntuple) FILENAME_BASE="ntuple";;
            (--) shift; break;;
            (-*) echo "$0: error - unrecognized option $1" 1>&2; usage >&2; exit 1;;
            (*) break;;
        esac
        shift
    done
}
parse_options "$@"
#------------------------------ checking
# The crab working directory must exist and must contain a readable
# share/crab.cfg; otherwise report the problem on stderr and stop.
if [ ! -d "${UI_WORKING_DIR}" ]; then
    echo "[ERROR] crab working directory ${UI_WORKING_DIR} not found" >&2
    usage >&2
    exit 1
elif [ ! -r "${UI_WORKING_DIR}/share/crab.cfg" ]; then
    # The path is now expanded in the message: it used to be printed as a
    # literal "{UI_WORKING_DIR}" because the "$" was missing.
    echo "[ERROR] crab config ${UI_WORKING_DIR}/share/crab.cfg not found" >&2
    usage >&2
    exit 1
fi
#######################################
# Read the job settings from share/crab.cfg and derive the dataset name.
# Globals read:    UI_WORKING_DIR, RUNRANGE (optional, taken from the environment)
# Globals written: USER_REMOTE_DIR, STORAGE_PATH, NJOBS, RUNRANGE, DATASETNAME
# Outputs: prints "RUNRANGE=allRange" when RUNRANGE was empty and got defaulted
#######################################
load_crab_config(){
    # Quoted so that a working directory containing whitespace is handled.
    local cfg="${UI_WORKING_DIR}/share/crab.cfg"

    ### taking the output directory (also possible directly from the crab.cfg file
    USER_REMOTE_DIR=$(grep '^user_remote_dir=' "${cfg}" | cut -d '=' -f 2)
    # The SRM prefix is rewritten before the cut: it contains a "=" of its own,
    # so cutting first would keep only the part of the value before it.
    STORAGE_PATH=$(grep 'storage_path=' "${cfg}" | sed 's|/srm/v2/server?SFN=|root://eoscms/|' | cut -d '=' -f 2)
    NJOBS=$(grep 'number_of_jobs=' "${cfg}" | cut -d '=' -f 2)
    #echo $STORAGE_PATH $USER_REMOTE_DIR

    # Disabled alternative: take the run range from the crab configuration.
    #RUNRANGE=`grep 'runselection=' ${UI_WORKING_DIR}/share/crab.cfg |cut -d '=' -f 2`
    if [ -z "$RUNRANGE" ]; then
        echo "RUNRANGE=${RUNRANGE:=allRange}"
    fi

    # Dataset name: last path component of whatever precedes the run range
    # in the remote directory.
    DATASETNAME=$(printf '%s\n' "${USER_REMOTE_DIR}" | sed "s|${RUNRANGE}.*||")
    DATASETNAME=$(basename -- "${DATASETNAME}")
    #echo "DATASETNAME=${DATASETNAME}"
}
load_crab_config
#crab -c ${UI_WORKING_DIR} -report | grep srmPath | cut -d ' ' -f 6
## make the list of files in the output directory assuming there are
## only unmerged output files from this process
# Decide where the merged file goes.
# When the crab output directory ends in "unmerged" or "UNMERGED", the merged
# directory defaults to its parent, unless MERGED_REMOTE_DIR is already set.
# For any other layout MERGED_REMOTE_DIR must have been given explicitly.
case ${USER_REMOTE_DIR} in
    */unmerged|*/UNMERGED)
        # ":=" assigns the default and prints the resulting value in one step
        echo "MERGED_REMOTE_DIR=${MERGED_REMOTE_DIR:=$(dirname -- "${USER_REMOTE_DIR}")}"
        ;;
    *)
        if [ -z "${MERGED_REMOTE_DIR}" ]; then
            echo "[ERROR] Unmerged files not in UNMERGED subdir: ${USER_REMOTE_DIR}" >&2
            exit 1
        fi
        ;;
esac
#######################################
# Choose the name of the merged file from the kind of ntuple being merged.
# Globals read:    FILENAME_BASE, DATASETNAME, RUNRANGE
# Globals written: MERGEDFILE
#######################################
set_merged_filename(){
    local stem="${DATASETNAME}-${RUNRANGE}.root"
    # Arms are tried in order: PUDumper must match exactly, the two "extra"
    # kinds match anywhere in FILENAME_BASE, everything else gets no prefix.
    case "${FILENAME_BASE}" in
        PUDumper)         MERGEDFILE="PUDumper-${stem}";;
        *extraID*)        MERGEDFILE="eleIDTree-${stem}";;
        *extraCalibTree*) MERGEDFILE="extraCalibTree-${stem}";;
        *)                MERGEDFILE="${stem}";;
    esac
    # Disabled alternative naming that also encodes the JSON:
    #   MERGEDFILE=${DATASETNAME}-${RUNRANGE}-JSON_${JSON}.root
}
set_merged_filename
# Full destination of the merged file: a merged directory under /afs/ is used
# as it is, any other one is placed under the storage path.
if [[ "${MERGED_REMOTE_DIR}" == /afs/* ]]; then
    eosFile="${MERGED_REMOTE_DIR}/${MERGEDFILE}"
else
    eosFile="${STORAGE_PATH}/${MERGED_REMOTE_DIR}/${MERGEDFILE}"
fi
#######################################
# Stop early when the marker file res/merged_<FILENAME_BASE> exists in the
# crab working directory (this script creates it after a completed merge).
# Globals read: UI_WORKING_DIR, FILENAME_BASE, eosFile
# Outputs: a report line and the destination of the merged file
# Exits: 0 when the marker exists; returns normally otherwise
#######################################
exit_if_already_merged(){
    if [ -e "${UI_WORKING_DIR}/res/merged_${FILENAME_BASE}" ]; then
        echo "[REPORT] Ntuples ${FILENAME_BASE} already merged"
        # Report the destination actually used for the copy: for a merged
        # directory under /afs/ it is not prefixed with the storage path.
        echo "${eosFile}"
        exit 0
    fi
}
exit_if_already_merged
#echo "MERGED_REMOTE_DIR=${MERGED_REMOTE_DIR:=${USER_REMOTE_DIR}}"
# Rebuild the list of unmerged files from scratch, then make sure the local
# scratch directory used for the merged file exists.
# Options are placed before the operand: the previous "rm filelist/ -Rf" form
# only works with option-permuting (GNU) implementations of rm.
rm -rf -- filelist/
# presumably makefilelist.sh writes filelist/unmerged.list, which the merge
# step reads afterwards - confirm against that script
if [ -n "${FILENAME_BASE}" ]; then
    makefilelist.sh -g "${FILENAME_BASE}" unmerged "${STORAGE_PATH}/${USER_REMOTE_DIR}" || exit 1
else
    makefilelist.sh unmerged "${STORAGE_PATH}/${USER_REMOTE_DIR}" || exit 1
fi
# mkdir -p succeeds when the directory already exists, so no prior test is
# needed; stop here if the scratch directory cannot be created.
mkdir -p "/tmp/${USER}" || exit 1
#if [ -z "$JSON" ];then
# JSON=`echo ${USER_REMOTE_DIR} | sed "s|.*${RUNRANGE}/||;s|/unmerged.*||"`
# eos.select ls $eosFile && {
# echo "$eosFile"
# echo "[WARNING] Files not merged because merged file already exist" >> /dev/stderr
# echo "[WARNING] Files not merged because merged file already exist" >> /dev/stdout
# exit 1
# }
# if [ "`cat filelist/unmerged.list | wc -l`" != "${NJOBS}" ];then
# echo "[ERROR `basename $0`] Number of files to merge differs with respect to number of jobs: " >> /dev/stderr
# echo " `cat filelist/unmerged.list | wc -l` != ${NJOBS}" >> /dev/stderr
# exit 1
# fi
# Merge every listed file into one local file, then copy it to its destination.
merged_tmp="/tmp/${USER}/${MERGEDFILE}"
# Fail with a clear message instead of running hadd without any input file.
if [ ! -s filelist/unmerged.list ]; then
    echo "[ERROR] nothing to merge: filelist/unmerged.list is missing or empty" >&2
    exit 1
fi
# The list is expanded unquoted on purpose: every whitespace-separated entry
# becomes one input argument of hadd.
# shellcheck disable=SC2046
hadd -f "${merged_tmp}" $(cat filelist/unmerged.list) || exit 1
# copy the merged file to the repository
xrdcp -Nv "${merged_tmp}" "${eosFile}" || exit 1
# let's remove the files
# The unmerged inputs are deleted unless --noRemove was given (NOREMOVE set).
if [ -z "${NOREMOVE}" ]; then
    # The list is word-split exactly as it was when passed to hadd, so the
    # entries removed are the entries that were merged.
    # shellcheck disable=SC2013
    for file in $(cat filelist/unmerged.list)
    do
        # strip the xrootd server prefix before handing the path to eos.select
        # (dots are escaped so that they only match literal dots)
        file=$(printf '%s\n' "${file}" | sed 's|root://eoscms\.cern\.ch/||')
        eos.select rm "${file}"
    done
    # NOTE(review): MERGED_REMOTE_DIR looks always non-empty at this point
    # (earlier in the script it is either defaulted or the script exits), so
    # this rmdir seems unreachable; it also hard-codes the lower-case
    # "unmerged" name. Confirm the intent before changing the condition.
    if [ -z "${MERGED_REMOTE_DIR}" ]; then
        eos.select rmdir "$(dirname -- "${STORAGE_PATH}/${USER_REMOTE_DIR}" | sed 's|root://eoscms/||')/unmerged"
    fi
fi
rm filelist/unmerged.list
# marker file: its presence makes a later run report "already merged" and stop
touch "${UI_WORKING_DIR}/res/merged_${FILENAME_BASE}"
exit 0