-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsync_caches.sh
executable file
·207 lines (163 loc) · 5.8 KB
/
sync_caches.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#!/bin/bash
# Created : 2023-07-07
# Modified : 2024-10-11
# Syncs caches with two remote servers.
# This script should be run from our local machine.
# Cron may start this script without USER in its environment; the remote
# paths used further down are built from it, so fall back to whoami.
if [ -z "${USER}" ]; then
    USER=$(whoami);
fi;
# Directory that contains this script; the local caches and all temporary
# files live here. "$0" is quoted (and preceded by --) so that a script path
# containing whitespace or starting with a dash still resolves to one directory.
DIR="$(dirname -- "$0")";
echo "Retrieving remote caches...";
for HOST in kg-web01 web01; do
    # First, retrieve caches.
    # Every host gets its own scratch directory inside DIR. mktemp creates it
    # atomically and picks another name if the first one is taken, so leftover
    # directories from an earlier aborted run cannot make this step fail.
    # The template keeps the "sync_" + 10 characters shape that the cleanup
    # step's find pattern ("sync_??????????") looks for.
    if ! TMP_DIR=$(mktemp -d "${DIR}/sync_XXXXXXXXXX"); then
        echo "Couldn't create temp directory in ${DIR}.";
        exit 1;
    fi;
    # Download both cache files in a single rsync call; the second source
    # omits the host name, which rsync reads as "same host as the first".
    if ! rsync -aq \
            "${HOST}:/home/${USER}/git/caches/NC_cache.txt" \
            ":/home/${USER}/git/caches/mapping_cache.txt" \
            "${TMP_DIR}"; then
        echo "Couldn't download caches from ${HOST}.";
        exit 2;
    fi;
done;
echo "";
# Now, clean caches.
echo "Cleaning NC cache...";
# Collect every NC_cache.txt below DIR: the local cache itself plus the copies
# downloaded into the temp directories. The list is read NUL-delimited into an
# array so that paths containing whitespace stay intact.
NC_FILES=();
while IFS= read -r -d '' NC_FILE; do
    NC_FILES+=("${NC_FILE}");
done < <(find "${DIR}" -name NC_cache.txt -print0);
# Merge all copies into one sorted, duplicate-free candidate file.
cat -- "${NC_FILES[@]}" | sort | uniq > "${DIR}/NC_cache.new.txt";
# The new cache is allowed to be slightly longer or slightly shorter than the original cache.
OLD=0;
if [ -f "${DIR}/NC_cache.txt" ]; then
    OLD=$(wc -l < "${DIR}/NC_cache.txt");
fi;
NEW=$(wc -l < "${DIR}/NC_cache.new.txt");
if [ "${OLD}" -eq 0 ]; then
    # Without an old cache there is nothing to compare against (and the
    # percentage below would divide by zero), so refuse, with the same exit
    # code as a failed comparison.
    echo "Old NC cache is missing or empty; can't compare the new file against it.";
    exit 3;
fi;
# Line count of the new file as a whole percentage of the old file's line
# count, truncated towards zero. Done with shell arithmetic, so no external
# calculator is required.
DIFF=$(( NEW * 100 / OLD ));
if [ "${DIFF}" -ge 99 ] && [ "${DIFF}" -le 101 ]; then
    echo "Overlap between new file and old file ${DIFF}%; accepted.";
else
    echo "New file differs too much from old file; ${DIFF}% overlap.";
    exit 3;
fi;
if ! mv -f "${DIR}/NC_cache.new.txt" "${DIR}/NC_cache.txt"; then
    echo "Couldn't overwrite the new cache file.";
    exit 4;
fi;
# Delete the downloaded copies. Any path that contains "${DIR}/NC" (i.e. the
# main cache directly inside DIR) is kept; the pattern is quoted, so it is
# matched literally even when DIR is ".".
TEMP_FILES=();
for NC_FILE in "${NC_FILES[@]}"; do
    case "${NC_FILE}" in
        *"${DIR}/NC"*) ;;
        *) TEMP_FILES+=("${NC_FILE}") ;;
    esac;
done;
if [ "${#TEMP_FILES[@]}" -gt 0 ] && ! rm -- "${TEMP_FILES[@]}"; then
    echo "Couldn't delete temp files.";
    exit 5;
fi;
echo "";
# Clean the mapping cache, multiple times if needed.
# Collect every mapping_cache.txt below DIR (the local cache plus the
# downloaded copies), NUL-delimited so paths with whitespace stay intact,
# and merge them into one sorted, duplicate-free working file.
MAPPING_FILES=();
while IFS= read -r -d '' MAPPING_FILE; do
    MAPPING_FILES+=("${MAPPING_FILE}");
done < <(find "${DIR}" -name mapping_cache.txt -print0);
cat -- "${MAPPING_FILES[@]}" | sort | uniq > "${DIR}/mapping_cache.sorted.txt";
# Each round looks at no more than two lines per repeated variant, so a
# variant that occurs three or more times needs several rounds.
ROUND=0;
while true; do
    ROUND=$(( ROUND + 1 ));
    echo "Cleaning mapping cache...";
    # First, find all the repeated variants: values of the first tab-separated
    # column that occur on more than one line of the sorted file.
    cut -f 1 "${DIR}/mapping_cache.sorted.txt" | uniq -d > "${DIR}/tmp.repeated_vars.txt";
    echo -n "Repeated vars: ";
    wc -l < "${DIR}/tmp.repeated_vars.txt";
    # Then, for each repeated variant, try to be intelligent about which one to toss.
    while IFS= read -r variant; do
        # Take the first two lines whose first column is exactly this variant.
        # The variant is handed to awk through the environment and compared as
        # a plain string, so characters that are special in a regular
        # expression (".", "*", "[", "(" ...) cannot select the wrong lines.
        matches=$(variant="${variant}" awk -F '\t' '
            ($1 "") == (ENVIRON["variant"] "") { print; if (++n == 2) exit }
        ' "${DIR}/mapping_cache.sorted.txt");
        if [ "$(printf '%s\n' "${matches}" | grep -vc VV)" != "0" ]; then
            # Remove the one that doesn't have a VV mapping.
            # We assume here, that we have verified the cache and
            # therefore *ALL* variants should have the VV method.
            printf '%s\n' "${matches}" | grep -v VV;
        elif [ "$(printf '%s\n' "${matches}" | grep -c numberConversion)" -eq 1 ]; then
            # So, both have VV, and only one has numberConversion. Toss the one without the numberConversion.
            printf '%s\n' "${matches}" | grep -v numberConversion;
        else
            # Neither rule picks a winner, e.g. two different lines that both
            # carry VV after a manual edit. Both lines get removed, so tell the
            # user on stderr which lines they were.
            echo "Warning: Can't decide between which lines to keep. I will remove both lines." >&2;
            echo "Re-add whatever line you wish to keep to the cleaned mapping cache file." >&2;
            printf '%s\n' "${matches}" >&2;
            printf '%s\n' "${matches}";
        fi;
    done < "${DIR}/tmp.repeated_vars.txt" > "${DIR}/tmp.lines_to_be_deleted.txt";
    echo -n "Lines to be deleted: ";
    COUNT=$(wc -l < "${DIR}/tmp.lines_to_be_deleted.txt");
    echo "${COUNT}";
    # Install a new cache when lines have to go, and also unconditionally in
    # the first round: the working file then holds the merge of the local and
    # remote caches, which has to reach mapping_cache.txt even when it contains
    # no repeated variants. Otherwise entries that only exist on a remote
    # server would be dropped here and overwritten by the push later on.
    if [ "${COUNT}" -gt 0 ] || [ "${ROUND}" -eq 1 ]; then
        # Take the working file and remove the lines selected above; with
        # nothing selected this is simply a copy of the working file.
        comm -2 -3 "${DIR}/mapping_cache.sorted.txt" "${DIR}/tmp.lines_to_be_deleted.txt" > "${DIR}/mapping_cache.new.txt";
        # The new cache is allowed to be slightly longer or slightly shorter than the original cache.
        OLD=0;
        if [ -f "${DIR}/mapping_cache.txt" ]; then
            OLD=$(wc -l < "${DIR}/mapping_cache.txt");
        fi;
        NEW=$(wc -l < "${DIR}/mapping_cache.new.txt");
        if [ "${OLD}" -eq 0 ]; then
            # Nothing to compare against, and the percentage below would
            # divide by zero; refuse with the exit code of a failed comparison.
            echo "Old mapping cache is missing or empty; can't compare the new file against it.";
            exit 6;
        fi;
        # New line count as a whole percentage of the old one, truncated
        # towards zero, computed with shell arithmetic.
        DIFF=$(( NEW * 100 / OLD ));
        if [ "${DIFF}" -ge 99 ] && [ "${DIFF}" -le 101 ]; then
            echo "Overlap between new file and old file ${DIFF}%; accepted.";
        else
            echo "New file differs too much from old file; ${DIFF}% overlap.";
            exit 6;
        fi;
        # Overwrite the cache.
        if ! mv -f "${DIR}/mapping_cache.new.txt" "${DIR}/mapping_cache.txt"; then
            echo "Couldn't overwrite the new cache file.";
            exit 7;
        fi;
    fi;
    # Clean up.
    rm "${DIR}/mapping_cache.sorted.txt" "${DIR}/tmp.repeated_vars.txt" "${DIR}/tmp.lines_to_be_deleted.txt";
    if [ "${COUNT}" -eq 0 ]; then
        # We're done!
        break;
    else
        # We'll have to go another round, starting from the cache just written.
        cp -p "${DIR}/mapping_cache.txt" "${DIR}/mapping_cache.sorted.txt";
    fi;
done;
# Delete the downloaded copies. Any path that contains "${DIR}/mapping" (i.e.
# the main cache directly inside DIR) is kept; the pattern is quoted, so it is
# matched literally even when DIR is ".".
TEMP_FILES=();
for MAPPING_FILE in "${MAPPING_FILES[@]}"; do
    case "${MAPPING_FILE}" in
        *"${DIR}/mapping"*) ;;
        *) TEMP_FILES+=("${MAPPING_FILE}") ;;
    esac;
done;
if [ "${#TEMP_FILES[@]}" -gt 0 ] && ! rm -- "${TEMP_FILES[@]}"; then
    echo "Couldn't delete temp files.";
    exit 8;
fi;
echo "";
# We should now be done, clean up the temp directories.
# Only empty directories named "sync_" plus ten characters are touched. The
# names are read NUL-delimited into an array, so paths with whitespace work,
# and rmdir is skipped altogether when there is nothing to remove instead of
# being run without operands and failing.
SYNC_DIRS=();
while IFS= read -r -d '' SYNC_DIR; do
    SYNC_DIRS+=("${SYNC_DIR}");
done < <(find "${DIR}" -type d -empty -name "sync_??????????" -print0);
if [ "${#SYNC_DIRS[@]}" -gt 0 ] && ! rmdir -- "${SYNC_DIRS[@]}"; then
    echo "Couldn't delete temp directories.";
    exit 9;
fi;
# Last step: upload both cleaned cache files from DIR to the caches
# directory of every remote server. The first failing upload aborts the
# script with exit code 10.
echo "Pushing caches to remote servers...";
for HOST in kg-web01 web01; do
    if ! rsync -aq \
            "${DIR}/NC_cache.txt" \
            "${DIR}/mapping_cache.txt" \
            "${HOST}:/home/${USER}/git/caches/"; then
        echo "Couldn't push caches to ${HOST}.";
        exit 10;
    fi;
done;
echo "done!";
echo "";