-
Notifications
You must be signed in to change notification settings - Fork 0
/
makeDNASpreadsheet.sh
executable file
·89 lines (80 loc) · 2.88 KB
/
makeDNASpreadsheet.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env bash
# Process most recently downloaded DNA Match Manager spreadsheets and generate diffs
#
# Usage: ./makeDNASpreadsheet.sh [-m min_cMs]

# DATE is the calendar day; LONGDATE adds the time of day and is used in the
# names of the files this script generates.
DATE="$(date +%Y-%m-%d)"
LONGDATE="$(date +%Y-%m-%d.%H%M%S)"

# Make sure we are in the correct directory (the one holding this script).
DIRNAME=$(dirname "$0")
# The path is quoted so that directories containing spaces work, and the
# script stops here if the cd fails: everything below creates, reads and
# deletes files relative to the current directory.
if ! cd "$DIRNAME"; then
  echo "[Error] Cannot change to script directory: $DIRNAME" >&2
  exit 1
fi
# Minimum cMs threshold. Override it with "-m", for example:
#   ./makeDNASpreadsheet.sh -m 19.5
# Pass .5 less than the value you actually want, because cMs are rounded up
# on the Ancestry web page. The default of 99.5 therefore stands for 100 cMs.
cMs_min=99.5
# The leading ":" in the option string selects getopts' silent mode, so the
# ":" (missing argument) and "?" (unknown option) cases are reported here.
while getopts ":m:" flag; do
  case "$flag" in
    :)
      echo "[Error] Option -$OPTARG requires an argument" >&2
      exit 1
      ;;
    \?)
      echo "[Warning] Ignoring invalid option: -$OPTARG" >&2
      ;;
    m)
      cMs_min="$OPTARG"
      ;;
  esac
done
# Generated spreadsheets
RELATIVES_NEW="Relatives-$LONGDATE.csv"
RELATIVES_TMP="Relatives-$LONGDATE.tmp"
ADDITIONS_NEW="Additions-$LONGDATE.csv"

# Make sure $RELATIVES_TMP exists and is empty
rm -f "$RELATIVES_TMP"
touch "$RELATIVES_TMP"

#######################################
# Print the name of the latest previously generated Relatives spreadsheet
# in the current directory, or nothing when there is none.
# The generated names embed a %Y-%m-%d.%H%M%S timestamp and the shell
# expands globs in sorted order, so the last match is the latest file.
# Outputs: the file name on stdout (empty when no spreadsheet exists)
# Returns: 0
#######################################
_latest_relatives_csv() {
  local candidate latest=""
  for candidate in Relatives-*.csv; do
    # An unmatched glob stays literal; -f filters that out.
    [ -f "$candidate" ] && latest=$candidate
  done
  [ -z "$latest" ] || printf '%s\n' "$latest"
}

# Latest previously generated spreadsheet (empty when this is the first run).
# This runs before $RELATIVES_NEW is created, so the new file is never chosen.
RELATIVES_CURRENT=$(_latest_relatives_csv)
# Process files downloaded by DNA Match Manager
# For each of these strings, use the latest .csv file whose name contains it
KEYS=(23andMe Ancestry FTDNA GEDmatch MyHeritage AllSites)

#######################################
# Print the most recently modified regular file in the current directory
# whose name matches *KEY*.csv.
# Arguments: $1 - string the file name must contain
# Outputs:   the file name on stdout
# Returns:   0 if a file was found, 1 otherwise
#######################################
_newest_download() {
  local key=$1 candidate newest=""
  for candidate in *"$key"*.csv; do
    # An unmatched glob stays literal; -f filters that out.
    [ -f "$candidate" ] || continue
    if [ -z "$newest" ] || [ "$candidate" -nt "$newest" ]; then
      newest=$candidate
    fi
  done
  [ -n "$newest" ] || return 1
  printf '%s\n' "$newest"
}

echo "==> Processing these current files:"
for i in "${KEYS[@]}"; do
  if CURRENT_FILE=$(_newest_download "$i"); then
    # File names are quoted throughout: downloaded files may contain spaces.
    printf '%s\n' "$CURRENT_FILE"
    awk -v cMs_min="$cMs_min" -f getFieldsFromDNA.awk "$CURRENT_FILE" >>"$RELATIVES_TMP"
  fi
done
#######################################
# Remove duplicate rows from stdin and order what remains.
### To add a column change the following 2 comment lines. Ditto in getFieldsFromDNA.awk
#  1       2     3      4     5       6    7    8     9        10
#  Source  Name  Email  Side  Gender  Pct  cMs  Segs  Longest  Relationship
# Sort by cMs, Pct, and then Name
### To add or move a column change the field numbers in the sort statement
# Inputs:  tab-separated rows on stdin
# Outputs: de-duplicated rows on stdout; cMs and Pct sort numerically in
#          descending order, Name sorts ascending ignoring case
#######################################
_sort_matches() {
  sort -u |
    sort --field-separator=$'\t' --key=7,7nr --key=6,6nr -f --key=2,2
}

echo ""
echo "==> Producing this new spreadsheet:"
echo "$RELATIVES_NEW"
# Grab generated header line and put it at top of new spreadsheet
grep -m 1 "^Source Site" "$RELATIVES_TMP" >"$RELATIVES_NEW"
# Every remaining (non-header) row goes below it, de-duplicated and sorted
grep -v "^Source Site" "$RELATIVES_TMP" | _sort_matches >>"$RELATIVES_NEW"
rm -f "$RELATIVES_TMP"
#######################################
# Compare the previous spreadsheet with the new one and report the result.
# When they differ, print a unified diff and save the added lines.
# Arguments: $1 - previous spreadsheet
#            $2 - new spreadsheet
#            $3 - file that receives the lines present only in $2
# Outputs:   report on stdout; an error message on stderr if diff fails
# Returns:   0 after reporting, otherwise diff's exit status (2 or higher)
#######################################
_report_changes() {
  local previous=$1 current=$2 additions=$3 status=0
  diff -q "$previous" "$current" >/dev/null 2>&1 || status=$?
  case "$status" in
    0)
      echo ""
      echo "==> No differences from previous run."
      ;;
    1)
      echo ""
      echo "==> Differences from the previous run:"
      diff -U 1 "$previous" "$current"
      echo ""
      echo "==> These additions from the previous run have been saved in $additions"
      # Lines starting with ">" exist only in the new file; cut drops the "> "
      diff "$previous" "$current" | grep "^>" | cut -c 3- >"$additions"
      cat "$additions"
      ;;
    *)
      # diff itself failed (e.g. unreadable file): do not claim "no differences"
      echo "[Error] Cannot compare $previous with $current (diff exit status $status)" >&2
      return "$status"
      ;;
  esac
}

# If there is no current spreadsheet to compare against then there is nothing to report
if [ -n "$RELATIVES_CURRENT" ]; then
  _report_changes "$RELATIVES_CURRENT" "$RELATIVES_NEW" "$ADDITIONS_NEW" || exit
fi