forked from mattkinsey/bucky
-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_US_data.sh
executable file
·102 lines (88 loc) · 3.68 KB
/
get_US_data.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/bin/bash
#######################################
# Flatten a simple YAML file into shell variable assignments.
#
# Every `key: value` line is printed as `<prefix><parents_>key="value"`.
# Parent keys are derived from indentation (two spaces per nesting level) and
# joined with underscores. A value wrapped in single or double quotes has the
# quotes stripped. Keys without a value only contribute to the names of the
# keys nested below them.
#
# Arguments:
#   $1 - path to the YAML file; when omitted or empty, stdin is read instead
#   $2 - optional prefix prepended to every generated variable name
# Outputs:
#   One assignment per line on stdout, in a form suitable for `eval`.
#######################################
parse_yaml() {
  local prefix=${2-}
  local s='[[:space:]]*' w='[a-zA-Z0-9_]*'
  # ASCII FS (octal 034): a separator that will not occur in normal YAML text.
  local fs=$'\034'

  # sed turns each "indent key: value" line into "indent<FS>key<FS>value";
  # the first -e pattern covers quoted values, the second unquoted ones.
  # ${1:+"$1"} passes the path as ONE word (safe for spaces) and expands to
  # nothing when no path was given, so sed falls back to stdin as before.
  sed -ne "s|^\($s\):|\1|" \
    -e "s|^\($s\)\($w\)$s:${s}[\"']\(.*\)[\"']$s\$|\1$fs\2$fs\3|p" \
    -e "s|^\($s\)\($w\)$s:$s\(.*\)$s\$|\1$fs\2$fs\3|p" ${1:+"$1"} |
    awk -F"$fs" -v prefix="$prefix" '{
      # Two spaces of indentation correspond to one nesting level.
      indent = length($1)/2;
      vname[indent] = $2;
      # Forget keys that belonged to deeper levels of a previous branch.
      for (i in vname) {if (i > indent) {delete vname[i]}}
      if (length($3) > 0) {
        # Build the "parent_parent_" part of the name from the ancestor keys.
        vn=""; for (i=0; i<indent; i++) {vn=(vn)(vname[i])("_")}
        printf("%s%s%s=\"%s\"\n", prefix, vn, $2, $3);
      }
    }'
}
# ---------------------------------------------------------------------------
# Fetch every external dataset used for the US data set into the `data_dir`
# named in config.yml (read from the directory the script is started in).
# Each step is skipped when its output already exists, so the script can be
# re-run. Steps are best-effort: a failed download is reported on stderr and
# the remaining datasets are still attempted.
#
# NOTE(review): TLS certificate verification is disabled for every download
# (`curl -k`, `git -c http.sslVerify=false`). That is kept as-is here in case
# it is needed behind an intercepting proxy, but it should be revisited.
# ---------------------------------------------------------------------------

# Report a fatal problem on stderr and stop the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Report a non-fatal problem on stderr.
warn() {
  printf 'warning: %s\n' "$*" >&2
}

# fetch_url URL DEST
# Download URL to DEST. The data is written to DEST.part first and renamed
# only on success; --fail makes HTTP errors count as failures, so an error
# page is never left behind under the final name.
fetch_url() {
  local url=$1 dest=$2
  if curl -kfL "$url" --output "${dest}.part"; then
    mv -f -- "${dest}.part" "$dest"
  else
    rm -f -- "${dest}.part"
    warn "download failed: $url"
    return 1
  fi
}

# clone_repo PARENT URL MESSAGE
# Clone URL into PARENT/<repository name> unless that directory already
# exists. MESSAGE is printed before cloning starts.
clone_repo() {
  local parent=$1 url=$2 message=$3
  # Same directory name git would choose: last URL component without ".git".
  local name=${url##*/}
  name=${name%.git}

  [[ -d "$parent/$name" ]] && return 0
  mkdir -p -- "$parent" || return
  printf '%s\n' "$message"
  # An explicit destination avoids changing directory at all.
  git -c http.sslVerify=false clone "$url" "$parent/$name" \
    || warn "could not clone $url"
}

# fetch_shapefile URL
# Download a TIGER zip archive into shapefiles/ and unpack it there, unless
# the matching .shp file is already present. The archive itself is left in
# place after unpacking.
fetch_shapefile() {
  local url=$1
  local archive=${url##*/}
  local stem=${archive%.zip}

  [[ -f "shapefiles/${stem}.shp" ]] && return 0
  mkdir -p shapefiles || return
  fetch_url "$url" "shapefiles/$archive" || return
  unzip -o "shapefiles/$archive" -d shapefiles \
    || warn "could not unpack shapefiles/$archive"
}

base_dir=$(pwd)

# config.yml is evaluated as shell code, so it must come from a trusted source.
# The substitution is quoted so values are not word-split or glob-expanded.
eval "$(parse_yaml config.yml)"

# Without this guard an empty data_dir would make `cd` jump to $HOME and all
# downloads would land there.
[[ -n "${data_dir-}" ]] || die "data_dir is not set (check config.yml)"
mkdir -p -- "$data_dir" || die "cannot create data directory: $data_dir"
cd -- "$data_dir" || die "cannot enter data directory: $data_dir"

# CSSE case data
clone_repo cases https://github.com/CSSEGISandData/COVID-19.git \
  "Cloning CSSE repo"

# Vaccination time series
clone_repo vac https://github.com/mattkinsey/covid19-vaccine-timeseries \
  "Cloning vaccine timeseries"

# County level ACIP demographics
clone_repo vac https://github.com/mattkinsey/county-acip-demos \
  "Cloning ACIP demos"

# Descartes mobility data
clone_repo mobility https://github.com/descarteslabs/DL-COVID-19.git \
  "Cloning Descartes Labs mobility data"

# COVIDExposureIndices mobility data
clone_repo mobility https://github.com/COVIDExposureIndices/COVIDExposureIndices.git \
  "Cloning COVIDExposureIndices mobility data"

# US TL shapefiles
fetch_shapefile https://www2.census.gov/geo/tiger/TIGER2019/COUNTY/tl_2019_us_county.zip
fetch_shapefile https://www2.census.gov/geo/tiger/TIGER2019/STATE/tl_2019_us_state.zip

# US Census bridged-race population estimates (age stratified)
#https://www.cdc.gov/nchs/nvss/bridged_race/Documentation-Bridged-PostcenV2018.pdf
if [[ ! -f population/US_pop.csv ]]; then
  mkdir -p population
  # Subshell: the directory change and pipefail stay local to this step, and
  # `exit` only abandons the step, not the whole script.
  (
    cd population || exit 1
    set -o pipefail

    if [[ ! -f pcen_v2019_y1019.txt.zip ]]; then
      fetch_url https://www.cdc.gov/nchs/nvss/bridged_race/pcen_v2019_y1019_txt.zip \
        pcen_v2019_y1019.txt.zip || exit 1
    fi

    if [[ ! -f pcen_v2019_y1019.csv ]]; then
      # Keep character columns 5-11 and 94-101 of each record, then insert a
      # comma after the 7th and after the 5th character to form three CSV
      # fields. (Presumably county code, age and population; confirm against
      # the documentation linked above.)
      # The result is written to a .part file and renamed only if the whole
      # pipeline succeeded, so a failed extraction is retried on the next run.
      if unzip -p pcen_v2019_y1019.txt.zip pcen_v2019_y1019.txt \
        | cut -c 5-11,94-101 \
        | sed "s/./&,/7;s/./&,/5" > pcen_v2019_y1019.csv.part; then
        mv -f -- pcen_v2019_y1019.csv.part pcen_v2019_y1019.csv
      else
        rm -f -- pcen_v2019_y1019.csv.part
        exit 1
      fi
    fi

    if [[ ! -f US_pop.csv ]]; then
      # PYTHONPATH points at the start directory so the `bucky` package there
      # can be imported.
      PYTHONPATH="$base_dir" python -c "from bucky.util.util import bin_age_csv; bin_age_csv('pcen_v2019_y1019.csv','US_pop.csv')" \
        || exit 1
    fi
  ) || warn "could not build population/US_pop.csv"
fi

# Copy included data to data_dir
# -n: never overwrite files that already exist in data_dir.
cp -nR -- "$base_dir"/included_data/* .

# Contact matrices
# Checked after the copy above, so a copy shipped in included_data is honoured.
if [[ ! -d contact_matrices_152_countries ]]; then
  # -o matches the other unzip calls and avoids an interactive overwrite prompt.
  fetch_url https://doi.org/10.1371/journal.pcbi.1005697.s002 journal.pcbi.1005697.s002.zip \
    && unzip -o journal.pcbi.1005697.s002.zip
fi