-
Notifications
You must be signed in to change notification settings - Fork 0
/
regen_sitemap.py
74 lines (47 loc) · 2.27 KB
/
regen_sitemap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from datetime import date
from xml.sax.saxutils import escape

# Regenerates sitemap.xml (written to the current working directory) for the
# election results app rooted at url_base.
#
# The <lastmod> stamp is pinned to a fixed date. To stamp entries with the
# current day instead, use date.today().isoformat(): it zero-pads month and
# day, producing the YYYY-MM-DD form used below.
datestring = "2022-11-06"
print(datestring)

url_base = "https://apps.npr.org/election-results-live-2022/"

# Two-letter codes used to build the per-state URLs (50 states plus DC).
states = [
    'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA', 'HI',
    'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA', 'MI', 'MN',
    'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY', 'NC', 'ND', 'OH',
    'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA',
    'WV', 'WI', 'WY',
]

# Top-level app views, appended directly to url_base.
paths = ["", "#/house", "#/senate", "#/governor", "#/president"]

# Per-office share pages, appended directly to url_base.
social_offices = ["share/governor.html", "share/house.html", "share/senate.html"]

# Suffixes appended to "#/states/<STATE>" to form each state's views.
state_paths = ['', '/key', '/S', '/H', '/I', '/G']

header = """<?xml version="1.0" encoding="UTF-8"?>"""
urlset_start = """<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">"""
urlset_end = """</urlset>\n"""

# Entry templates, filled with (loc, lastmod). url_frag marks an entry as
# changing daily; url_frag_nothourly marks it as changing weekly.
url_frag = """\t<url>\n\t\t<loc>%s</loc>\n\t\t<lastmod>%s</lastmod>\n\t\t<changefreq>daily</changefreq>\n\t</url>"""
url_frag_nothourly = """\t<url>\n\t\t<loc>%s</loc>\n\t\t<lastmod>%s</lastmod>\n\t\t<changefreq>weekly</changefreq>\n\t</url>"""


def _sitemap_urls():
    """Yield (url, template) pairs in the order they are written to the sitemap.

    Order: the top-level app views, then the per-office share pages, then for
    each state its views followed by that state's share page.
    """
    for path in paths:
        yield url_base + path, url_frag
    for path in social_offices:
        yield url_base + path, url_frag_nothourly
    for state in states:
        for suffix in state_paths:
            yield url_base + "#/states/" + state + suffix, url_frag
        # One share page per state, emitted after that state's views.
        yield url_base + "share/" + state + ".html", url_frag_nothourly


url_count = 0
# The context manager guarantees the file is flushed and closed even if a
# write fails; the explicit encoding matches the XML declaration in `header`.
with open("sitemap.xml", 'w', encoding="utf-8") as outfile:
    outfile.write(header + "\n")
    outfile.write(urlset_start + "\n")
    for this_url, frag in _sitemap_urls():
        print(this_url)
        # Entity-escape the URL so characters such as "&" cannot produce
        # malformed XML; this is a no-op for the URLs generated today.
        outfile.write(frag % (escape(this_url), datestring) + "\n")
        url_count += 1
    outfile.write(urlset_end)

print("Wrote a total of %s urls" % url_count)