-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcheck_links.py
executable file
·36 lines (31 loc) · 1.01 KB
/
check_links.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/env python3
import re
import sys
# Check a Markdown file for in-document links ("[text](#anchor)") that do not
# correspond to any header in the same file.
#
# Usage: check_links.py in.md

# Matches a Markdown header line ("# Title", "## Title", ...); group 1 is the
# header text.
rx_header = re.compile(r"^#+ (.+)$")
# Matches one in-document link target "](#anchor)"; group 1 is the anchor.
# Used with findall() so that every link on a line is seen, not just one.
rx_link = re.compile(r"\]\(#(.+?)\)")


def _header_to_anchor(title):
    """Return the anchor generated for the header text *title*.

    NOTE: GitHub appears to drop the characters "().:&" and then replace
    each space with "-". Heroku works slightly differently: it seems to
    replace special characters with a space and then squash consecutive
    whitespace into a single "-" (just a guess). The Heroku flavour is the
    one implemented here.
    """
    anchor = re.sub(r"[().:&+]", " ", title.lower())
    return re.sub(r"\s+", "-", anchor.strip())


def find_dangling_links(lines):
    """Return the sorted list of link anchors that have no matching header.

    *lines* is any iterable of text lines (for example an open file). A line
    may contribute both a header anchor and any number of link references.
    """
    headers = set()
    links = set()
    for line in lines:
        # Collect the anchors made available by headers.
        header = rx_header.match(line.strip())
        if header:
            headers.add(_header_to_anchor(header.group(1)))
        # Collect every anchor referenced on this line.
        links.update(rx_link.findall(line))
    # Sorted so the report is stable from run to run.
    return sorted(links - headers)


def main(argv):
    """Run the check for the file named in *argv*; return the exit status.

    Returns 1 when no input file was given, 0 otherwise.
    """
    if len(argv) < 2:
        print("Usage: check_links.py in.md")
        # Stop here instead of falling through to argv[1] and crashing.
        return 1
    with open(argv[1], encoding="utf-8") as f:
        dangling = find_dangling_links(f)
    for link in dangling:
        print("Dangling link found: " + link)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))