-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathtest_urls.py
115 lines (94 loc) · 3.54 KB
/
test_urls.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import os
import re
import requests
import time
from pathlib import Path
from urllib.parse import urlparse
import pytest
# Matches GitHub and raw.githubusercontent URLs delimited by <...> or "...".
# Both alternatives stop at the first closing delimiter ([^>"]*) so that a line
# holding several URLs or quoted literals yields one clean match per URL
# instead of a single greedy match running to the last '>' or '"' on the line.
re_url = re.compile(r'[<"](https://github[^>"]*|https://raw.githubusercontent[^>"]*)[>"]')

# Directories scanned (recursively) for .ttl files; also used to map a URL
# back to a path inside the local checkout.
root_dirs = ["assets/controlled-vocabularies/", "assets/ontologies/", "assets/schemas/"]
def get_urls(root_dirs):
    """
    Collect the URLs found in .ttl files below the given directories.

    Every directory is searched recursively. The result is a list of
    (file_path, url, root_dir) tuples, one per match of ``re_url``, where
    ``url`` has any leading/trailing '<', '"' or '>' characters removed.
    """
    return [
        (ttl_file, found.strip('<">'), directory)
        for directory in root_dirs
        for ttl_file in Path(directory).rglob("*.ttl")
        for found in re_url.findall(ttl_file.read_text(encoding="utf8"))
    ]
def request_url(method, url, *, max_attempts=3, max_backoff=100, default_backoff=1):
    """
    Make HTTP request to the given URL, retrying while the answer is 429.

    Parameters:
        method: callable taking the URL and returning a response object that
            exposes ``status_code`` and a mapping-like ``headers``.
        url: the URL to request.
        max_attempts: maximum number of requests to issue (at least one is
            always made).
        max_backoff: upper bound, in seconds, applied to the server-provided
            ``Retry-After`` value.
        default_backoff: delay, in seconds, used when ``Retry-After`` is
            missing or is not a plain integer (e.g. the HTTP-date form).

    Returns:
        The last response received, whatever its status code.
    """
    max_attempts = max(1, max_attempts)
    ret = None
    for attempt in range(1, max_attempts + 1):
        ret = method(url)
        if ret.status_code != 429:
            break
        if attempt == max_attempts:
            # No further request follows, so waiting would only waste time.
            break
        try:
            backoff = int(ret.headers.get("Retry-After", default_backoff))
        except (TypeError, ValueError):
            # Retry-After may also be an HTTP-date; fall back to the default.
            backoff = default_backoff
        # Clamp so a hostile or odd header can neither stall the run nor
        # hand a negative value to time.sleep().
        backoff = max(0, min(backoff, max_backoff))
        # Linear growth: wait longer after each consecutive 429.
        time.sleep(attempt * backoff)
    return ret
def extract_relative_path(url, root_dir):
    """
    Return the part of ``url`` that starts at the first occurrence of ``root_dir``.

    Returns None when ``root_dir`` does not occur in ``url``.
    """
    # str.find reports -1 for "not found", which covers the membership test too.
    position = url.find(root_dir)
    return None if position < 0 else url[position:]
def check_local_file_exists(file_path):
    """
    Tell whether ``file_path`` exists on the local filesystem.

    A file that is present locally will also be present once the PR is merged.
    """
    present_locally = os.path.exists(file_path)
    return present_locally
def check_repository_existence(url):
    """
    Check whether the GitHub repository referenced by ``url`` exists.

    The first two path segments of the URL are taken as the owner and the
    repository name, and the GitHub REST API is queried for that repository.

    Returns:
        True if the API answers 200. False for any other status, or when the
        URL carries no owner/repository path segments (no request is made
        in that case).
    """
    # Extract the username and repository name from the URL
    path_parts = urlparse(url).path.split("/")
    if len(path_parts) < 3 or not path_parts[1] or not path_parts[2]:
        # Nothing to look up; avoids an IndexError on short paths.
        return False
    username, repository = path_parts[1], path_parts[2]

    # GET request to verify the existence of the repository. The timeout
    # keeps an unresponsive API from hanging the whole test run.
    response = requests.get(
        f"https://api.github.com/repos/{username}/{repository}",
        timeout=10,
    )
    return response.status_code == 200
@pytest.mark.skipif(all(not os.path.exists(root_dir) for root_dir in root_dirs), reason="No root directories found")
def test_url():
    """
    Verify that every URL found in the .ttl files is reachable.

    A URL that does not answer 200 or 301 is still accepted when it maps to
    a file that exists locally and its GitHub repository exists. Every other
    unreachable URL is reported, and the test fails listing all of them.
    """
    print("Starting URL test...")
    errors = []

    # Check if root_dir exist
    for root_dir in root_dirs:
        if not os.path.exists(root_dir):
            print(f"WARNING: root directory '{root_dir}' does not exist.")

    for file_path, url, root_dir in get_urls(root_dirs):
        ret = request_url(requests.head, url)
        # 200 (OK) and 301 (redirect) both count as reachable.
        if ret.status_code in (200, 301):
            continue

        relative_path = extract_relative_path(url, root_dir)
        if not relative_path:
            errors.append(f"ERROR: the corresponding local file of url '{url}' in file '{file_path}' does not exist, root_dir '{root_dir}' is different")
            continue

        # `and` short-circuits: the GitHub API is only queried when the local
        # file exists, which avoids spending API calls on already-failed URLs.
        if not (check_local_file_exists(relative_path) and check_repository_existence(url)):
            errors.append(f"ERROR: URL '{url}' in file '{file_path}' is not accessible, and the corresponding local file does not exist.")

    if errors:
        print("\nErrors found during URL test:")
        for error in errors:
            print(error)
    # Carry the details in the assertion so the failure report is self-contained.
    assert not errors, f"{len(errors)} URL error(s) found:\n" + "\n".join(errors)
# Run the test directly only when this file is executed as a script.
# An unconditional call would also fire on import, i.e. during pytest
# collection, bypassing the skipif marker and running the test twice.
if __name__ == "__main__":
    test_url()