-
Notifications
You must be signed in to change notification settings - Fork 0
/
shared.py
35 lines (30 loc) · 1.11 KB
/
shared.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import re
import pickle
from os import path
def is_valid(url):
    """Return whether *url* is an accepted Justia U.S. case link.

    A URL is accepted when it does not contain the substring ``/year``
    anywhere and it starts with
    ``https://supreme.justia.com/cases/federal/us``.
    """
    has_year_segment = re.search(r"/year", url) is not None
    return (not has_year_segment) and url.startswith(
        "https://supreme.justia.com/cases/federal/us"
    )
def strip_url(url):
    """Normalize a link to an absolute URL without a file name or trailing slash.

    Steps, in order:

    1. If *url* does not start with ``http`` it is treated as site-relative
       and prefixed with ``https://supreme.justia.com``.
    2. Everything from the first ``/<name>.html`` segment onward is dropped,
       then everything from the first ``/<name>.pdf`` segment onward.
    3. A single trailing ``/`` is removed.

    Returns the normalized URL string.
    """
    if not url.startswith('http'):
        url = "https://supreme.justia.com" + url

    # The dot is escaped so only a literal ".html" / ".pdf" extension is cut.
    # An unescaped "." matches any character, which would also truncate at
    # path segments such as "/xhtml" or "/foo_pdf".
    for ending in (r"/\w*\.html", r"/\w*\.pdf"):
        # Only the text before the first match is kept, so one split suffices.
        url = re.split(ending, url, maxsplit=1)[0]

    if url.endswith('/'):
        url = url[:-1]
    return url
# File-name suffix shared by the pickle read/write helpers in this module.
PICKLE_EXTENSION = ".pickle"


def dumpout_struct(name, struct, folder="data"):
    """Pickle *struct* to ``{folder}/{name}.pickle``.

    Args:
        name: Base file name. Every occurrence of ``.pickle`` in it is
            removed before the extension is appended, so the name may be
            passed with or without the extension.
        struct: Any picklable object.
        folder: Directory to write into; it must already exist.

    Raises:
        OSError: If the target file cannot be opened for writing.
        pickle.PicklingError: If *struct* cannot be pickled.
    """
    name = name.replace(PICKLE_EXTENSION, '')
    # The context manager guarantees the data is flushed and the handle is
    # closed even if pickling raises part-way through.
    with open(f'{folder}/{name}{PICKLE_EXTENSION}', 'wb') as output_file:
        pickle.dump(struct, output_file)
def readin_struct(name, default=None, folder="data"):
    """Load a pickled object from ``{folder}/{name}.pickle``.

    Args:
        name: Base file name. Every occurrence of ``.pickle`` in it is
            removed before the extension is appended, so the name may be
            passed with or without the extension.
        default: Value to return when the file does not exist. ``None``
            means "no default".
        folder: Directory to read from.

    Returns:
        A ``(value, found)`` tuple: ``(unpickled_object, True)`` when the
        file exists, otherwise ``(default, False)``.

    Raises:
        FileNotFoundError: If the file does not exist and *default* is
            ``None``. This is a subclass of ``Exception``, so callers that
            catch ``Exception`` are unaffected.
    """
    name = name.replace(PICKLE_EXTENSION, '')
    file_path = f'{folder}/{name}{PICKLE_EXTENSION}'

    if not path.exists(file_path):
        if default is None:
            raise FileNotFoundError(f"File {name} does not exist, and no default provided!")
        return default, False

    # NOTE: unpickling can execute arbitrary code; only load files that this
    # project wrote itself, never data from an untrusted source.
    with open(file_path, 'rb') as struct_file:
        return pickle.load(struct_file), True