-
Notifications
You must be signed in to change notification settings - Fork 0
/
markdownbase.py
130 lines (96 loc) · 4.63 KB
/
markdownbase.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import os
import markdownpage
import publishtarget
import sourcefile
import re
class MarkdownBase:
    """
    A class representing a markdown note folder.

    The model is filled in stages: index_source() collects SourceFile objects,
    build_page_models() wraps the ".md" ones in MarkdownPage objects,
    define_publish_list() selects the pages flagged for publishing, and the
    dead-link helpers rewrite links whose target is not among the published
    pages.
    """

    def __init__(self, directory):
        """
        Creates an empty model of the note folder rooted at `directory`.
        """
        self.directory = directory
        self.source_files = []   # SourceFile objects appended by index_source()
        self.md_files = []       # MarkdownPage objects appended by build_page_models()
        self.publish_files = []  # entries of md_files whose `publish` flag is truthy
        self.dead_links = []     # links already found dead; consulted first by is_dead_link()
        # Target of the "[*](...)" marker appended to deactivated links; "" means
        # no marker is appended.  (Original note: this will be passed through as properties.)
        self.dead_link_note = ""

    def index_source(self, source_directory: str = "", specific_pages: "list[str] | None" = None) -> None:
        """
        Indexes source directory.

        Defaults to self.directory; specific filenames can be provided as string list.
        Only regular files directly inside the directory are indexed; nothing is
        walked recursively.  Each indexed file is appended to self.source_files
        as a SourceFile, so calling this repeatedly accumulates entries.

        source_directory -- path relative to self.directory.
        specific_pages   -- optional filenames to restrict indexing to; None or
                            an empty collection indexes every file.
        """
        source_path = os.path.join(self.directory, source_directory)
        for filename in os.listdir(source_path):
            if not os.path.isfile(os.path.join(source_path, filename)):
                continue
            # An empty/None filter means "no filter".
            if specific_pages and filename not in specific_pages:
                continue
            self.source_files.append(
                sourcefile.SourceFile(self.directory, os.path.join(source_directory, filename))
            )

    def build_page_models(self) -> None:
        """
        Builds a MarkdownPage for every indexed source file with suffix ".md".

        Each page has model_pages() called on it and is appended to
        self.md_files.  Calling this twice appends the same pages again.
        """
        for source in self.source_files:
            if source.file_suffix != ".md":
                continue
            page = markdownpage.MarkdownPage(source)
            page.model_pages()
            self.md_files.append(page)
            # TODO: convert_local_refs() is not called here; it might need dead
            # links cleaned first, in which case it belongs in a later step.

    def _published_link_names(self) -> set:
        """Returns the linkified names of all pages in self.publish_files."""
        # Assumes linkified names are hashable (they are compared against link
        # targets, presumably strings).
        return {page.source_file.linkified_name for page in self.publish_files}

    def _check_link(self, link, valid_targets) -> bool:
        """Returns True, remembering the link in self.dead_links, when its target is not in valid_targets."""
        if link in self.dead_links:
            return True
        if link.ref_target not in valid_targets:
            self.dead_links.append(link)
            return True
        return False

    def is_dead_link(self, link_to_check) -> bool:
        """
        Determines if link_to_check has a valid target.

        link_to_check is a link object exposing `ref_target`.  It is dead when
        that target matches the linkified name of no page in self.publish_files.
        Dead links are remembered in self.dead_links.

        Returns True for a dead link and False otherwise.
        """
        if link_to_check in self.dead_links:
            # Already known; skip building the set of published names.
            return True
        return self._check_link(link_to_check, self._published_link_names())

    def collate_dead_links(self) -> dict:
        """
        Returns dict mapping each page in self.publish_files to its dead links.

        Only links for which is_local_ref() is true are examined, and pages
        without any dead link are left out of the mapping.
        """
        # Built once for the whole scan instead of once per link.
        valid_targets = self._published_link_names()
        page_dead_links = {}
        for pub_file in self.publish_files:
            dead = [link for link in pub_file.body_links
                    if link.is_local_ref() and self._check_link(link, valid_targets)]
            if dead:
                page_dead_links[pub_file] = dead
        return page_dead_links

    def deactivate_dead_links(self, dead_links: dict) -> None:
        """
        Deactivates dead links.

        dead_links maps a page object to the links to deactivate on it, in the
        shape returned by collate_dead_links().  Every "[text](target)"
        occurrence in the page's body_text becomes "_text_", followed by
        "[*](<dead_link_note>)" when self.dead_link_note is non-empty.
        """
        for markdown_page, page_links in dead_links.items():
            for link in page_links:
                replacement_text = f"_{link.text}_"
                if self.dead_link_note:
                    replacement_text += f"[*]({self.dead_link_note})"
                # Literal replacement: link text and targets may contain regex
                # metacharacters ("+", "(", ".") that must not be interpreted.
                markdown_page.body_text = markdown_page.body_text.replace(
                    f"[{link.text}]({link.ref_target})", replacement_text
                )

    def sanitise_dead_links(self, dead_link_note):
        """
        Stores dead_link_note, then deactivates every dead link found in the published pages.
        """
        self.dead_link_note = dead_link_note
        self.deactivate_dead_links(self.collate_dead_links())

    def define_publish_list(self):
        """
        Appends to publish_files every MarkdownPage in md_files flagged for publishing.

        Existing entries are kept, so calling this twice lists pages twice.
        """
        # TODO:
        # * Possibly refactor to return the filtered list
        # * Consider making this a dict {MarkdownPage: [Reference]}
        #   This will allow attachments not referenced by publish pages to be omitted from publishing.
        self.publish_files.extend(md_file for md_file in self.md_files if md_file.publish)

    def create_publish_target(self, pub_path: str):
        """
        Returns a PublishTarget for pub_path holding the publish list and all non-".md" source files.
        """
        # TODO: Only publish attachments for published pages (parse published pages to find)
        attachments = [file for file in self.source_files if file.file_suffix != ".md"]
        return publishtarget.PublishTarget(pub_path, self.publish_files, attachments)

    def output_exceptions(self, publish=False):
        """
        Prints the parse exceptions of pages whose publish flag equals `publish`.

        For each such page with at least one parse exception, the source
        filename is printed, followed by one "- <exception>" line per exception.
        """
        for page in self.md_files:
            if page.publish == publish and page.parse_exceptions:
                print(page.source_file.filename)
                for exception in page.parse_exceptions:
                    print(f'- {exception}')