-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #7867 from lrineau/Scripts-improve_add_toc_to_github_wiki_page.py-GF
Improve add_toc_to_github_wiki_page.py
- Loading branch information
Showing
1 changed file
with
117 additions
and
108 deletions.
There are no files selected for viewing
225 changes: 117 additions & 108 deletions
225
Scripts/developer_scripts/add_toc_to_github_wiki_page.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,125 +1,134 @@ | ||
from sys import argv | ||
from sys import exit | ||
import codecs | ||
import re | ||
import argparse | ||
import sys | ||
from urllib.parse import quote | ||
|
||
# Command-line interface.
# NOTE: the arguments are parsed at import time; the resulting module-level
# `args` namespace is read directly by the functions defined below
# (args.codebase, args.h1, args.max_level, args.filename).
parser = argparse.ArgumentParser()
parser.add_argument("filename", help="the Markdown file to process")
parser.add_argument(
    "--codebase",
    help="for a Markdown file of Codebase instead of Github",
    action="store_true",
)
parser.add_argument("--h1", help="support level one sections (h1)", action="store_true")
parser.add_argument("--max-level", help="maximum level of sections", type=int, default=5)
args = parser.parse_args()
|
||
|
||
# a probably incomplete version to generate an anchor from a section name | ||
def get_anchor(s):
    """Return the link target ("#...") for the heading line *s*.

    This is a best-effort (probably incomplete) imitation of how the wiki
    renderer derives an anchor from a section title. The result depends on
    the module-level ``args.codebase`` flag:

    * GitHub mode (default): "/" is dropped and the anchor is lower-cased.
    * Codebase mode: "/" becomes "-", case is kept, and "'" is spelled
      "-and-39-".

    The anchor is percent-encoded with ``urllib.parse.quote`` before the
    leading "#" is added.
    """
    # Characters that are simply removed, whatever the flavour. The heading
    # markers ("#") of the line itself are removed here as well.
    table = {ord(c): None for c in "`().#:,;<>+=?@"}
    # "/" is the only character whose treatment depends on the flavour.
    table[ord("/")] = "-" if args.codebase else None
    s = s.translate(table)

    # Trim the blanks left over around the title, then turn every remaining
    # run of whitespace into a single dash.
    s = s.lstrip(" ").rstrip("\n").rstrip(" ")
    s = re.sub(r"\s+", "-", s)

    if args.codebase:
        s = s.replace("'", "-and-39-")
    else:
        s = s.lower()
    return "#" + quote(s)
|
||
|
||
# indicates the nesting level (first level allowed is ##) | |
def get_level(s):
    """Return the heading level of the line *s*.

    The level is the number of leading "#" characters, provided they are
    followed by a whitespace character. Any other line (including "#word",
    which has no blank after the marker) yields 0.
    """
    m = re.match(r"(#+)\s", s)
    return len(m.group(1)) if m else 0
|
||
|
||
def get_name(s):
    """Return the title text of the heading line *s*.

    The leading "#" markers and the whitespace after them are removed, as is
    any trailing whitespace (including the newline). If *s* does not look
    like a heading, the string "ERROR: Section name extraction" is returned
    instead of raising, so that the problem shows up in the generated output.
    """
    # The title group is lazy so that trailing blanks are left to `\s*`
    # instead of ending up inside the link label.
    m = re.search(r"^#+\s+(.*?)\s*$", s)
    if m:
        return m.group(1)
    return "ERROR: Section name extraction"
|
||
|
||
#generate the entry for one section | ||
# generate the entry for one section | ||
def get_toc_entry(s):
    """Return the Markdown bullet that links to the heading line *s*.

    The bullet has the form ``* [name](anchor)`` and is indented according to
    the nesting depth of the heading. Depth 0 corresponds to "##" headings,
    or to "#" headings when the module-level ``args.h1`` flag is set.

    A heading above the first allowed level yields the string
    "ERROR: h1 sections are not allowed" instead of a bullet.
    """
    name = get_name(s)
    # Number of "#" markers that correspond to an unindented entry.
    top_level = 1 if args.h1 else 2
    level = get_level(s) - top_level
    anchor = get_anchor(s)

    if level < 0:
        return "ERROR: h1 sections are not allowed"

    # Two spaces per level: a nested bullet has to be indented up to the
    # content column of its parent "* " marker to be rendered as a sub-item.
    return "  " * level + "* [" + name + "](" + anchor + ")"
|
||
|
||
# now the main | ||
def main():
    """Regenerate the table of contents of the file named by ``args.filename``.

    The file must start with a line containing ``<!--TOC-->`` and contain a
    second such line closing the current table of contents; otherwise the
    script exits without touching the file. Everything after the closing
    marker is scanned for headings (fenced code blocks are skipped), and the
    file is rewritten as the new table of contents followed by that
    unchanged remainder. If no heading is found the file is left as it is.
    """
    filename = args.filename

    # codecs.open raises on failure (it never returns a falsy handle), so
    # the error has to be caught rather than tested for.
    try:
        f = codecs.open(filename, "r", encoding="utf-8")
    except OSError as err:
        sys.exit("Cannot open " + filename + ": " + str(err))

    # The `with` block closes the handle on every path, including the early
    # exits below.
    with f:
        # look for <!--TOC--> at the beginning of the file
        line = f.readline()
        if "<!--TOC-->" not in line:
            sys.exit()

        # skip the current TOC, up to and including its closing marker
        line = f.readline()
        while line and "<!--TOC-->" not in line:
            line = f.readline()
        if not line:
            sys.exit()

        body_lines = f.readlines()

    entries = []
    verbatim_mode = False  # True while inside a ``` fenced block
    for line in body_lines:
        if line.startswith("```"):
            verbatim_mode = not verbatim_mode
        elif not verbatim_mode and line.startswith("#"):
            # Level 0 means "#" is not followed by a blank (e.g. "#word"):
            # that is not a heading and must not produce an entry.
            if 1 <= get_level(line) <= args.max_level:
                entries.append(get_toc_entry(line) + "\n")

    if not entries:
        return

    toc = "<!--TOC-->\n\n# Table of Contents\n" + "".join(entries) + "\n<!--TOC-->\n"
    with codecs.open(filename, "w", encoding="utf-8") as out:
        out.write(toc)
        out.write("".join(body_lines))


if __name__ == "__main__":
    main()