Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve add_toc_to_github_wiki_page.py #7867

Merged
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
191 changes: 98 additions & 93 deletions Scripts/developer_scripts/add_toc_to_github_wiki_page.py
Original file line number Diff line number Diff line change
@@ -1,125 +1,130 @@
from sys import argv
from sys import exit
import codecs
import re
import argparse
import sys
from urllib.parse import quote

# Command-line interface: one positional Markdown file plus optional switches.
# Each argument is registered exactly once; argparse rejects a second
# registration of the same option string.
parser = argparse.ArgumentParser()
parser.add_argument("filename", help="the Markdown file to process")
parser.add_argument(
    "--codebase",
    help="for a Markdown file of Codebase instead of Github",
    action="store_true",
)
parser.add_argument("--h1", help="support level one sections (h1)", action="store_true")
parser.add_argument(
    "--max-level", help="maximum level of sections", type=int, default=5
)
args = parser.parse_args()


# a probably incomplete version to generate an anchor from a section name
def get_anchor(s):
    """Return the "#..." link fragment for the heading line *s*.

    The punctuation characters listed below are dropped, leading blanks and
    the trailing newline/blanks are stripped, and every remaining run of
    whitespace becomes a single "-".  The result is percent-encoded with
    ``urllib.parse.quote`` and prefixed with "#".

    The module-level ``args.codebase`` flag selects the flavour:
      * unset: "/" is dropped and the anchor is lower-cased;
      * set: "/" becomes "-", case is kept, and "'" becomes "-and-39-".
    """
    removed = "`().#:,;<>+=?@"
    if args.codebase:
        table = str.maketrans("/", "-", removed)
    else:
        table = str.maketrans("", "", removed + "/")
    # One pass instead of a chain of single-character replace() calls; the
    # replacements are independent of each other, so the order is irrelevant.
    s = s.translate(table)

    s = s.lstrip(" ")
    s = s.rstrip("\n")
    s = s.rstrip(" ")
    s = re.sub(r"\s+", "-", s)

    if args.codebase:
        s = s.replace("'", "-and-39-")
    else:
        s = s.lower()
    return "#" + quote(s)


# indicates the nesting level (first level allowed is ##)
def get_level(s):
    """Return the heading depth of line *s*.

    The depth is the number of "#" characters at the very start of the line,
    provided they are followed by a whitespace character.  Lines that do not
    match (plain text, or "#" glued to a word) yield 0.
    """
    m = re.search(r"^(#+)\s", s)
    return len(m.group(1)) if m else 0


def get_name(s):
    """Return the title text of the heading line *s*.

    The leading "#" run and the whitespace after it are removed, as is any
    trailing whitespace (blanks, "\\r", the final newline).  If *s* is not a
    heading line, the string "ERROR: Section name extraction" is returned.
    """
    # The group is lazy so that the trailing `\s*` really consumes the
    # whitespace at the end of the line; a greedy group would keep trailing
    # blanks and carriage returns inside the name.
    m = re.search(r"^#+\s+(.*?)\s*$", s)
    if m:
        return m.group(1)
    return "ERROR: Section name extraction"


# generate the entry for one section
def get_toc_entry(s):
    """Return the Markdown list item linking to the heading line *s*.

    The nesting depth is the heading level minus 1 when the module-level
    ``args.h1`` flag is set, minus 2 otherwise.  A negative depth yields the
    string "ERROR: h1 sections are not allowed" instead of a list item.
    """
    level = get_level(s) - (1 if args.h1 else 2)
    if level < 0:
        return "ERROR: h1 sections are not allowed"

    # NOTE(review): the per-level indent is reproduced as it appears in this
    # view (a single blank); confirm against the upstream script in case
    # whitespace was collapsed.
    indent = " " * level
    return indent + "* [" + get_name(s) + "](" + get_anchor(s) + ")"

# now the main
filename = args.filename

# codecs.open raises on failure rather than returning a falsy object, so the
# failure is reported from the exception handler.
try:
    f = codecs.open(filename, "r", encoding="utf-8")
except OSError:
    print("Cannot open " + filename + "\n")
    sys.exit(1)

# The `with` block closes the input file on every path, including the early
# sys.exit() calls below.
with f:
    # look for <!--TOC--> at the beginning of the file
    line = f.readline()
    if "<!--TOC-->" not in line:
        sys.exit()

    # skip current TOC
    line = f.readline()
    while line and "<!--TOC-->" not in line:
        line = f.readline()

    # no closing marker: leave the file untouched
    if not line:
        sys.exit()

    remaining_lines = f.readlines()

# Everything after the old TOC is written back unchanged.
buffer = "".join(remaining_lines)
TOC = "<!--TOC-->\n\n# Table of Contents\n"

verbatim_mode = False  # to ignore verbatim mode while looking for sections
TOC_empty = True
for line in remaining_lines:
    if line[:3] == "```":
        # a fence line opens or closes a verbatim block
        verbatim_mode = not verbatim_mode
    elif not verbatim_mode:
        if line[0] == "#" and get_level(line) <= args.max_level:
            TOC += get_toc_entry(line) + "\n"
            TOC_empty = False
TOC += "\n<!--TOC-->\n"

# Rewrite the file only when at least one section was found.
if not TOC_empty:
    with codecs.open(filename, "w", encoding="utf-8") as f:
        f.write(TOC)
        f.write(buffer)