Skip to content

Commit

Permalink
Add a function to extract the titles and levels of headings from the …
Browse files Browse the repository at this point in the history
…markdown contents.
  • Loading branch information
HaudinFlorence committed Sep 16, 2024
1 parent e159962 commit fafa016
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 3 deletions.
8 changes: 7 additions & 1 deletion nbconvert/exporters/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from nbconvert.utils.iso639_1 import iso639_1

from .templateexporter import TemplateExporter

from nbconvert.filters.markdown_mistune import extract_titles_from_markdown_input

def find_lab_theme(theme_name):
"""
Expand Down Expand Up @@ -256,6 +256,12 @@ def from_notebook_node( # type:ignore[explicit-override, override]
highlight_code = self.filters.get(
"highlight_code", Highlight2HTML(pygments_lexer=lexer, parent=self)
)
markdown_collection = ""
for cell in nb.cells:
if cell.cell_type == 'markdown':
markdown_collection = markdown_collection + cell.source + "\n"

resources["tableofcontents"] = extract_titles_from_markdown_input(markdown_collection)

resources = self._init_resources(resources)

Expand Down
8 changes: 8 additions & 0 deletions nbconvert/exporters/templateexporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,4 +680,12 @@ def get_prefix_root_dirs(self):
def _init_resources(self, resources):
    """Prepare the resources mapping used during export.

    Delegates to the parent class's ``_init_resources`` first, then stores
    the module-level ``deprecated`` object under ``"deprecated"`` and this
    exporter's ``include_tableofcontents`` attribute under
    ``"include_tableofcontents"``.
    """
    prepared = super()._init_resources(resources)
    # Insertion order is kept: "deprecated" first, then the TOC entry.
    prepared["deprecated"] = deprecated
    prepared["include_tableofcontents"] = self.include_tableofcontents
    return prepared

def include_tableofcontents(self, resources):
    """Return whether there is a table of contents worth rendering.

    Parameters
    ----------
    resources : mapping or None
        Export resources. Only the ``"tableofcontents"`` entry is read.

    Returns
    -------
    bool
        ``True`` when ``resources`` holds a non-empty ``"tableofcontents"``
        entry; ``False`` when the entry is empty or missing, or when
        ``resources`` itself is empty or ``None``.
    """
    if not resources:
        return False
    # Use ``.get`` instead of indexing so that a mapping without a
    # "tableofcontents" key answers False rather than raising KeyError.
    return bool(resources.get("tableofcontents"))

42 changes: 42 additions & 0 deletions nbconvert/filters/markdown_mistune.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
from pygments.util import ClassNotFound

from nbconvert.filters.strings import add_anchor
import mistune
from mistune.renderers.markdown import MarkdownRenderer

try: # for Mistune >= 3.0
from mistune import ( # type:ignore[attr-defined]
Expand Down Expand Up @@ -487,3 +489,43 @@ def render(self, source: str) -> str:
def markdown2html_mistune(source: str) -> str:
    """Render the markdown text ``source`` to an HTML string with mistune."""
    # Build the renderer and the parser separately, then render once.
    html_renderer = IPythonRenderer(escape=False)
    converter = MarkdownWithMath(renderer=html_renderer)
    return converter.render(source)

class HeadingExtractor(MarkdownRenderer):
    """Markdown renderer that records each heading it is asked to render.

    ``headings`` collects one ``(level, token)`` tuple per heading, in the
    order the headings are rendered. ``level`` is the integer stored in the
    token's ``attrs`` and ``token`` is the heading token dict itself (with
    its ``"children"`` and ``"attrs"`` entries).
    """

    def __init__(self):
        super().__init__()
        self.headings = []

    def heading(self, token, state):
        """Record the heading ``token`` and render it as an empty string.

        The first positional argument is the heading token dict and the
        second is the parser state, not a text/level pair. The level is
        therefore read from ``token["attrs"]["level"]``.
        """
        self.headings.append((token["attrs"]["level"], token))
        # Return an empty string so the heading produces no rendered output.
        return ""


def _heading_plain_text(children):
    """Join the raw text of inline tokens, descending into nested tokens."""
    pieces = []
    for child in children or ():
        if "raw" in child:
            pieces.append(child["raw"])
        else:
            # Tokens without "raw" (e.g. emphasis, links) carry their text
            # in nested "children"; tokens with neither contribute nothing.
            pieces.append(_heading_plain_text(child.get("children")))
    return "".join(pieces)


def extract_titles_from_markdown_input(markdown_input):
    """Extract the level and text of every heading in a markdown string.

    Parameters
    ----------
    markdown_input : str
        Markdown content, e.g. the sources of all markdown cells
        concatenated into a single string.

    Returns
    -------
    list of [int, str]
        One ``[level, text]`` pair per heading, in document order. ``text``
        is the concatenated raw text of all inline tokens of the heading,
        so headings that are empty or that start with emphasis or a link
        are handled instead of raising.
    """
    if not markdown_input:
        return []

    # Parsing is done only for its side effect: the renderer collects the
    # heading tokens while the rendered output is discarded.
    renderer = HeadingExtractor()
    parse = mistune.create_markdown(renderer=renderer)
    parse(markdown_input)

    # Each recorded entry carries the heading token in second position; the
    # level is read from the token itself rather than from the first item.
    return [
        [token["attrs"]["level"], _heading_plain_text(token.get("children"))]
        for _, token in renderer.headings
    ]
1 change: 1 addition & 0 deletions nbconvert/nbconvertapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def validate(self, obj, value):
{
"to": "NbConvertApp.export_format",
"template": "TemplateExporter.template_name",
"toc": "TemplateExporter.include_tableofcontents",
"template-file": "TemplateExporter.template_file",
"theme": "HTMLExporter.theme",
"sanitize_html": "HTMLExporter.sanitize_html",
Expand Down
3 changes: 1 addition & 2 deletions share/templates/base/null.j2
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,9 @@ calling super.
consider calling super even if it is a leaf block, we might insert more blocks later.
#}
{%- block header -%}
{%- endblock header -%}
{%- block body -%}
{%- block body_header -%}
<div>{{"Hello \n\"World\""}}</div>
{%- endblock body_header -%}
{%- block body_loop -%}
{%- for cell in nb.cells -%}
Expand Down
5 changes: 5 additions & 0 deletions share/templates/lab/base.html.j2
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
{% from 'celltags.j2' import celltags %}
{% from 'cell_id_anchor.j2' import cell_id_anchor %}

{% block tableofcontents %}
<div> {{"Table of contents"}} </div>

{% endblock tableofcontents %}

{% block codecell %}
{%- if not cell.outputs -%}
{%- set no_output_class="jp-mod-noOutputs" -%}
Expand Down

0 comments on commit fafa016

Please sign in to comment.