From fafa016f92750cad9d46dd0e779bd6b9b910a0da Mon Sep 17 00:00:00 2001 From: HaudinFlorence Date: Mon, 16 Sep 2024 09:34:56 +0200 Subject: [PATCH] Add a function to extract the titles and levels of headings from the markdown contents. --- nbconvert/exporters/html.py | 8 ++++- nbconvert/exporters/templateexporter.py | 8 +++++ nbconvert/filters/markdown_mistune.py | 42 +++++++++++++++++++++++++ nbconvert/nbconvertapp.py | 1 + share/templates/base/null.j2 | 3 +- share/templates/lab/base.html.j2 | 5 +++ 6 files changed, 64 insertions(+), 3 deletions(-) diff --git a/nbconvert/exporters/html.py b/nbconvert/exporters/html.py index d63699c0d..f8415ab4a 100644 --- a/nbconvert/exporters/html.py +++ b/nbconvert/exporters/html.py @@ -31,7 +31,7 @@ from nbconvert.utils.iso639_1 import iso639_1 from .templateexporter import TemplateExporter - +from nbconvert.filters.markdown_mistune import extract_titles_from_markdown_input def find_lab_theme(theme_name): """ @@ -256,6 +256,12 @@ def from_notebook_node( # type:ignore[explicit-override, override] highlight_code = self.filters.get( "highlight_code", Highlight2HTML(pygments_lexer=lexer, parent=self) ) + markdown_collection = "" + for cell in nb.cells: + if cell.cell_type == 'markdown': + markdown_collection = markdown_collection + cell.source + "\n" + + resources["tableofcontents"] = extract_titles_from_markdown_input(markdown_collection) resources = self._init_resources(resources) diff --git a/nbconvert/exporters/templateexporter.py b/nbconvert/exporters/templateexporter.py index 42220cbda..baac9f5af 100644 --- a/nbconvert/exporters/templateexporter.py +++ b/nbconvert/exporters/templateexporter.py @@ -680,4 +680,12 @@ def get_prefix_root_dirs(self): def _init_resources(self, resources): resources = super()._init_resources(resources) resources["deprecated"] = deprecated + resources["include_tableofcontents"] = self.include_tableofcontents return resources + + def include_tableofcontents(self, resources): + #if len(resources["tableofcontents"])>0: + return True + #else: + #return False + \ No newline at end of file diff --git a/nbconvert/filters/markdown_mistune.py b/nbconvert/filters/markdown_mistune.py index 02ab346e0..ee34ddfae 100644 --- a/nbconvert/filters/markdown_mistune.py +++ b/nbconvert/filters/markdown_mistune.py @@ -19,6 +19,8 @@ from pygments.util import ClassNotFound from nbconvert.filters.strings import add_anchor +import mistune +from mistune.renderers.markdown import MarkdownRenderer try: # for Mistune >= 3.0 from mistune import ( # type:ignore[attr-defined] @@ -487,3 +489,43 @@ def render(self, source: str) -> str: def markdown2html_mistune(source: str) -> str: """Convert a markdown string to HTML using mistune""" return MarkdownWithMath(renderer=IPythonRenderer(escape=False)).render(source) + +# Custom renderer to capture headings +class HeadingExtractor(MarkdownRenderer): + def __init__(self): + super().__init__() + self.headings = [] + + def heading(self, text, level): + self.headings.append((level, text)) + return '' # We return an empty string to avoid outputting the headings + + +def extract_titles_from_markdown_input (markdown_input): + # Markdown_input is a single string with all the markdown content concatenated + # Initiate list of titles + titles_array = [] + + # Instantiate the custom renderer + renderer = HeadingExtractor() + + # Create a Markdown parser with the custom renderer + extract_titles = mistune.create_markdown(renderer=renderer) + + + # Parse the Markdown + extract_titles(markdown_input) + + # renderer.headings is an array for each markdown element + #print(renderer.headings) + + # Extracted headings + for level, title in renderer.headings: + children = title['children'] + attrs = title['attrs'] + raw_text= children[0]['raw'] + level= attrs['level'] + titles_array.append([level, raw_text]) + + print(titles_array) + return titles_array \ No newline at end of file diff --git a/nbconvert/nbconvertapp.py b/nbconvert/nbconvertapp.py index cd305afbf..20ec02696 100755 --- a/nbconvert/nbconvertapp.py +++ b/nbconvert/nbconvertapp.py @@ -54,6 +54,7 @@ def validate(self, obj, value): { "to": "NbConvertApp.export_format", "template": "TemplateExporter.template_name", + "toc": "TemplateExporter.include_tableofcontents", "template-file": "TemplateExporter.template_file", "theme": "HTMLExporter.theme", "sanitize_html": "HTMLExporter.sanitize_html", diff --git a/share/templates/base/null.j2 b/share/templates/base/null.j2 index 929b18759..547ddb458 100644 --- a/share/templates/base/null.j2 +++ b/share/templates/base/null.j2 @@ -21,10 +21,9 @@ calling super. consider calling super even if it is a leaf block, we might insert more blocks later. #} -{%- block header -%} -{%- endblock header -%} {%- block body -%} {%- block body_header -%} +
{{"Hello \n\"World\""}}
{%- endblock body_header -%} {%- block body_loop -%} {%- for cell in nb.cells -%} diff --git a/share/templates/lab/base.html.j2 b/share/templates/lab/base.html.j2 index cebe84104..b4001ea2c 100644 --- a/share/templates/lab/base.html.j2 +++ b/share/templates/lab/base.html.j2 @@ -2,6 +2,11 @@ {% from 'celltags.j2' import celltags %} {% from 'cell_id_anchor.j2' import cell_id_anchor %} +{% block tableofcontents %} +
{{"Table of contents"}}
+ +{% endblock tableofcontents %} + {% block codecell %} {%- if not cell.outputs -%} {%- set no_output_class="jp-mod-noOutputs" -%}