From fafa016f92750cad9d46dd0e779bd6b9b910a0da Mon Sep 17 00:00:00 2001
From: HaudinFlorence <haudin.florence@gmail.com>
Date: Mon, 16 Sep 2024 09:34:56 +0200
Subject: [PATCH] Add a function to extract the titles and levels of headings
 from the markdown contents.

---
 nbconvert/exporters/html.py             |  8 ++++-
 nbconvert/exporters/templateexporter.py |  8 +++++
 nbconvert/filters/markdown_mistune.py   | 42 +++++++++++++++++++++++++
 nbconvert/nbconvertapp.py               |  1 +
 share/templates/base/null.j2            |  3 +-
 share/templates/lab/base.html.j2        |  5 +++
 6 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/nbconvert/exporters/html.py b/nbconvert/exporters/html.py
index d63699c0d..f8415ab4a 100644
--- a/nbconvert/exporters/html.py
+++ b/nbconvert/exporters/html.py
@@ -31,7 +31,7 @@
 from nbconvert.utils.iso639_1 import iso639_1
 
 from .templateexporter import TemplateExporter
-
+from nbconvert.filters.markdown_mistune import extract_titles_from_markdown_input
 
 def find_lab_theme(theme_name):
     """
@@ -256,6 +256,12 @@ def from_notebook_node(  # type:ignore[explicit-override, override]
         highlight_code = self.filters.get(
             "highlight_code", Highlight2HTML(pygments_lexer=lexer, parent=self)
         )
+        markdown_collection = "".join(
+            cell.source + "\n" for cell in nb.cells if cell.cell_type == "markdown"
+        )
 
         resources = self._init_resources(resources)
+        # Store the headings only after `_init_resources`: `resources` is optional
+        # and may still be None before that call, which breaks item assignment.
+        resources["tableofcontents"] = extract_titles_from_markdown_input(markdown_collection)
 
diff --git a/nbconvert/exporters/templateexporter.py b/nbconvert/exporters/templateexporter.py
index 42220cbda..baac9f5af 100644
--- a/nbconvert/exporters/templateexporter.py
+++ b/nbconvert/exporters/templateexporter.py
@@ -680,4 +680,12 @@ def get_prefix_root_dirs(self):
     def _init_resources(self, resources):
         resources = super()._init_resources(resources)
         resources["deprecated"] = deprecated
+        resources["include_tableofcontents"] = self.include_tableofcontents
         return resources
+
+    def include_tableofcontents(self, resources):
+        """Return whether to render the table of contents (currently always True).
+
+        NOTE(review): `_init_resources` stores this bound method, not its result.
+        """
+        return True
diff --git a/nbconvert/filters/markdown_mistune.py b/nbconvert/filters/markdown_mistune.py
index 02ab346e0..ee34ddfae 100644
--- a/nbconvert/filters/markdown_mistune.py
+++ b/nbconvert/filters/markdown_mistune.py
@@ -19,6 +19,8 @@
 from pygments.util import ClassNotFound
 
 from nbconvert.filters.strings import add_anchor
+import mistune
+from mistune.renderers.markdown import MarkdownRenderer
 
 try:  # for Mistune >= 3.0
     from mistune import (  # type:ignore[attr-defined]
@@ -487,3 +489,43 @@ def render(self, source: str) -> str:
 def markdown2html_mistune(source: str) -> str:
     """Convert a markdown string to HTML using mistune"""
     return MarkdownWithMath(renderer=IPythonRenderer(escape=False)).render(source)
+
+
+class HeadingExtractor(MarkdownRenderer):
+    """Markdown renderer that collects heading tokens instead of rendering them."""
+
+    def __init__(self):
+        super().__init__()
+        # One ``(state, token)`` tuple per heading, in document order.
+        self.headings = []
+
+    def heading(self, token, state):
+        """Record the heading's token dict and parser state; render nothing for it."""
+        self.headings.append((state, token))
+        return ""
+
+
+def _inline_text(token):
+    """Return the text of an inline token, descending into its children."""
+    if "raw" in token:
+        return token["raw"]
+    return "".join(_inline_text(child) for child in token.get("children", ()))
+
+
+def extract_titles_from_markdown_input(markdown_input):
+    """Extract the level and text of every heading in a Markdown string.
+
+    ``markdown_input`` is a single string holding all the Markdown content to
+    scan. Returns a list with one ``[level, title]`` entry per heading, in
+    document order; headings without any text are left out.
+    """
+    renderer = HeadingExtractor()
+    # The rendered output is discarded; rendering only fills renderer.headings.
+    mistune.create_markdown(renderer=renderer)(markdown_input)
+    titles_array = []
+    for _state, token in renderer.headings:
+        # Join every inline fragment: the first child alone can be partial or absent.
+        title = _inline_text(token)
+        if title:
+            titles_array.append([token["attrs"]["level"], title])
+    return titles_array
diff --git a/nbconvert/nbconvertapp.py b/nbconvert/nbconvertapp.py
index cd305afbf..20ec02696 100755
--- a/nbconvert/nbconvertapp.py
+++ b/nbconvert/nbconvertapp.py
@@ -54,6 +54,7 @@ def validate(self, obj, value):
     {
         "to": "NbConvertApp.export_format",
         "template": "TemplateExporter.template_name",
+        "toc": "TemplateExporter.include_tableofcontents",
         "template-file": "TemplateExporter.template_file",
         "theme": "HTMLExporter.theme",
         "sanitize_html": "HTMLExporter.sanitize_html",
diff --git a/share/templates/base/null.j2 b/share/templates/base/null.j2
index 929b18759..547ddb458 100644
--- a/share/templates/base/null.j2
+++ b/share/templates/base/null.j2
@@ -21,10 +21,11 @@ calling super.
 consider calling super even if it is a leaf block, we might insert more blocks later.
 
 #}
 {%- block header -%}
 {%- endblock header -%}
 {%- block body -%}
     {%- block body_header -%}
+        {#- Emits nothing by default; derived templates may override this block. -#}
     {%- endblock body_header -%}
     {%- block body_loop -%}
         {%- for cell in nb.cells -%}
diff --git a/share/templates/lab/base.html.j2 b/share/templates/lab/base.html.j2
index cebe84104..b4001ea2c 100644
--- a/share/templates/lab/base.html.j2
+++ b/share/templates/lab/base.html.j2
@@ -2,6 +2,11 @@
 {% from 'celltags.j2' import celltags %}
 {% from 'cell_id_anchor.j2' import cell_id_anchor %}
 
+{% block tableofcontents %}
+    <div> {{"Table of contents"}} </div>
+{# NOTE(review): placeholder text only; resources.tableofcontents is not rendered here yet. #}
+{% endblock tableofcontents %}
+
 {% block codecell %}
 {%- if not cell.outputs -%}
 {%- set no_output_class="jp-mod-noOutputs" -%}