Commit: compile includes

nelliemckesson committed Sep 29, 2023
1 parent 8703440 commit c7c4199
Showing 1 changed file with 106 additions and 63 deletions.
169 changes: 106 additions & 63 deletions scripts/transform_doxygen_html.py
@@ -307,6 +307,28 @@ def make_filename_id(filename):
         print("ERROR: ", e, exc_tb.tb_lineno)
     return my_id
 
+def find_item_in_toc(toc_data, filename):
+    try:
+        found = False
+        matching_file = None
+        for item in toc_data:
+            if item == filename:
+                matching_file = item
+                found = True
+                break
+            else:
+                for k, v in toc_data[item].items():
+                    found = find_item_in_dict(k,v,filename)
+                    if found == True:
+                        matching_file = item
+                        break
+            if found == True:
+                break
+    except Exception as e:
+        exc_type, exc_obj, exc_tb = sys.exc_info()
+        print("ERROR: ", e, exc_tb.tb_lineno)
+    return matching_file
+
 def fix_external_links(root, toc_data):
     try:
         matches = root.xpath(".//a[@href]")
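For context, a minimal sketch of how the new find_item_in_toc helper resolves a page to its top-level TOC entry. It assumes toc_data nests child pages under their parent group page and that find_item_in_dict matches a filename anywhere in a nested subtree; the filenames below are hypothetical:

# Hypothetical TOC shape: top-level group pages map to nested child pages.
toc_data = {
    "group__hardware.html": {
        "group__hardware__gpio.html": {
            "group__hardware__gpio__irq.html": {},
        },
    },
    "indexpage.html": {},
}

# A nested page resolves to the top-level entry that contains it;
# an unknown page would return None.
parent = find_item_in_toc(toc_data, "group__hardware__gpio__irq.html")
# parent == "group__hardware.html"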
@@ -319,21 +341,7 @@ def fix_external_links(root, toc_data):
                 filename = href.split("#")[0]
                 target_id = href.split("#")[1]
                 # walk the toc data to find the main html file
-                found = False
-                parent_file = None
-                for item in toc_data:
-                    if item == filename:
-                        parent_file = item
-                        found = True
-                        break
-                    else:
-                        for k, v in toc_data[item].items():
-                            found = find_item_in_dict(k,v,filename)
-                            if found == True:
-                                parent_file = item
-                                break
-                    if found == True:
-                        break
+                parent_file = find_item_in_toc(toc_data, filename)
                 if parent_file is not None:
                     parent_file_dest = re.sub("^group__", "", parent_file)
                     new_href = parent_file_dest
@@ -604,13 +612,39 @@ def compile_json_mappings(json_dir, json_files):
         print("ERROR: ", e, exc_tb.tb_lineno)
     return compiled
 
-def walk_json(k,v,group_adoc):
+def compile_includes(my_adoc, output_path, v):
+    try:
+        for sk, sv in v.items():
+            # append includes directly to the parent file
+            adoc_filename = re.sub("html$", "adoc", sk)
+            full_adoc_path = os.path.join(output_path, adoc_filename)
+            # read the adoc
+            included_content = ""
+            with open(full_adoc_path) as f:
+                included_content = f.read()
+            my_adoc += "\n\n"
+            my_adoc += included_content
+            if len(sv) > 0:
+                my_adoc = compile_includes(my_adoc, output_path, sv)
+            os.remove(full_adoc_path)
+    except Exception as e:
+        exc_type, exc_obj, exc_tb = sys.exc_info()
+        print("ERROR: ", e, exc_tb.tb_lineno)
+    return my_adoc
+
+def walk_json(k,v,group_adoc, output_path):
     try:
         filename = re.sub("html$", "adoc", k)
         group_adoc = group_adoc + "include::" + filename + "[]\n\n"
-        if len(v) > 0:
-            for sk, sv in v.items():
-                group_adoc = walk_json(sk,sv,group_adoc)
+        # compile includes into a single file
+        my_adoc = ""
+        my_adoc_path = os.path.join(output_path, filename)
+        with open(my_adoc_path) as f:
+            my_adoc = f.read()
+        my_adoc = compile_includes(my_adoc, output_path, v)
+        # write the new file
+        write_output(my_adoc_path, my_adoc)
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        print("ERROR: ", e, exc_tb.tb_lineno)
@@ -640,6 +674,57 @@ def walk_nested_adoc(k, v, output_path, level):
         print("ERROR: ", e, exc_tb.tb_lineno)
     return level
 
+def parse_indiviual_file(html_path, html_file, complete_json_mappings, updated_links, toc_data):
+    try:
+        # create the full path
+        this_path = os.path.join(html_path, html_file)
+        # read the input root
+        with open(this_path) as h:
+            html_content = h.read()
+        html_content = re.sub('<\!DOCTYPE html PUBLIC "-\/\/W3C\/\/DTD XHTML 1\.0 Transitional\/\/EN" "https:\/\/www\.w3\.org\/TR\/xhtml1\/DTD\/xhtml1-transitional\.dtd">', '', html_content)
+        html_content = re.sub('rel="stylesheet">', 'rel="stylesheet"/>', html_content)
+        html_content = re.sub('&display=swap"', '"', html_content)
+        html_content = re.sub('<img src="logo-mobile\.svg" alt="Raspberry Pi">', '', html_content)
+        html_content = re.sub('<img src="logo\.svg" alt="Raspberry Pi">', '', html_content)
+        html_content = re.sub("<\!-- HTML header for doxygen \S*?-->", '', html_content)
+        html_content = re.sub(' xmlns="http://www.w3.org/1999/xhtml"', '', html_content)
+        root = etree.HTML(html_content)
+
+        # give everything an id
+        root = add_ids(root, html_file)
+        # loop over each json file
+        skip = ["table_memname.json"]
+        for mapping in complete_json_mappings:
+            for item in mapping:
+                root = transform_element(item, root)
+        # fix links
+        root, updated_links = fix_internal_links(root, html_file, updated_links)
+        root = fix_external_links(root, toc_data)
+        # cleanup
+        root = merge_lists("ul", root)
+        root = merge_lists("ol", root)
+        root = wrap_list_items(root)
+        # combine multi-para notes into one container
+        root = merge_note_paras(root)
+        # add some extra items to help with the adoc conversion
+        root = prep_for_adoc(root)
+        # fix some heading levels
+        root = fix_heading_levels(root)
+        root = fix_duplicate_ids(root, html_file)
+        # cleanup
+        root = strip_attribute("data-processed", root)
+        # get the document title
+        title_text = get_document_title(root)
+        # get only the relevant content
+        contents = root.find(".//div[@class='contents']")
+        # prep and write the adoc
+        final_output = stringify(contents)
+        adoc = make_adoc(final_output, title_text, html_file)
+    except Exception as e:
+        exc_type, exc_obj, exc_tb = sys.exc_info()
+        print("ERROR: ", e, exc_tb.tb_lineno)
+    return adoc
+
 def handler(html_path, output_path, header_path, output_json):
     try:
         dir_path = os.path.dirname(os.path.realpath(__file__))
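In the pipeline the handler supplies the real arguments (see the next hunk); in isolation the extracted function might be exercised roughly like this sketch, where the directory, page name, and the empty mappings and TOC are hypothetical stand-ins:

updated_links = {}
# Hypothetical inputs; the handler normally passes real JSON mappings and TOC data.
adoc_text = parse_indiviual_file(
    "doxygen_out",                    # directory of Doxygen-generated HTML
    "group__hardware__gpio.html",     # one page to convert
    [],                               # complete_json_mappings: none for a bare run
    updated_links,
    {},                               # toc_data: empty, so no parent files are resolved
)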
@@ -669,51 +754,9 @@ def handler(html_path, output_path, header_path, output_json):
         # process every html file
         updated_links = {}
         for html_file in html_files:
-            # create the full path
-            this_path = os.path.join(html_path, html_file)
             this_output_path = os.path.join(output_path, html_file)
-            # read the input root
-            with open(this_path) as h:
-                html_content = h.read()
-            html_content = re.sub('<\!DOCTYPE html PUBLIC "-\/\/W3C\/\/DTD XHTML 1\.0 Transitional\/\/EN" "https:\/\/www\.w3\.org\/TR\/xhtml1\/DTD\/xhtml1-transitional\.dtd">', '', html_content)
-            html_content = re.sub('rel="stylesheet">', 'rel="stylesheet"/>', html_content)
-            html_content = re.sub('&display=swap"', '"', html_content)
-            html_content = re.sub('<img src="logo-mobile\.svg" alt="Raspberry Pi">', '', html_content)
-            html_content = re.sub('<img src="logo\.svg" alt="Raspberry Pi">', '', html_content)
-            html_content = re.sub("<\!-- HTML header for doxygen \S*?-->", '', html_content)
-            html_content = re.sub(' xmlns="http://www.w3.org/1999/xhtml"', '', html_content)
-            root = etree.HTML(html_content)
-
-            # give everything an id
-            root = add_ids(root, html_file)
-            # loop over each json file
-            skip = ["table_memname.json"]
-            for mapping in complete_json_mappings:
-                for item in mapping:
-                    root = transform_element(item, root)
-            # fix links
-            root, updated_links = fix_internal_links(root, html_file, updated_links)
-            root = fix_external_links(root, toc_data)
-            # cleanup
-            root = merge_lists("ul", root)
-            root = merge_lists("ol", root)
-            root = wrap_list_items(root)
-            # combine multi-para notes into one container
-            root = merge_note_paras(root)
-            # add some extra items to help with the adoc conversion
-            root = prep_for_adoc(root)
-            # fix some heading levels
-            root = fix_heading_levels(root)
-            root = fix_duplicate_ids(root, html_file)
-            # cleanup
-            root = strip_attribute("data-processed", root)
-            # get the document title
-            title_text = get_document_title(root)
-            # get only the relevant content
-            contents = root.find(".//div[@class='contents']")
-            # prep and write the adoc
-            final_output = stringify(contents)
-            adoc = make_adoc(final_output, title_text, html_file)
+            # parse the file
+            adoc = parse_indiviual_file(html_path, html_file, complete_json_mappings, updated_links, toc_data)
             # fix heading levels for non-included pages
             if html_file not in toc_list:
                 adoc = decrease_heading_levels(adoc)
@@ -747,7 +790,7 @@ def handler(html_path, output_path, header_path, output_json):
             if 'filename' in h_json[item]:
                 item_filename = h_json[item]['filename']
                 for k,v in toc_data[item_filename].items():
-                    group_adoc = walk_json(k,v,group_adoc)
+                    group_adoc = walk_json(k,v,group_adoc,output_path)
                 group_output_path = os.path.join(output_path, item + ".adoc")
                 write_output(group_output_path, group_adoc)
                 # write the json structure file as well
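Taken together, the grouping pass now works roughly like this sketch, with hypothetical group and page names: walk_json records the top-level include:: line while compile_includes inlines each page's sub-pages into its .adoc on disk.

# Hypothetical TOC subtree for one group header, as passed to walk_json
# (each key is a page whose .adoc was already written by the per-file pass).
group_toc = {
    "group__hardware__gpio.html": {"group__hardware__gpio__irq.html": {}},
}

group_adoc = ""
for k, v in group_toc.items():
    group_adoc = walk_json(k, v, group_adoc, "out")

# group_adoc is now "include::group__hardware__gpio.adoc[]\n\n", and
# out/group__hardware__gpio.adoc has the IRQ sub-page appended to it,
# while out/group__hardware__gpio__irq.adoc has been removed.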
