From 5eaf5c06a6aab67e0d8cb9e34db1466ae64df3d5 Mon Sep 17 00:00:00 2001 From: delfanbaum Date: Wed, 6 Sep 2023 15:58:32 -0400 Subject: [PATCH] Allow styles on SVG elements This appears to have been the issue in a previous book; embedded styles are often used in SVGs, so we need to allow those through processing. --- README.md | 5 +++++ jupyter_book_to_htmlbook/text_processing.py | 13 +++++++++++- pyproject.toml | 2 +- tests/test_text_processing.py | 22 +++++++++++++++++++++ 4 files changed, 40 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8fecfa7..99b73e3 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,11 @@ Options: ## Release Notes +### 1.1.2 + +Bug fix: +- Allow "style" attributes to remain inside SVGs + ### 1.1.1 Bug fix: diff --git a/jupyter_book_to_htmlbook/text_processing.py b/jupyter_book_to_htmlbook/text_processing.py index 69c6585..fd18e0b 100644 --- a/jupyter_book_to_htmlbook/text_processing.py +++ b/jupyter_book_to_htmlbook/text_processing.py @@ -18,7 +18,18 @@ def clean_chapter(chapter, rm_numbering=True): for attr in remove_attrs: for tag in chapter.find_all(attrs={attr: True}): - del tag[attr] + # we need to allow styles on svg elements + in_svg = False + if attr == "style": + + if tag.name == "svg": + in_svg = True + + for parent in tag.parents: + if parent.name == "svg": + in_svg = True + if not in_svg: + del tag[attr] # (optionally) remove numbering if rm_numbering: diff --git a/pyproject.toml b/pyproject.toml index 68f55b3..e461100 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "jupyter-book-to-htmlbook" -version = "1.1.1" +version = "1.1.2" description = "A script to convert jupyter book html files to htmlbook for consumption in Atlas" authors = ["delfanbaum"] diff --git a/tests/test_text_processing.py b/tests/test_text_processing.py index 95376fe..fa959a5 100644 --- a/tests/test_text_processing.py +++ b/tests/test_text_processing.py @@ -206,3 +206,25 @@ def test_hidden_output_is_removed(): clean_chapter(chapter_text, False) assert not chapter_text.find("details") assert not chapter_text.find("div", class_="output") + + +def test_svg_retains_attrs(): + """ + This is to get around styles applied to SVGs, which seems like + standard practice, for better or worse. + """ + svg_ch = BeautifulSoup(""" +
+
+ + + + + + + + +
""", "html.parser") + clean_chapter(svg_ch, False) + assert "stroke" in svg_ch.find("line").get('style') # type:ignore + assert "blue" in svg_ch.find("svg").get('style') # type:ignore