kemayo · Jemeni11 · Jan 25, 2021 · Nov 4, 2022 · Nov 4, 2022 · Feb 22, 2023
diff --git a/.gitignore b/.gitignore
@@ -58,3 +58,6 @@ coverage.xml
 
 # Sphinx documentation
 docs/_build/
+
+# Pycharm
+.idea/
diff --git a/README.markdown b/README.markdown
@@ -49,6 +49,27 @@ Supports
  * Sta.sh
  * Completely arbitrary sites, with a bit more work (see below)
 
+Images support
+---
+
+Leech creates EPUB 2.01 files, which means that Leech can only save images in the following
+format:
+- JPEG (JPG/JFIF)
+- PNG
+- GIF
+
+See the [Open Publication Structure (OPS) 2.0.1](https://idpf.org/epub/20/spec/OPS_2.0.1_draft.htm#TOC2.3.4) for more information.
+
+Leech can not save images in SVG because it is not supported by Pillow.
+
+Leech uses [Pillow](https://pillow.readthedocs.io/en/stable/index.html) for image manipulation and conversion. If you want to use a different
+image format, you can install the required dependencies for Pillow and you will probably have to tinker with Leech. See the [Pillow documentation](https://pillow.readthedocs.io/en/stable/installation.html#external-libraries) for more information.
+
+By default, Leech will try and save all non-animated images as JPEG.
+The only animated images that Leech will save are GIFs.
+
+To configure image support, you will need to create a file called `leech.json`. See the section below for more information.
+
 Configuration
 ---
 
@@ -61,6 +82,10 @@ Example:
     "logins": {
         "QuestionableQuesting": ["username", "password"]
     },
+    "images": true,
+    "image_format": "png",
+    "compress_images": true,
+    "max_image_size": 100000,
     "cover": {
         "fontname": "Comic Sans MS",
         "fontsize": 30,
@@ -76,6 +101,30 @@ Example:
     }
 }
 ```
+> Note: The `images` key is a boolean and can only be `true` or `false`. Booleans in JSON are written in lowercase.
+> If it is `false`, Leech will not download any images.
+> Leech will also ignore the `image_format` key if `images` is `false`.
+
+> Note: If the `image_format` key does not exist, Leech will default to `jpeg`.
+> The three image formats are `jpeg`, `png`, and `gif`. The `image_format` key is case-insensitive.
+
+> Note: The `compress_images` key tells Leech to compress images. This is only supported for `jpeg` and `png` images.
+> This also goes hand-in-hand with the `max_image_size` key. If the `compress_images` key is `true` but there's no `max_image_size` key,
+> Leech will compress the image to a size less than 1MB (1000000 bytes). If the `max_image_size` key is present, Leech will compress the image
+> to a size less than the value of the `max_image_size` key. The `max_image_size` key is in bytes.
+> If `compress_images` is `false`, Leech will ignore the `max_image_size` key.
+
+> Warning: Compressing images might make Leech take a lot longer to download images.
+
+> Warning: Compressing images might make the image quality worse.
+
+> Warning: `max_image_size` is not a hard limit. Leech will try to compress the image to the size of the `max_image_size` key, but Leech might
+> not be able to compress the image to the exact size of the `max_image_size` key.
+
+> Warning: `max_image_size` should not be too small. For instance, if you set `max_image_size` to 1000, Leech will probably not be able to
+> compress the image to 1000 bytes. If you set `max_image_size` to 1000000, Leech will probably be able to compress the image to 1000000 bytes.
+
+> Warning: Leech will not compress GIFs, that might damage the animation.
 
 Arbitrary Sites
 ---

diff --git a/ebook/__init__.py b/ebook/__init__.py
@@ -1,6 +1,8 @@
 from .epub import make_epub, EpubFile
-from .cover import make_cover
-from .cover import make_cover_from_url
+from .cover import make_cover, make_cover_from_url
+from .image import get_image_from_url
+from sites import Image
+from bs4 import BeautifulSoup
 
 import html
 import unicodedata
@@ -72,34 +74,91 @@ class CoverOptions:
     height = attr.ib(default=None, converter=attr.converters.optional(int))
     wrapat = attr.ib(default=None, converter=attr.converters.optional(int))
     bgcolor = attr.ib(default=None, converter=attr.converters.optional(tuple))
-    textcolor = attr.ib(default=None, converter=attr.converters.optional(tuple))
+    textcolor = attr.ib(
+        default=None, converter=attr.converters.optional(tuple))
     cover_url = attr.ib(default=None, converter=attr.converters.optional(str))
 
 
-def chapter_html(story, titleprefix=None, normalize=False):
+def chapter_html(
+    story,
+    image_bool=False,
+    image_format="JPEG",
+    compress_images=False,
+    max_image_size=1_000_000,
+    titleprefix=None,
+    normalize=False
+):
     chapters = []
     for i, chapter in enumerate(story):
         title = chapter.title or f'#{i}'
         if hasattr(chapter, '__iter__'):
             # This is a Section
-            chapters.extend(chapter_html(chapter, titleprefix=title, normalize=normalize))
+            chapters.extend(chapter_html(
+                chapter, titleprefix=title, normalize=normalize))
         else:
+            soup = BeautifulSoup(chapter.contents, 'html5lib')
+            if image_bool:
+                all_images = soup.find_all('img')
+                len_of_all_images = len(all_images)
+                print(f"Found {len_of_all_images} images in chapter {i}")
+
+                for count, img in enumerate(all_images):
+                    if not img.has_attr('src'):
+                        print(f"Image {count} has no src attribute, skipping...")
+                        continue
+                    print(f"[Chapter {i}] Image ({count+1} out of {len_of_all_images}). Source: ", end="")
+                    img_contents = get_image_from_url(img['src'], image_format, compress_images, max_image_size)
+                    chapter.images.append(Image(
+                        path=f"images/ch{i}_leechimage_{count}.{img_contents[1]}",
+                        contents=img_contents[0],
+                        content_type=img_contents[2]
+                    ))
+                    img['src'] = f"../images/ch{i}_leechimage_{count}.{img_contents[1]}"
+                    if not img.has_attr('alt'):
+                        img['alt'] = f"Image {count} from chapter {i}"
+                # Add all pictures on this chapter as well.
+                for image in chapter.images:
+                    # For/else syntax, check if the image path already exists, if it doesn't add the image.
+                    # Duplicates are not allowed in the format.
+                    for other_file in chapters:
+                        if other_file.path == image.path:
+                            break
+                    else:
+                        chapters.append(EpubFile(
+                            path=image.path, contents=image.contents, filetype=image.content_type))
+            else:
+                # Remove all images from the chapter so you don't get that annoying grey background.
+                for img in soup.find_all('img'):
+                    if img.parent.name.lower() == "figure":
+                        img.parent.decompose()
+                    else:
+                        img.decompose()
+
             title = titleprefix and f'{titleprefix}: {title}' or title
-            contents = chapter.contents
+            contents = str(soup)
             if normalize:
                 title = unicodedata.normalize('NFKC', title)
                 contents = unicodedata.normalize('NFKC', contents)
             chapters.append(EpubFile(
                 title=title,
                 path=f'{story.id}/chapter{i + 1}.html',
-                contents=html_template.format(title=html.escape(title), text=contents)
+                contents=html_template.format(
+                    title=html.escape(title), text=contents)
             ))
     if story.footnotes:
-        chapters.append(EpubFile(title="Footnotes", path=f'{story.id}/footnotes.html', contents=html_template.format(title="Footnotes", text='\n\n'.join(story.footnotes))))
+        chapters.append(EpubFile(title="Footnotes", path=f'{story.id}/footnotes.html', contents=html_template.format(
+            title="Footnotes", text='\n\n'.join(story.footnotes))))
     return chapters
 
 
-def generate_epub(story, cover_options={}, output_filename=None, output_dir=None, normalize=False):
+def generate_epub(story, cover_options={}, image_options=None,  output_filename=None, output_dir=None, normalize=False):
+    if image_options is None:
+        image_options = {
+            'image_bool': False,
+            'image_format': 'JPEG',
+            'compress_images': False,
+            'max_image_size': 1_000_000
+        }
     dates = list(story.dates())
     metadata = {
         'title': story.title,
@@ -117,14 +176,19 @@ def generate_epub(story, cover_options={}, output_filename=None, output_dir=None
         extra_metadata['Tags'] = ', '.join(story.tags)
 
     if extra_metadata:
-        metadata['extra'] = '\n        '.join(f'<dt>{k}</dt><dd>{v}</dd>' for k, v in extra_metadata.items())
+        metadata['extra'] = '\n        '.join(
+            f'<dt>{k}</dt><dd>{v}</dd>' for k, v in extra_metadata.items())
 
-    valid_cover_options = ('fontname', 'fontsize', 'width', 'height', 'wrapat', 'bgcolor', 'textcolor', 'cover_url')
-    cover_options = CoverOptions(**{k: v for k, v in cover_options.items() if k in valid_cover_options})
-    cover_options = attr.asdict(cover_options, filter=lambda k, v: v is not None, retain_collection_types=True)
+    valid_cover_options = ('fontname', 'fontsize', 'width',
+                           'height', 'wrapat', 'bgcolor', 'textcolor', 'cover_url')
+    cover_options = CoverOptions(
+        **{k: v for k, v in cover_options.items() if k in valid_cover_options})
+    cover_options = attr.asdict(
+        cover_options, filter=lambda k, v: v is not None, retain_collection_types=True)
 
     if cover_options and "cover_url" in cover_options:
-        image = make_cover_from_url(cover_options["cover_url"], story.title, story.author)
+        image = make_cover_from_url(
+            cover_options["cover_url"], story.title, story.author)
     elif story.cover_url:
         image = make_cover_from_url(story.cover_url, story.title, story.author)
     else:
@@ -135,10 +199,24 @@ def generate_epub(story, cover_options={}, output_filename=None, output_dir=None
         [
             # The cover is static, and the only change comes from the image which we generate
             EpubFile(title='Cover', path='cover.html', contents=cover_template),
-            EpubFile(title='Front Matter', path='frontmatter.html', contents=frontmatter_template.format(now=datetime.datetime.now(), **metadata)),
-            *chapter_html(story, normalize=normalize),
-            EpubFile(path='Styles/base.css', contents=requests.Session().get('https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text, filetype='text/css'),
-            EpubFile(path='images/cover.png', contents=image.read(), filetype='image/png'),
+            EpubFile(title='Front Matter', path='frontmatter.html', contents=frontmatter_template.format(
+                now=datetime.datetime.now(), **metadata)),
+            *chapter_html(
+                story,
+                image_bool=image_options.get('image_bool'),
+                image_format=image_options.get('image_format'),
+                compress_images=image_options.get('compress_images'),
+                max_image_size=image_options.get('max_image_size'),
+                normalize=normalize
+            ),
+            EpubFile(
+                path='Styles/base.css',
+                contents=requests.Session().get(
+                    'https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text,
+                filetype='text/css'
+            ),
+            EpubFile(path='images/cover.png',
+                     contents=image.read(), filetype='image/png'),
         ],
         metadata,
         output_dir=output_dir