Merge pull request #16 from Oshan96/dev
Dev changes for v1.0.3
Showing 10 changed files with 290 additions and 5 deletions.
anime_downloader/scrapers/animefreak/animefreak_scraper.py (86 additions, 0 deletions)
import re
from bs4 import BeautifulSoup
from util.Episode import Episode
from scrapers.base_scraper import BaseScraper
from util.Color import printer


class AnimeFreakScraper(BaseScraper):
    def __init__(self, url, start_episode, end_episode, session, gui=None, is_dub=False):
        super().__init__(url, start_episode, end_episode, session, gui)
        self.is_dub = is_dub

    def __extract_source_links(self, page_content):
        # Pull every "file : '<url>'" value out of the embedded player script.
        # A raw string avoids invalid-escape warnings in the regex; [htps]
        # anchors the capture at the first character of an http/https URL.
        link_sources = [match.group(1) for match in
                        re.finditer(r"file\s*:\s*[\"\']\s*([htps][^\"\']+)", page_content)]

        return link_sources

    def __set_download_link(self, episode):
        # Subbed episodes live at the base page URL (mirror at "/2");
        # dubbed episodes at "/3" (mirror at "/4").
        if not self.is_dub:
            print("sub")
            response = self.session.get(episode.page_url)
            if response.status_code != 200:
                print("checking subbed mirror")
                response = self.session.get(episode.page_url + "/2")
        else:
            print("dub")
            response = self.session.get(episode.page_url + "/3")
            if response.status_code != 200:
                print("checking dubbed mirror")
                response = self.session.get(episode.page_url + "/4")

        sources = self.__extract_source_links(response.text)
        for source in sources:
            if ".mp4" in source:
                episode.download_url = source
                return True

        return False

    def __collect_episodes(self):
        printer("INFO", "Extracting page URLs...", self.gui)
        episodes = []
        response = self.session.get(self.url)
        if response.status_code == 200:
            soup_html = BeautifulSoup(response.content, "html.parser")
            # The second "check-list" <ul> on the page holds the episode anchors.
            epi_tags = soup_html.findAll("ul", attrs={"class": "check-list"})[1].findAll("a", href=True)

            for epi_tag in epi_tags:
                href = epi_tag["href"]
                # The episode number is the trailing segment of the URL slug.
                epi_no = int(href.split("-")[-1])

                if epi_no < self.start_episode or epi_no > self.end_episode:
                    continue

                episode = Episode("Episode - " + str(epi_no), "Episode - " + str(epi_no))
                episode.page_url = href

                try:
                    if self.__set_download_link(episode):
                        episodes.append(episode)
                    else:
                        printer("ERROR", "Failed to collect download link for " + episode.title, self.gui)
                except Exception as ex:
                    printer("ERROR", str(ex), self.gui)

        return episodes

    def get_direct_links(self):
        try:
            episodes = self.__collect_episodes()
            if len(episodes) > 0:
                return episodes
            return None
        except Exception as ex:
            printer("ERROR", str(ex), self.gui)
            return None
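For context, a minimal usage sketch of the new scraper. This is an assumption-laden illustration, not part of the diff: the anime URL and episode range are placeholders, and it assumes the script is run from the anime_downloader directory so the project imports resolve. A cloudscraper session is used because the project pins cloudscraper in requirements.txt.

    # Hypothetical usage sketch; URL and episode range are placeholders.
    import cloudscraper
    from scrapers.animefreak.animefreak_scraper import AnimeFreakScraper

    session = cloudscraper.create_scraper()  # Cloudflare-aware requests session
    scraper = AnimeFreakScraper("https://www.animefreak.tv/watch/some-anime",  # placeholder
                                start_episode=1, end_episode=5,
                                session=session, is_dub=False)
    episodes = scraper.get_direct_links()  # list of Episode objects, or None on failure
    if episodes:
        for epi in episodes:
            print(epi.title, epi.download_url)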
anime_downloader/scrapers/gogoanime/gogoanime_scraper.py (110 additions, 0 deletions)
import re
from util.Episode import Episode
from bs4 import BeautifulSoup
from extractors.jwplayer_extractor import JWPlayerExtractor
from scrapers.base_scraper import BaseScraper
from util.Color import printer


class GoGoAnimeScraper(BaseScraper):
    def __init__(self, url, start_episode, end_episode, session, gui=None, resolution="480"):
        super().__init__(url, start_episode, end_episode, session, gui)
        self.resolution = resolution
        self.extractor = JWPlayerExtractor(None, self.session)
        self.anime_id = None
        # Mirror endpoints for the episode-list AJAX API; tried in order.
        self.api_link_bases = ['https://ajax.gogocdn.net/ajax/load-list-episode',
                               'https://ajax.apimovie.xyz/ajax/load-list-episode']

        self.__set_anime_id()

    def __set_anime_id(self):
        # The numeric anime id is exposed as a hidden <input id="movie_id"> on the page.
        response = self.session.get(self.url)
        if response.status_code == 200:
            soup_html = BeautifulSoup(response.content, "html.parser")
            movie_id_tag = soup_html.find("input", attrs={"id": "movie_id"})
            if movie_id_tag is not None:
                self.anime_id = movie_id_tag["value"]

    def __get_episode_data(self):
        # Query each API mirror until one returns the episode list.
        for base_link in self.api_link_bases:
            api_link = base_link + "?ep_start=" + str(self.start_episode) + "&ep_end=" + str(
                self.end_episode) + "&id=" + self.anime_id
            response = self.session.get(api_link)
            if response.status_code == 200:
                return response.content

        return None

    def __get_page_url(self, href):
        # Episode hrefs are relative; prepend the site root taken from the category URL.
        base_url = re.search(r"(.*)/category/", self.url).group(1)
        return base_url + href

    def __set_stream_url(self, episode):
        response = self.session.get(episode.page_url)
        if response.status_code == 200:
            soup_html = BeautifulSoup(response.content, "html.parser")
            item_tag = soup_html.find("li", attrs={"class": "anime"}).find("a")
            streamer_url = item_tag["data-video"]
            if "https" not in streamer_url:
                streamer_url = "https:" + streamer_url

            streamer_resp = self.session.get(streamer_url)
            if streamer_resp.status_code == 200:
                sources = self.extractor.extract_sources(streamer_resp.text)
                src = ""
                for source in sources:
                    if "m3u8" in source:
                        src = source
                        break

                if src != "":
                    # The master playlist references per-resolution variant playlists;
                    # join the chosen variant back onto the master playlist's base path.
                    res_link_id = self.extractor.get_resolution_link(src, self.resolution)
                    stream_base = re.search(r"(.*)/[\S]+\.m3u8", src).group(1)
                    episode.download_url = stream_base + "/" + res_link_id
                    print("stream url:", episode.download_url)

                    return True

        return False

    def __collect_episodes(self):
        printer("INFO", "Extracting page URLs...", self.gui)
        episodes = []
        if self.anime_id is not None:
            data = self.__get_episode_data()
            if data is not None:
                soup_html = BeautifulSoup(data, "html.parser")
                anchor_tags = soup_html.findAll("a", href=True)
                for anchor in anchor_tags:
                    href = anchor["href"].strip()
                    epi_no = int(href.split("-")[-1])

                    if epi_no < self.start_episode or epi_no > self.end_episode:
                        continue

                    episode = Episode("Episode - " + str(epi_no), "Episode - " + str(epi_no))
                    episode.is_direct = False
                    episode.page_url = self.__get_page_url(href)
                    if self.__set_stream_url(episode):
                        episodes.append(episode)
                    else:
                        printer("ERROR", "Failed to collect download link for " + episode.title, self.gui)

        return episodes

    def get_direct_links(self):
        try:
            episodes = self.__collect_episodes()
            if len(episodes) > 0:
                return episodes
            return None
        except Exception as ex:
            printer("ERROR", str(ex), self.gui)
            return None
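The trickiest step above is the playlist-path arithmetic in __set_stream_url. A small standalone illustration, with entirely made-up URLs, of how the variant playlist name returned for a resolution is joined onto the master playlist's base path:

    # Illustration only; both URLs are made up.
    import re

    master = "https://cdn.example.com/stream/abc123/master.m3u8"
    variant = "ep.1.480p.m3u8"  # e.g. what get_resolution_link might return for "480"

    stream_base = re.search(r"(.*)/[\S]+\.m3u8", master).group(1)
    assert stream_base == "https://cdn.example.com/stream/abc123"
    download_url = stream_base + "/" + variant
    # -> https://cdn.example.com/stream/abc123/ep.1.480p.m3u8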
import jsbeautifier as js
from bs4 import BeautifulSoup
from extractors.jwplayer_extractor import JWPlayerExtractor


class JsUnpacker:
    def __init__(self):
        self.jwp_extractor = JWPlayerExtractor(None, None)

    def eval(self, func):
        # jsbeautifier's beautify also unpacks common eval(function(p,a,c,k,e,d)...)
        # payloads into readable source, without executing any JavaScript.
        return js.beautify(func)

    def extract_link(self, func):
        data = self.eval(func)
        if "jwplayer" in data:
            print("jwplayer source will be returned")
            links = self.jwp_extractor.extract_sources(data)
            if links is not None and len(links) > 0:
                src = links[0]
            else:
                print("no sources found")
                return None
        else:
            print("Any anchor href will be returned")
            anch = BeautifulSoup(data, "html.parser").find("a")
            if anch is not None:
                # Strip stray quoting and escape characters left over from the JS string.
                src = anch['href'].replace('\"', '').replace('\'', '').replace('\\', '')
            else:
                print("No anchor links found")
                return None

        return src
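A quick sketch of the anchor-href path through extract_link, assuming the project's imports resolve. The input is a hand-written stand-in for unpacked player JavaScript, not real site output; with no "jwplayer" marker present, the first anchor's href should come back:

    # Sketch only; the payload is a made-up stand-in for player JS.
    unpacker = JsUnpacker()

    payload = "document.write('<a href=\"https://example.com/video.mp4\">watch</a>')"
    print(unpacker.extract_link(payload))  # expected: https://example.com/video.mp4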
jsbeautifier
requests==2.22.0
art==4.5
cloudscraper==1.2.33
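The one dependency this release adds is jsbeautifier, which backs JsUnpacker.eval. A minimal sanity check of the library, with an illustrative one-liner as input (exact output formatting may vary by version):

    # Minimal check of the new jsbeautifier dependency.
    import jsbeautifier
    print(jsbeautifier.beautify("var a=1;function f(){return a;}"))
    # prints an indented, multi-line version of the one-liner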