diff --git a/README.md b/README.md index e037fd7..8dcbf2e 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,29 @@ # Anime Downloader [![Total Downloads](https://img.shields.io/github/downloads/Oshan96/Anime-Downloader/total.svg?style=for-the-badge)](https://github.com/Oshan96/Anime-Downloader/releases) -There are two scripts (Anime_Downloader.py, Anime_Scraper.py) to download given anime to a directory and to extract direct download links. -Anime_Scraper.py scraper is used to collect and extract direct anime download links from 9anime.to (From its Mp4Upload server) +You can now bulk download your favourite anime episodes from various websites, in various resolutions, with or without filler episodes. + +[See supported websites](#Supported-Websites) ## Donations If this project is helpful to you and love my work and feel like showing love/appreciation, would you like to buy me a coffee? Buy Me A Coffee +## Supported Websites +| Website | Need recaptcha token? | Supported resolutions | FFMPEG needed? | +|--- |--- |--- |--- | +| [9Anime](https://9anime.to/) | Yes | Default only | No | +| [4Anime](https://4anime.to/) | No | Default only | No | +| [AnimePahe](https://animepahe.com/) | No | 720p, 1080p | No | +| [AnimeUltima](https://www.animeultima.to/) | No | 240p, 360p, 480p, 720p, 1080p | Yes | + ## Download Anime Downloader [Windows] > Note : Currently only windows executable is provided (Linux, Mac users go to [Build from source](#Building-from-source)) Download the [Latest Relase](https://github.com/Oshan96/Anime-Downloader/releases) from here and extract the zip file -## Downloading +## Downloading Your Favourite Anime -First of all, Anime Downloader uses [2captcha](https://www.2captcha.com) to bypass google recaptcha, so you need to purchase one +First of all, for websites that require a captcha token, Anime Downloader uses [2captcha](https://www.2captcha.com) to bypass Google reCAPTCHA, so you need to purchase an API key ([check whether your anime website needs a captcha token](#Supported-Websites)) Open settings.json and set [2captcha](https://2captcha.com/) API key in "api_key" @@ -24,13 +33,25 @@ Open settings.json and set [2captcha](https://2captcha.com/) API key in "api_key" *Don't have 2captcha API key? Don't worry! You can still use this to download anime. Check the "FAQ" section on [how to download if you don't have a 2captcha API key](#Q---I-don't-have-a-2captcha-API-key,-is-there-any-workaround-for-that?)* +##### To download from some websites (like animeultima.to), Anime Downloader requires [FFMPEG](https://www.ffmpeg.org/) to be installed ([check whether your anime website needs FFMPEG](#Supported-Websites)) + +- You can download FFMPEG from [here](https://www.ffmpeg.org/download.html) +- Then add the ffmpeg executable to your system PATH +Or, on a Linux environment, install it with: +```bash +sudo apt install ffmpeg +``` + +#### Download failed with a weird error? Don't worry, it's usually because these websites are protected by various security measures. Simply visit the website manually in a browser, then restart Anime Downloader! + +#### Still not able to download? Post your issue [here](https://github.com/Oshan96/Anime-Downloader/issues) and I will look into the error and provide the necessary fixes! + +## Running the application Navigate to the extracted folder, open a cmd or PowerShell window from that folder, and execute "anime-dl.exe" from the command line. 
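Before running, make sure settings.json (shipped alongside the executable) has your key set. A minimal sketch of that file, assuming only the "api_key" field read by the downloader — the value shown is just a placeholder; replace it with your own 2captcha key, or leave it as-is if your chosen website doesn't need a captcha token:

```json
{
    "api_key": "insert_2captcha_api_key"
}
```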
## How to download using GUI version (v0.1.1-alpha upwards) It is same as the CLI version, but provided a graphical user interface to collect necessary parameters. -> Note : The GUI version is still in development and this is a pre-release. The code and execution methods will probably change in future - Execute the "anime-dl.exe" to start. If you're running from source files, execute the "anime-dl.py" script @@ -43,7 +64,7 @@ And the GUI will appear as following : ![GUI](docs/images/gui.png) -#### Note : If you don't have a 2captcha API key, you need to [provide "Recaptcha Token" in the given text field](#Q---I-don't-have-a-2captcha-API-key,-is-there-any-workaround-for-that?) (check FAQ section) +#### Note : If you don't have a 2captcha API key, you need to [provide "Recaptcha Token" in the given text field](#Q---I-don't-have-a-2captcha-API-key,-is-there-any-workaround-for-that?) for websites require captcha token (check FAQ section) ## How to download using anime-dl (CLI)? diff --git a/anime_downloader/Anime_Downloader.py b/anime_downloader/Anime_Downloader.py index ece94d2..3b7df9c 100644 --- a/anime_downloader/Anime_Downloader.py +++ b/anime_downloader/Anime_Downloader.py @@ -1,5 +1,3 @@ -from scrapers.nineanime import Anime_Scraper -from util import Color import warnings import ssl import argparse @@ -8,160 +6,183 @@ import os import sys from platform import system - from threading import Thread from queue import Queue from art import text2art +from util import Color +from util.ffmpeg_downloader import FFMPEGDownloader +from scrapers.nineanime import Anime_Scraper directory = "" threads = 1 - token = None - titles = False - args = None - gui = None -class Worker(Thread) : - def __init__(self, tasks) : + +class Worker(Thread): + def __init__(self, tasks, gui=None): Thread.__init__(self) self.tasks = tasks + self.gui = gui self.daemon = True self.start() - - def run(self) : - global gui - while True : + + def run(self): + while True: func, arg, kargs = self.tasks.get() - try : + try: func(*arg, **kargs) - except Exception as ex : - # print(ex) - Color.printer("ERROR", ex, gui) - finally : + except Exception as ex: + Color.printer("ERROR", ex, self.gui) + finally: self.tasks.task_done() -class ThreadPool : - def __init__(self, num_threads) : + +class ThreadPool: + def __init__(self, num_threads, gui=None): self.tasks = Queue(num_threads) - for _ in range(num_threads) : - Worker(self.tasks) - - def add_task(self, func, *arg, **kargs) : + for _ in range(num_threads): + Worker(self.tasks, gui) + + def add_task(self, func, *arg, **kargs): self.tasks.put((func, arg, kargs)) - - def map(self, func, args_list) : - for arg in args_list : + + def map(self, func, args_list): + for arg in args_list: self.add_task(func, arg) - - def wait_completion(self) : + + def wait_completion(self): self.tasks.join() -def clean_file_name(file_name) : - for c in r'[]/\;,><&*:%=+@#^()|?^': - file_name = file_name.replace(c,'') - - return file_name +class Downloader: + def __init__(self, directory, episodes, threads=1, gui=None, is_titles=False): + self.directory = directory + self.threads = threads + self.episodes = episodes + self.is_titles = is_titles + self.gui = gui + + def __clean_file_name(self, file_name): + for c in r'[]/\;,><&*:%=+@#^()|?^': + file_name = file_name.replace(c, '') + + return file_name + + def __download_episode(self, episode): + if episode.is_direct: + if episode.download_url is None: + Color.printer("ERROR", "Download URL is not set for " + episode.episode + ", skipping...", self.gui) + 
return -def download_episode(episode) : - global titles, gui + Color.printer("INFO", "Downloading " + episode.episode + "...", self.gui) - Color.printer("INFO", "Downloading " + episode.episode + "...", gui) + if system() == "Windows": + episode.title = self.__clean_file_name(episode.title) - if system() == "Windows" : - episode.title = clean_file_name(episode.title) + # print(self.is_titles) + # print(episode.title) - if titles : - file_name = directory + episode.episode + " - " + episode.title + ".mp4" - else : - file_name = directory+episode.episode+".mp4" + if self.is_titles: + # print("with title") + file_name = self.directory + episode.episode + " - " + episode.title + ".mp4" + else: + # print("without title") + file_name = self.directory + episode.episode + ".mp4" - with requests.get(episode.download_url, stream=True, verify=False) as r: - with open(file_name, 'wb') as f: - shutil.copyfileobj(r.raw, f, length=16*1024*1024) + with requests.get(episode.download_url, stream=True, verify=False) as r: + with open(file_name, 'wb') as f: + shutil.copyfileobj(r.raw, f, length=16 * 1024 * 1024) - Color.printer("INFO", episode.episode + " finished downloading...", gui) + Color.printer("INFO", episode.episode + " finished downloading...", self.gui) + else: + Color.printer("INFO", "HLS link found. Using FFMPEG to download...", self.gui) + FFMPEGDownloader(episode, self.directory, self.gui).download() -def download() : - global directory, threads, gui + def download(self): - try: - _create_unverified_https_context = ssl._create_unverified_context - except AttributeError: - # Legacy Python that doesn't verify HTTPS certificates by default - pass - else: - # Handle target environment that doesn't support HTTPS verification - ssl._create_default_https_context = _create_unverified_https_context + try: + _create_unverified_https_context = ssl._create_unverified_context + except AttributeError: + # Legacy Python that doesn't verify HTTPS certificates by default + pass + else: + # Handle target environment that doesn't support HTTPS verification + ssl._create_default_https_context = _create_unverified_https_context - Color.printer("INFO", "Downloading started...", gui) + Color.printer("INFO", "Downloading started...", self.gui) - # for episode in Anime_Scraper.episodes : - # print("Downloading", episode.episode) - # urllib.request.urlretrieve(episode.download_url, directory+episode.episode+".mp4") - - pool = ThreadPool(threads) + pool = ThreadPool(self.threads, gui) - pool.map(download_episode, Anime_Scraper.episodes) - pool.wait_completion() + pool.map(self.__download_episode, self.episodes) + pool.wait_completion() - Color.printer("INFO", "Downloading finished!", gui) + Color.printer("INFO", "Downloading finished!", self.gui) -def print_banner() : +def print_banner(): banner = text2art("Anime Downloader") Color.printer("BANNER", banner) -def main() : +def main(): global directory, args, threads, titles, token print_banner() parser = argparse.ArgumentParser(description="Anime Downloader Command Line Tool") - argparse.ArgumentParser(description="Help option parcer for Anime Downloader Command Line Tool", add_help=False, formatter_class=argparse.HelpFormatter) + argparse.ArgumentParser(description="Help option parcer for Anime Downloader Command Line Tool", add_help=False, + formatter_class=argparse.HelpFormatter) parser.add_argument("-u", "--url", required=True, help="9Anime.to URL for the anime to be downloaded", dest="url") - parser.add_argument("-n", "--names", required=True, 
help="https://www.animefillerlist.com/ URL to retrieve episode titles", dest="title_url") - parser.add_argument("-d", "--directory", required=False, help="Download destination. Will use the current directory if not provided", default="" , dest="dir") - parser.add_argument("-s", "--start", required=False, help="Starting episode",default=1, type=int , dest="start") - parser.add_argument("-e", "--end", required=False, help="End episode", default=9999, type=int ,dest="end") - parser.add_argument("-c", "--code", required=False, help="Recaptcha answer token code. Insert this if you don't have 2captcha captcha bypass api_key", default=None, dest="token") - parser.add_argument("-t", "--threads", required=False, help="Number of parrallel downloads. Will download sequencially if not provided", default=1, type=int ,dest="threads") - parser.add_argument("-f", "--filler", required=False, help="Whether fillers needed", default=True, type=bool ,dest="isFiller") + parser.add_argument("-n", "--names", required=True, + help="https://www.animefillerlist.com/ URL to retrieve episode titles", dest="title_url") + parser.add_argument("-d", "--directory", required=False, + help="Download destination. Will use the current directory if not provided", default="", + dest="dir") + parser.add_argument("-s", "--start", required=False, help="Starting episode", default=1, type=int, dest="start") + parser.add_argument("-e", "--end", required=False, help="End episode", default=9999, type=int, dest="end") + parser.add_argument("-c", "--code", required=False, + help="Recaptcha answer token code. Insert this if you don't have 2captcha captcha bypass api_key", + default=None, dest="token") + parser.add_argument("-t", "--threads", required=False, + help="Number of parrallel downloads. Will download sequencially if not provided", default=1, + type=int, dest="threads") + parser.add_argument("-f", "--filler", required=False, help="Whether fillers needed", default=True, type=bool, + dest="isFiller") args = parser.parse_args() Anime_Scraper.download_9anime_url = args.url Anime_Scraper.title_url = args.title_url Anime_Scraper.isFiller = args.isFiller - # Anime_Scraper.ts_no = args.ts_no + token = args.token directory = args.dir threads = args.threads - if args.title_url : + if args.title_url: titles = True - if directory != "" : + if directory != "": directory = directory.replace("\\", "/") - if not directory.endswith("/") : - directory+="/" - + if not directory.endswith("/"): + directory += "/" + Anime_Scraper.main(args.start, args.end, token) - download() + Downloader(directory, Anime_Scraper.episodes, threads, gui, titles).download() + if __name__ == "__main__": - #suppress warnings + # suppress warnings warnings.filterwarnings("ignore") - - #activate color codes - if sys.platform.lower() == "win32" : - os.system("color") - + + # activate color codes + if sys.platform.lower() == "win32": + os.system("color") + main() diff --git a/anime_downloader/anime-dl.py b/anime_downloader/anime-dl.py index bf8bc53..7823953 100644 --- a/anime_downloader/anime-dl.py +++ b/anime_downloader/anime-dl.py @@ -1,9 +1,13 @@ -import Anime_Downloader -from scrapers.nineanime import Anime_Scraper +import os +import sys import warnings from queue import Queue -from gui.GUI import Anime_GUI +from gui.GUI import AnimeGUI -if __name__ == "__main__" : +if __name__ == "__main__": warnings.filterwarnings("ignore") - Anime_GUI(Queue(), Anime_Downloader, Anime_Scraper).run() \ No newline at end of file + # activate color codes + if sys.platform.lower() == "win32": 
+ os.system("color") + + AnimeGUI(Queue()).run() diff --git a/anime_downloader/extractors/__init__.py b/anime_downloader/extractors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/anime_downloader/extractors/base_extractor.py b/anime_downloader/extractors/base_extractor.py new file mode 100644 index 0000000..24ce3c0 --- /dev/null +++ b/anime_downloader/extractors/base_extractor.py @@ -0,0 +1,15 @@ +class BaseExtractor: + + def __init__(self, url, session): + self.url = url + self.session = session + + def extract_page_content(self): + video_page = self.session.get(self.url).content + return video_page.decode('utf-8') + + def extract_direct_url(self): + raise NotImplementedError + + def direct_link(self): + return self.extract_direct_url() \ No newline at end of file diff --git a/anime_downloader/extractors/jwplayer_extractor.py b/anime_downloader/extractors/jwplayer_extractor.py new file mode 100644 index 0000000..a354d88 --- /dev/null +++ b/anime_downloader/extractors/jwplayer_extractor.py @@ -0,0 +1,63 @@ +import re +from extractors.base_extractor import BaseExtractor + + +class JWPlayerExtractor(BaseExtractor): + + def __init__(self, url, session): + super().__init__(url, session) + + def extract_sources(self): + page_content = self.extract_page_content() + + link_sources = [match.group(1) for match in + re.finditer("{\s*file\s*:\s*[\"\']\s*([htps][^\"\']+)", page_content)] + + return link_sources + + def extract_direct_url(self): + direct_links = self.extract_sources() + + if len(direct_links) > 0: + # return the first direct link + return direct_links[0] + else: + return None + + # if the given resolution is not found, the first available link would be given + def get_resolution_link(self, master_url, resolution): + count = 0 + content = self.session.get(master_url).text + data_list = content.split("\n") + + link = None + + for index, data in enumerate(data_list): + res = re.search("RESOLUTION=(.*)x(.*)", data) + if res is not None: + # print(res.group(1), res.group(2).split(",")[0]) + k = res.group(2).split(",")[0] + if k == resolution: + # print(k) + # print(data_list[index+1]) + return data_list[index+1] # next one will be the link + + if count == 0: + # print("First :", k) + link = data_list[index+1] # save the first result + + count += 1 + + return link + + def extract_stream_link(self, resolution="720"): + link = self.extract_direct_url() + + print("Master Link : " + link) + + if "master.m3u8" in link: + link = self.get_resolution_link(link, resolution) + print("Index Link : " + link) + + return link + diff --git a/anime_downloader/extractors/mp4upload_extractor.py b/anime_downloader/extractors/mp4upload_extractor.py new file mode 100644 index 0000000..a5ffadd --- /dev/null +++ b/anime_downloader/extractors/mp4upload_extractor.py @@ -0,0 +1,21 @@ +import re +from extractors.base_extractor import BaseExtractor + + +class Mp4UploadExtractor(BaseExtractor): + + def __init__(self, url, session): + super().__init__(url, session) + + def extract_direct_url(self): + page_content = self.extract_page_content() + + www_base = re.search("false\|(.*)\|devicePixelRatio", page_content).group(1) + id_port = re.search("video\|(.*)\|(.*)\|src", page_content) + url_id = id_port.group(1) + port = id_port.group(2) + + direct_url = "https://{}.mp4upload.com:{}/d/{}/video.mp4".format(www_base, port, url_id) + + return direct_url + diff --git a/anime_downloader/gui/GUI.py b/anime_downloader/gui/GUI.py index 0bc2aa8..6f9d9e7 100644 --- a/anime_downloader/gui/GUI.py +++ 
b/anime_downloader/gui/GUI.py @@ -1,115 +1,185 @@ import queue +import cloudscraper import PySimpleGUI as sg from threading import Thread +from time import sleep +from Anime_Downloader import Downloader +from util.Color import printer +from util.name_collector import EpisodeNamesCollector +from scrapers.fouranime.fouranime_scraper import FourAnimeScraper +from scrapers.nineanime.nineanime_scraper import NineAnimeScraper +from scrapers.animeultima.animeultima_scraper import AnimeUltimaScraper +from scrapers.animepahe.animepahe_scraper import AnimePaheScraper sg.theme('Dark Amber') -downloader = None -scraper = None +i = 0 +max_val = 100 -def execute(downloader, scraper, start_epi, end_epi) : - scraper.main(start_epi, end_epi, downloader.token) - downloader.download() -class Anime_GUI() : +def download(anime_url, names_url, start_epi, end_epi, is_filler, is_titles, token, threads, directory, gui, resolution="720", is_dub=False): + global max_val - def __init__(self, gui_queue, downloader, scraper) : + session = cloudscraper.create_scraper() + scraper = None + episodes = [] + + anime_url = anime_url.lower() + + try: + if "9anime.to" in anime_url: + printer("INFO", "9Anime URL detected...", gui) + scraper = NineAnimeScraper(anime_url, start_epi, end_epi, session, gui, token) + + elif "4anime.to" in anime_url: + printer("INFO", "4Anime URL detected...", gui) + scraper = FourAnimeScraper(anime_url, start_epi, end_epi, session, gui) + + elif "animeultima.to" in anime_url: + printer("INFO", "AnimeUltima URL detected...", gui) + scraper = AnimeUltimaScraper(anime_url, start_epi, end_epi, session, gui, resolution, is_dub) + + elif "animepahe.com" in anime_url: + printer("INFO", "AnimePahe URL detected...", gui) + scraper = AnimePaheScraper(anime_url, start_epi, end_epi, session, gui, resolution, is_filler) + + else: + printer("ERROR", "Incorrect URL provided!", gui) + return + + printer("INFO", "Collecting download links...", gui) + episodes = scraper.get_direct_links() + + if episodes is None: + printer("INFO", "Retrying to collect download links...", gui) + sleep(5) + episodes = scraper.get_direct_links() + + if episodes: + if is_titles: + printer("INFO", "Setting episode titles...", gui) + episodes = EpisodeNamesCollector(names_url, start_epi, end_epi, is_filler, episodes).collect_episode_names() + + else: + printer("ERROR", "Failed to retrieve download links!", gui) + return + + max_val = len(episodes) + # print("is titles", is_titles) + downloader = Downloader(directory, episodes, threads, gui, is_titles) + downloader.download() + + except Exception as ex: + printer("ERROR", ex, gui) + printer("ERROR", "Something went wrong! 
Please close and restart Anime Downloader to retry!", gui) + + +class AnimeGUI: + + def __init__(self, gui_queue): self.gui_queue = gui_queue - self.downloader = downloader - self.scraper = scraper self.window = None - def create_ui(self) : + def create_ui(self): layout = [ - - [sg.Text("General Details",size=(15,1)),sg.Text("_"*60, pad=(0,15))], - [sg.Text("Anime URL (9anime.to)", text_color="white", size=(25,1)), sg.InputText(key="anime_url")], - [sg.Text("Animefillerlist URL", text_color="white", size=(25,1)), sg.InputText(key="names_url")], - [sg.Text("Save To", size=(25,1), text_color="white"), sg.InputText(key="location"), sg.FolderBrowse()], - - [sg.Text("Episodes Details",size=(15,1)),sg.Text("_"*60, pad=(0,15))], - [sg.Text("From", text_color="white"), sg.InputText(key="start_epi", size=(5,1)), sg.Text("To", text_color="white"), sg.InputText(key="end_epi", size=(5,1)), sg.Text("Download Filler Episodes?", text_color="white"), sg.Combo(["Yes", "No"], size=(4,1), default_value="Yes", key="isFiller"), sg.Text("Threads", text_color="white"), sg.Spin([i for i in range(1,21)],initial_value=1, size=(3,1), key="threads")], + + [sg.Text("General Details", size=(15, 1)), sg.Text("_" * 60, pad=(0, 15))], + [sg.Text("Anime URL", text_color="white", size=(25, 1)), sg.InputText(key="anime_url")], + [sg.Text("Animefillerlist URL", text_color="white", size=(25, 1)), sg.InputText(key="names_url")], + [sg.Text("Save To", size=(25, 1), text_color="white"), sg.InputText(key="location"), sg.FolderBrowse()], + + [sg.Text("Episodes Details", size=(15, 1)), sg.Text("_" * 60, pad=(0, 15))], + [sg.Text("From", text_color="white"), sg.InputText(key="start_epi", size=(5, 1)), + sg.Text("To", text_color="white"), sg.InputText(key="end_epi", size=(5, 1)), + sg.Text("Download Fillers?", text_color="white"), + sg.Combo(["Yes", "No"], size=(4, 1), default_value="Yes", key="isFiller"), + sg.Text("Threads", text_color="white"), + sg.Spin([i for i in range(1, 21)], initial_value=1, size=(3, 1), key="threads"), + sg.Text("Resolution", text_color="white"), + sg.Combo(["240", "360", "480", "720", "1080"], size=(4, 1), default_value="1080", key="resolution")], [], - [sg.Text("Optional Settings (Fill this if you don't have 2captcha key)",size=(45,1)),sg.Text("_"*25, pad=(0,15))], - [sg.Text("Recaptcha Token (Optional)", text_color="white", size=(25,1)), sg.Multiline(size=(45, 4), key="token")], - [sg.Column([[sg.Button("Download", size=(10,1))]], justification="right", pad=(35,5))], + [sg.Text("Optional Settings (Fill this if you don't have 2captcha key)", size=(45, 1)), + sg.Text("_" * 25, pad=(0, 15))], + [sg.Text("Recaptcha Token (Optional)", text_color="white", size=(25, 1)), + sg.Multiline(size=(45, 4), key="token")], + [sg.Column([[sg.Button("Download", size=(10, 1))]], justification="right", pad=(35, 5))], [], [sg.Text("Messages")], [sg.Multiline(size=(None, 8), key="txt_msg", disabled=True)], - [] + [], + [sg.Text("Progress"), sg.Text("_" * 74, pad=(0, 15))], + [sg.ProgressBar(100, key="progress", orientation="h", size=(45, 15))] ] - self.window = sg.Window("Anime Downloader v0.1.1-alpha", layout) + self.window = sg.Window("Anime Downloader v1.0.0", layout) - def check_messages(self, values) : + def check_messages(self, values): + global i, max_val txt = values["txt_msg"].strip() - while True : - try: # see if something has been posted to Queue + while True: + try: # see if something has been posted to Queue message = self.gui_queue.get_nowait() - except queue.Empty: # get_nowait() will get exception when Queue 
is empty - break # break from the loop if no more messages are queued up + except queue.Empty: # get_nowait() will get exception when Queue is empty + break # break from the loop if no more messages are queued up # if message received from queue, display the message in the Window if message: txt += "\n" + message + + if "finished downloading..." in message or "failed to download!" in message: + i+=1 + self.window["progress"].UpdateBar(i, max=max_val) + self.window['txt_msg'].update(txt) # do a refresh because could be showing multiple messages before next Read self.window.refresh() # print(message) - def run(self) : + def run(self): + global i, max_val self.create_ui() - while True : + + while True: # wait for up to 100 ms for a GUI event event, values = self.window.read(timeout=100) if event in (None, 'Exit'): break - if event == "Download" : - self.scraper.download_9anime_url = values["anime_url"] - self.scraper.title_url = values["names_url"] - - if values["names_url"] != "" : - self.downloader.titles = True + # self.window["progress"].UpdateBar(i+1, max=100) + # i+=1 - if values["isFiller"] == "Yes": - self.scraper.isFiller = True - else : - self.scraper.isFiller = False + if event == "Download": + anime_url = values["anime_url"] + names_url = values["names_url"] + is_titles = True if names_url != "" else False + is_filler = True if values["isFiller"] == "Yes" else False tok = values["token"].rstrip() - - if tok != "": - self.downloader.token = tok + token = tok if tok != "" else None directory = values["location"] + threads = values["threads"] + start_epi = int(values["start_epi"]) if values["start_epi"] != "" else 1 + end_epi = int(values["end_epi"]) if values["end_epi"] != "" else 9999 + resolution = str(values["resolution"]) - if directory != "" : - directory = directory.replace("\\", "/") - if not directory.endswith("/") : - directory+="/" + max_val = (end_epi - start_epi) + 1 + self.window["progress"].UpdateBar(i, max=max_val) - self.downloader.directory = directory - self.downloader.threads = values["threads"] + if anime_url == "": + self.window['txt_msg'].update("[ERROR!] 
: Provide Anime URL!") + continue - self.scraper.gui = self - self.downloader.gui = self + if directory != "": + directory = directory.replace("\\", "/") + if not directory.endswith("/"): + directory += "/" - # self.window["txt_msg"].update("[INFO] : Download started!") + self.window["txt_msg"].update("") self.window.refresh() - start_epi = 1 - end_epi=9999 - - if values["start_epi"] != "": - start_epi = int(values["start_epi"]) - - if values["end_epi"] != "": - end_epi = int(values["end_epi"]) - - thread = Thread(target=execute, args=(self.downloader, self.scraper, start_epi, end_epi), daemon=True) + thread = Thread(target=download, args=(anime_url, names_url, start_epi, end_epi, is_filler, is_titles, token, threads, directory, self, resolution), daemon=True) thread.start() - + self.check_messages(values) self.window.close() - diff --git a/anime_downloader/scrapers/__init__.py b/anime_downloader/scrapers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/anime_downloader/scrapers/animepahe/__init__.py b/anime_downloader/scrapers/animepahe/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/anime_downloader/scrapers/animepahe/animepahe_scraper.py b/anime_downloader/scrapers/animepahe/animepahe_scraper.py new file mode 100644 index 0000000..b3bf18c --- /dev/null +++ b/anime_downloader/scrapers/animepahe/animepahe_scraper.py @@ -0,0 +1,242 @@ +import re +import json +from bs4 import BeautifulSoup +from time import sleep +from util.Episode import Episode +from scrapers.base_scraper import BaseScraper +from util.Color import printer + + +class AnimePaheScraper(BaseScraper): + def __init__(self, url, start_episode, end_episode, session, gui=None, resolution="720", is_filler=True): + super().__init__(url, start_episode, end_episode, session, gui) + self.resolution = resolution + self.is_filler = is_filler + self.id = None + self.base_url = "https://animepahe.com" + self.start_page = 1 + self.end_page = 1 + + self.__set_anime_id() + self.__set_start_end_page() + + def __set_anime_id(self): + page = self.session.get(self.url).text + self.id = re.search("release&id=(.*)&l=", page).group(1) + + def __set_start_end_page(self): + self.start_page = int(self.start_episode / 30) + 1 + self.end_page = int(self.end_episode / 30) + 1 + + def __get_page_data(self, page_url): + return self.session.get(page_url).json() + + def __collect_episodes(self): + printer("INFO", "Collecting episodes...", self.gui) + + page_count = self.start_page + while page_count <= self.end_page: + api_url = "https://animepahe.com/api?m=release&id=" + self.id + "&sort=episode_asc&page=" + str(page_count) + api_data = self.__get_page_data(api_url)["data"] + + for data in api_data: + epi_no = data["episode"] + if epi_no < self.start_episode or epi_no > self.end_episode: + continue + + is_canon = data["filler"] == 0 + + # AnimePahe is not having valid fillers list (always 0). Added for the completion + if not self.is_filler and not is_canon: + print("Episode", str(epi_no), "is filler.. 
skipping...") + continue + + episode = Episode("Episode - " + str(epi_no), "Episode - " + str(epi_no)) + episode.id = data["session"] + self.episodes.append(episode) + + page_count += 1 + + def __set_kwik_links(self): + printer("INFO", "Collecting kwik links...", self.gui) + + api_url = "https://animepahe.com/api?m=embed&p=kwik&id=" + for episode in self.episodes: + temp_url = api_url + self.id + "&session=" + episode.id + # print(temp_url) + api_data = self.__get_page_data(temp_url)["data"] + + links = list(api_data.keys()) + + # 720p + link = api_data[links[0]]["720"]["url"] + id = link.split("/")[-1] + + try: + # 1080p + if self.resolution == "1080": + link = api_data[links[0]]["1080"]["url"] + id = link.split("/")[-1] + except Exception as ex: + printer("ERROR", "1080p not available!", self.gui) + printer("INFO", "Continuing with 720p link...", self.gui) + + episode.id = id + page_url = "https://kwik.cx/f/" + id + episode.page_url = page_url + + if not self.__set_direct_link(episode): # try setting at retrieval + printer("INFO", "Second download link retrieval attempt", self.gui) + if not self.__set_direct_link(episode): + printer("INFO", "Third download link retrieval attempt", self.gui) + if not self.__set_direct_link(episode): + printer("ERROR", "Failed all attempts to retrieve download link for " + episode.title, self.gui) + + def __get_cookie_and_response(self, episode): + printer("INFO", "Collecting request header values...", self.gui) + + head = { + "referer": episode.page_url, + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36 Edg/80.0.361.69" + } + response = self.session.get(episode.page_url, headers=head) + cookie = [] + try: + cookie.append(response.headers["set-cookie"]) + cookie.append(response) + except Exception as ex: + printer("ERROR", ex, self.gui) + return None + + return cookie + + def __get_token(self, response): + printer("INFO", "Collecting access token...", self.gui) + page = response.text + # print(page) + try: + token = re.search("value\|(.*)\|([a-zA-Z])", page).group(1).split("|")[0] + # print("TOKEN :", token) + return token + except Exception as ex: + printer("ERROR", ex, self.gui) + # print(page) + return None + + def __set_direct_link(self, episode): + cookie = self.__get_cookie_and_response(episode) + if cookie is None: + printer("INFO", "Retrying header retrieval...", self.gui) + sleep(2) + cookie = self.__get_cookie_and_response(episode) + + if cookie is None: + printer("ERROR", "Couldn't find headers needed ...", self.gui) + return False + + token = self.__get_token(cookie[1]) + + if not token: + printer("ERROR", "No token found... 
skipping", self.gui) + return False + + head = { + "origin": "https://kwik.cx", + "referer": episode.page_url, + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36 Edg/80.0.361.69", + "cookie": cookie[0] + } + + payload = { + "_token": token + } + + post_url = "https://kwik.cx/d/" + episode.id + + # print(head) + + resp_headers = self.session.post(post_url, data=payload, headers=head, allow_redirects=False).headers + try: + episode.download_url = resp_headers["location"] + except Exception as ex: + # print(resp_headers) + # printer("ERROR", ex, self.gui) + printer("ERROR", "Failed to retrieve direct url for " + episode.title, self.gui) + return False + + return True + + # instead of calling this function in the end, direct links are retrieved for each episode one by one + def __extract_direct_links(self): + printer("INFO", "Collecting download links...", self.gui) + + for episode in self.episodes: + cookie = self.__get_cookie_and_response(episode) + if cookie is None: + printer("INFO", "Retrying ...", self.gui) + sleep(2) + cookie = self.__get_cookie_and_response(episode) + + if cookie is None: + printer("ERROR", "Skipping ...", self.gui) + continue + + token = self.__get_token(cookie[1]) + + if not token: + printer("ERROR", "No token found... skipping", self.gui) + continue + + head = { + "origin": "https://kwik.cx", + "referer": episode.page_url, + "sec-fetch-dest": "document", + "sec-fetch-mode": "navigate", + "sec-fetch-site": "same-origin", + "sec-fetch-user": "?1", + "upgrade-insecure-requests": "1", + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36 Edg/80.0.361.69", + "cookie": cookie[0] + } + + payload = { + "_token": token + } + + post_url = "https://kwik.cx/d/" + episode.id + + print(head) + + resp_headers = self.session.post(post_url, data=payload, headers=head, allow_redirects=False).headers + try: + episode.download_url = resp_headers["location"] + except Exception as ex: + print(resp_headers) + printer("ERROR", ex, self.gui) + printer("ERROR", "Failed to retrieve direct url for " + episode.title, self.gui) + continue + + def get_direct_links(self): + try: + self.__collect_episodes() + self.__set_kwik_links() + # sleep(3) + # self.__extract_direct_links() + + return self.episodes + except Exception as ex: + printer("ERROR", ex, self.gui) + return None + + +# if __name__ == "__main__": +# import cloudscraper +# +# session = cloudscraper.create_scraper() +# scr = AnimePaheScraper("https://animepahe.com/anime/one-piece", 217, 223, session) +# eps = scr.get_direct_links() +# +# for ep in eps: +# print(ep.title) +# print(ep.download_url) +# print("=============================\n") diff --git a/anime_downloader/scrapers/animeultima/animeultima_scraper.py b/anime_downloader/scrapers/animeultima/animeultima_scraper.py index 9f22684..47d8a95 100644 --- a/anime_downloader/scrapers/animeultima/animeultima_scraper.py +++ b/anime_downloader/scrapers/animeultima/animeultima_scraper.py @@ -1,92 +1,139 @@ -import cloudscraper from bs4 import BeautifulSoup +from scrapers.base_scraper import BaseScraper +from util.Episode import Episode +from extractors.jwplayer_extractor import JWPlayerExtractor -def get_anime_id(url, session) : - page = session.get(url).content - soup_html = BeautifulSoup(page, "html.parser") +class AnimeUltimaScraper(BaseScraper): - button_with_id = soup_html.find("button", attrs={"class" : "button"}) + def 
__init__(self, url, start_episode, end_episode, session, gui=None, resolution="720", is_dub=False): + super().__init__(url, start_episode, end_episode, session, gui) + self.is_dub = False + self.resolution = resolution + self.base_url = "https://www1.animeultima.to" + self.extractor = JWPlayerExtractor(None, self.session) - if button_with_id : - return button_with_id["data-id"] + def get_anime_id(self): + page = self.session.get(self.url).content + soup_html = BeautifulSoup(page, "html.parser") - else : - meta_tag = soup_html.find("meta", attrs={"property" : "og:image"}) - if meta_tag : - content_data = meta_tag["content"].split("/") - return content_data[-2] + # print(soup_html) + button_with_id = soup_html.find("button", attrs={"class": "button"}) -def get_start_and_end_page(session, anime_id, start_epi=1, end_epi=50) : - start_page = 0 - end_page = 0 + if button_with_id: + return button_with_id["data-id"] - data = session.get("https://www1.animeultima.to/api/episodeList?animeId="+anime_id).json() + else: + meta_tag = soup_html.find("meta", attrs={"property": "og:image"}) + if meta_tag: + content_data = meta_tag["content"].split("/") + return content_data[-2] - last_page = data["last_page"] - max_total_epis = last_page * 50 + def get_start_and_end_page(self, anime_id): + # print("start end page") + start_page = 0 + end_page = 0 - if start_epi == 0 : - start_epi = 1 + data = self.session.get("https://www1.animeultima.to/api/episodeList?animeId=" + anime_id).json() - if end_epi == 0 : - end_epi = 1 + last_page = data["last_page"] + max_total_epis = last_page * 50 - if (max_total_epis - end_epi) % 50 == 0 : - start_page = round((max_total_epis - end_epi) / 50) - 1 - else : - start_page = round((max_total_epis - end_epi) / 50) + if self.end_episode >= max_total_epis: + start_page = 0 + elif (max_total_epis - self.end_episode) % 50 == 0: + start_page = round((max_total_epis - self.end_episode) / 50) - 1 + else: + start_page = round((max_total_epis - self.end_episode) / 50) - if (max_total_epis - start_epi) % 50 == 0 : - end_page = round((max_total_epis - start_epi) / 50) - 1 - else : - end_page = round((max_total_epis - start_epi) / 50) + if (max_total_epis - self.start_episode) % 50 == 0: + end_page = round((max_total_epis - self.start_episode) / 50) - 1 + else: + end_page = round((max_total_epis - self.start_episode) / 50) - return (start_page, end_page) + return start_page, end_page -def collect_episodes(anime_id, start_epi, end_epi, start_page, end_page, session, isDub=False) : - episodes = [] - base_url = "https://www1.animeultima.to/api/episodeList?animeId=" + anime_id + "&page=" - page_counter = start_page + def get_page_url(self, url): + # print("get page url") + page = self.session.get(url).content - while(page_counter <= end_page) : - url = base_url+str(page_counter) + soup_html = BeautifulSoup(page, "html.parser") + iframe = soup_html.find("iframe") - data = session.get(url).json() - has_dub = data["anime"]["hasDub"] - epis = data["episodes"] + if iframe: + return self.base_url + iframe["src"] - for epi in epis : - epi_no = int(epi["episode_num"]) + return None - if epi_no < start_epi or epi_no > end_epi: - continue + def collect_episodes(self, anime_id, start_page, end_page): + # print("collect epis") + base_url = "https://www1.animeultima.to/api/episodeList?animeId=" + anime_id + "&page=" + page_counter = start_page - title = epi["title"] + while page_counter <= end_page: + url = base_url + str(page_counter) + data = self.session.get(url).json() + has_dub = 
data["anime"]["hasDub"] + epis = data["episodes"] + for epi in epis: + epi_no = int(epi["episode_num"]) + if epi_no < self.start_episode or epi_no > self.end_episode: + continue -#This will get called initially -#Not fully implemented yet -def extract_episodes(url,names_url,start_epi=1, end_epi=50, isFiller=True, isDub=False, gui=None) : - session = cloudscraper.create_scraper() - anime_id = get_anime_id(url, session) - start_page, end_page = get_start_and_end_page(session, anime_id, start_epi, end_epi) + title = epi["title"] + page_url = None + if not self.is_dub: + page_url = epi["urls"]["sub"] + elif has_dub: + page_url = epi["urls"]["dub"] + else: + print("Dubbed episodes not available") + if page_url: + page_url = self.get_page_url(page_url) - # episodes_list = extract_episode_names(names_url, isFiller, start_epi, end_epi, gui) + episode = Episode(title, "Episode - " + str(epi_no)) + episode.page_url = page_url + episode.is_direct = False + self.set_stream_url(episode) + self.episodes.append(episode) -if __name__ == "__main__": - session = cloudscraper.create_scraper() - # id = get_anime_id("https://www1.animeultima.to/a/naruto-shippuuden_395410", session) - # print(id) - # print(get_start_and_end_page(session, id, 20, 450)) + print("Episode -", str(epi_no), "-", title) - # print(get_anime_id("https://www1.animeultima.to/a/naruto-shippuuden_395410", session)) - # print(get_start_and_end_page(1,513)) - print(session.get("https://www1.animeultima.to/faststream/2336").text) + page_counter += 1 + def set_stream_url(self, episode): + # print("set stream") + self.extractor.url = episode.page_url + stream_url = self.extractor.extract_stream_link(self.resolution) + print("Stream URL : " + stream_url) + episode.download_url = stream_url + + def set_stream_urls(self): + extractor = JWPlayerExtractor(None, self.session) + for episode in self.episodes: + extractor.url = episode.page_url + stream_url = extractor.extract_stream_link(self.resolution) + episode.dowload_url = stream_url + + def get_direct_links(self): + # print("direct links") + anime_id = self.get_anime_id() + start_page, end_page = self.get_start_and_end_page(anime_id) + + # print(anime_id) + # print(start_page, end_page) + + try: + self.collect_episodes(anime_id, start_page, end_page) + + return self.episodes + except Exception as ex: + print(ex) + return None diff --git a/anime_downloader/scrapers/base_scraper.py b/anime_downloader/scrapers/base_scraper.py new file mode 100644 index 0000000..c050d29 --- /dev/null +++ b/anime_downloader/scrapers/base_scraper.py @@ -0,0 +1,12 @@ +class BaseScraper: + def __init__(self, url, start_episode, end_episode, session, gui=None): + self.url = url + self.start_episode = start_episode + self.end_episode = end_episode + self.session = session + self.gui = gui + + self.episodes = [] + + def get_direct_links(self): + raise NotImplementedError diff --git a/anime_downloader/scrapers/fouranime/__init__.py b/anime_downloader/scrapers/fouranime/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/anime_downloader/scrapers/fouranime/fouranime_scraper.py b/anime_downloader/scrapers/fouranime/fouranime_scraper.py new file mode 100644 index 0000000..f70572b --- /dev/null +++ b/anime_downloader/scrapers/fouranime/fouranime_scraper.py @@ -0,0 +1,84 @@ +from bs4 import BeautifulSoup +from util.Episode import Episode +from util import Color +from scrapers.base_scraper import BaseScraper + + +class FourAnimeScraper(BaseScraper): + def __init__(self, url, start_episode, end_episode, session, 
gui=None): + super().__init__(url, start_episode, end_episode, session, gui) + + def __extract_page_urls(self): + Color.printer("INFO", "Extracting page URLs...", self.gui) + + page = self.session.get(self.url).content + + soup_html = BeautifulSoup(page, "html.parser") + + try: + server = soup_html.findAll("div", attrs={"class": "server"})[0] + epi_ranges = server.findAll("ul", attrs={"class": "episodes"}) + + for epi_range in epi_ranges: + epi_tags = epi_range.findAll("a", href=True) + + for epi_tag in epi_tags: + epi_number = int(epi_tag.text) + + if epi_number < self.start_episode or epi_number > self.end_episode: + continue + + episode = Episode(str(epi_number), "Episode - " + str(epi_number)) + episode.page_url = epi_tag["href"] + + self.episodes.append(episode) + + except Exception as ex : + print(ex) + return None + + return self.episodes + + def __extract_download_urls(self): + Color.printer("INFO", "Extracting download URLs...", self.gui) + success = True + for episode in self.episodes: + page = self.session.get(episode.page_url).content + + soup_html = BeautifulSoup(page, "html.parser") + + video_tag = soup_html.find("video", attrs={"id": "video1"}) + + # print(video_tag) + + if video_tag is None: + # print("checking div") + video_tag = soup_html.find("div", attrs={"id": "video1"}) + # print(video_tag) + + if video_tag is None: + # print("checking video") + video_tag = soup_html.find("video") + # print(video_tag) + + if video_tag is None: + Color.printer("ERROR", "Download link not found for " + episode.episode, self.gui) + success = False + continue + # print("----------------------------") + try: + episode.download_url = video_tag["src"] + success = True + except KeyError: + # print(soup_html) + Color.printer("ERROR", "Failed to retrieve download link not found for " + episode.episode, self.gui) + continue + + return success + + def get_direct_links(self): + if self.__extract_page_urls(): + if self.__extract_download_urls(): + return self.episodes + + return None diff --git a/anime_downloader/scrapers/nineanime/Anime_Scraper.py b/anime_downloader/scrapers/nineanime/Anime_Scraper.py index 509aefd..911198a 100644 --- a/anime_downloader/scrapers/nineanime/Anime_Scraper.py +++ b/anime_downloader/scrapers/nineanime/Anime_Scraper.py @@ -1,14 +1,13 @@ -import re -import requests +import cloudscraper import json import sys import os -# import browser_cookie3 as bc from util import Color from bs4 import BeautifulSoup from time import sleep from util.Episode import Episode from util.Episode import extract_episode_names +from extractors.mp4upload_extractor import Mp4UploadExtractor title_url = None isFiller = False @@ -28,25 +27,27 @@ cookies = None gui = None -session = requests.Session() +session = cloudscraper.create_scraper() episodes = [] -def get_token(url) : - global session, site_key, api_key, gui - s = requests.Session() +def get_token(url): + global session, site_key, api_key, gui try: - captcha_id = s.post("http://2captcha.com/in.php?key={}&method=userrecaptcha&googlekey={}&pageurl={}&invisible=1" - .format(api_key, site_key, url)).text.split('|')[1] + captcha_id = \ + session.post("http://2captcha.com/in.php?key={}&method=userrecaptcha&googlekey={}&pageurl={}&invisible=1" + .format(api_key, site_key, url)).text.split('|')[1] + + recaptcha_answer = session.get( + "http://2captcha.com/res.php?key={}&action=get&id={}".format(api_key, captcha_id)).text - recaptcha_answer = s.get("http://2captcha.com/res.php?key={}&action=get&id={}".format(api_key, captcha_id)).text - while 
'CAPCHA_NOT_READY' in recaptcha_answer: sleep(5) - recaptcha_answer = s.get("http://2captcha.com/res.php?key={}&action=get&id={}".format(api_key, captcha_id)).text - + recaptcha_answer = session.get( + "http://2captcha.com/res.php?key={}&action=get&id={}".format(api_key, captcha_id)).text + recaptcha_answer = recaptcha_answer.split('|')[1] # print("[Recaptcha answer] : {",recaptcha_answer,"}") @@ -57,36 +58,37 @@ def get_token(url) : return None -def get_mp4upload_index(servers_container) : - +def get_mp4upload_index(servers_container): global server_name, server_id - server_names = servers_container.findAll("span", attrs={"class" : "tab"}) + server_names = servers_container.findAll("span", attrs={"class": "tab"}) - for i in range(0, len(server_names)) : - if server_names[i].text.lower() == server_name.lower() : + for i in range(0, len(server_names)): + if server_names[i].text.lower() == server_name.lower(): server_id = server_names[i]["data-name"] return i - + return None -def verify(token) : + +def verify(token): global session payload = { - "g-recaptcha-response" : token + "g-recaptcha-response": token } session.post("https://9anime.to/waf-verify", data=payload) -def extract_page_urls(start_episode, end_episode, token) : + +def extract_page_urls(start_episode, end_episode, token): global session, episodes, nine_anime_url, download_9anime_url, ts_no, episodes, api_key, cookies, gui - session.headers.update ({ - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36' + session.headers.update({ + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36' }) - if token is None : - if api_key is None : + if token is None: + if api_key is None: Color.printer("ERROR", "No API Key Provided!", gui) sys.exit(0) @@ -94,15 +96,15 @@ def extract_page_urls(start_episode, end_episode, token) : Color.printer("INFO", "Solving recaptcha...", gui) token = get_token("https://9anime.to/waf-verify") - if not token : + if not token: Color.printer("ERROR", "Captcha solving failed!", gui) - Color.printer("INFO", "Trying to continue using browser cookies...", gui) + Color.printer("INFO", "Trying to continue ...", gui) # sys.exit(0) if token: verify(token) - else : - Color.printer("INFO", "Collecting browser cookies...", gui) + else: + Color.printer("INFO", "No API key or token given, trying to continue...", gui) # cookies = bc.load() #collect all browser cookies # session.cookies = cookies #set browser cookies for requests @@ -113,36 +115,36 @@ def extract_page_urls(start_episode, end_episode, token) : ts_no = soup_html.find("html")["data-ts"] - eps_url = episodes_url+"?ts="+ts_no + eps_url = episodes_url + "?ts=" + ts_no epi_data = session.get(eps_url).json()["html"] soup = BeautifulSoup(epi_data, "html.parser") - servers_container = soup.find("span", attrs={"class" : "tabs"}) + servers_container = soup.find("span", attrs={"class": "tabs"}) mp4upload_index = get_mp4upload_index(servers_container) - if mp4upload_index is None : + if mp4upload_index is None: return None - mp4upload_server = soup.findAll("div", attrs={"class" : "server"})[mp4upload_index] + mp4upload_server = soup.findAll("div", attrs={"class": "server"})[mp4upload_index] - episode_ranges = mp4upload_server.findAll("ul", attrs={"class" : "episodes"}) + episode_ranges = mp4upload_server.findAll("ul", attrs={"class": "episodes"}) - for episode_range in episode_ranges : + for episode_range in 
episode_ranges: eps = episode_range.findAll("a", href=True) - for episode in eps : + for episode in eps: epi_number = int(episode.text) - if epi_number < start_episode or epi_number > end_episode : + if epi_number < start_episode or epi_number > end_episode: continue # epi = get_episode(epi_number) # if epi == None : # continue - epi = Episode(str(epi_number), "Episode - "+str(epi_number)) + epi = Episode(str(epi_number), "Episode - " + str(epi_number)) epi.page_url = nine_anime_url + episode["href"] epi.id = episode["data-id"] @@ -151,105 +153,109 @@ def extract_page_urls(start_episode, end_episode, token) : return episodes -def extract_download_urls() : + +def extract_download_urls(): global session, gui down_base = "https://9anime.to/ajax/episode/info?" Color.printer("INFO", "Extracting download URLs...", gui) - for episode in episodes : - if(episode.id is None) : + for episode in episodes: + if (episode.id is None): episode.download_url = None continue - url = down_base + "ts="+ts_no+"&id="+episode.id+"&server="+server_id + url = down_base + "ts=" + ts_no + "&id=" + episode.id + "&server=" + server_id target = session.get(url).json()["target"] episode.page_url = target - video_page = session.get(target).content - - string = video_page.decode("utf-8") - - www_base = re.search("false\|(.*)\|devicePixelRatio",string).group(1) - url_id = re.search("video\|(.*)\|282", string).group(1) + download_url = Mp4UploadExtractor(target, session).extract_direct_url() - download_url = "https://"+www_base+".mp4upload.com:282/d/"+url_id+"/video.mp4" + # video_page = session.get(target).content + # + # string = video_page.decode("utf-8") + # + # www_base = re.search("false\|(.*)\|devicePixelRatio",string).group(1) + # url_id = re.search("video\|(.*)\|282", string).group(1) + # + # download_url = "https://"+www_base+".mp4upload.com:282/d/"+url_id+"/video.mp4" episode.download_url = download_url -def get_epi(eps, num) : - for epi in eps : - if epi.episode == num : + +def get_epi(eps, num): + for epi in eps: + if epi.episode == num: return epi - + return None -def set_titles(start_episode, end_episode) : + +def set_titles(start_episode, end_episode): global episodes, title_url, isFiller - if not title_url : + if not title_url: return - + eps = extract_episode_names(title_url, isFiller, start_episode, end_episode) - for episode in episodes : + for episode in episodes: epi = get_epi(eps, episode.episode) - if epi : + if epi: episode.title = epi.title eps.remove(epi) - -def writeData() : + +def write_data(): global episodes, gui Color.printer("INFO", "Writing results to results.csv file...", gui) data_file = open("results.csv", "w") - for episode in episodes : - data_file.write(episode.episode+","+episode.download_url+"\n") - + for episode in episodes: + data_file.write(episode.episode + "," + episode.download_url + "\n") + data_file.close() -def main(start_episode=-1, end_episode=-1, token = None) : +def main(start_episode=-1, end_episode=-1, token=None): global episodes, download_9anime_url, episodes_url, api_key, gui start_episode = int(start_episode) end_episode = int(end_episode) - if not token : + if not token: with open("settings.json") as (json_file): data = json.load(json_file) api_key = data["api_key"] - if not download_9anime_url : + if not download_9anime_url: download_9anime_url = input("Anime URL : ") - - if start_episode == -1 : + + if start_episode == -1: start_episode = int(input("Enter Start Episode : ")) - - if end_episode == -1 : + + if end_episode == -1: end_episode = int(input("Enter End 
Episode : ")) episodes_url = episodes_url + download_9anime_url.split(".")[2].split("/")[0] episodes = extract_page_urls(start_episode, end_episode, token) - if episodes == None : + if episodes is None: return - - if title_url : + + if title_url: set_titles(start_episode, end_episode) - else : + else: Color.printer("INFO", "animefiller.com URL not provided to collect episode names...", gui) Color.printer("INFO", "Skipping collecting episode names...", gui) extract_download_urls() - writeData() + write_data() -if __name__ == "__main__" : - if sys.platform.lower() == "win32" : +if __name__ == "__main__": + if sys.platform.lower() == "win32": os.system("color") main() - diff --git a/anime_downloader/scrapers/nineanime/nineanime_scraper.py b/anime_downloader/scrapers/nineanime/nineanime_scraper.py new file mode 100644 index 0000000..a8b259d --- /dev/null +++ b/anime_downloader/scrapers/nineanime/nineanime_scraper.py @@ -0,0 +1,136 @@ +import json +from bs4 import BeautifulSoup +from util import Color +from util.Episode import Episode +from util.captcha_solver import TwoCaptchaSolver +from scrapers.base_scraper import BaseScraper +from extractors.mp4upload_extractor import Mp4UploadExtractor + + +class NineAnimeScraper(BaseScraper): + + def __init__(self, url, start_episode, end_episode, session, gui=None, token=None): + super().__init__(url, start_episode, end_episode, session, gui) + self.token = token + self.api_key = None + self.ts_no = None + self.server_id = None + self.site_key = "6LfEtpwUAAAAABoJ_595sf-Hh0psstoatwZpLex1" + self.server_name = "Mp4upload" + self.nine_anime_url = "https://9anime.to" + + self.episodes_url = "https://9anime.to/ajax/film/servers/" + url.split(".")[2].split("/")[0] + + if not token: + try: + with open("settings.json") as (json_file): + data = json.load(json_file) + self.api_key = data["api_key"] + except: + Color.printer("ERROR", "Reading settings file failed! 
Continue without API key...", self.gui) + self.api_key = "" + + def __get_mp4upload_index(self, servers_container): + server_names = servers_container.findAll("span", attrs={"class": "tab"}) + + for i in range(0, len(server_names)): + if server_names[i].text.lower() == self.server_name.lower(): + self.server_id = server_names[i]["data-name"] + return i + + return None + + def __verify(self): + payload = { + "g-recaptcha-response": self.token + } + + self.session.post("https://9anime.to/waf-verify", data=payload) + + def __extract_page_urls(self): + if self.token is None : + if self.api_key != "" and self.api_key != "insert_2captcha_api_key": + Color.printer("INFO", "Solving recaptcha...", self.gui) + + captcha_solver = TwoCaptchaSolver("https://9anime.to/waf-verify", self.site_key, self.api_key, self.session) + + self.token = captcha_solver.solve() + if not self.token: + Color.printer("ERROR", "Captcha solving failed!", self.gui) + Color.printer("INFO", "Trying to continue ...", self.gui) + + if self.token: + self.__verify() + else: + Color.printer("INFO", "No API key or token given, trying to continue...", self.gui) + + Color.printer("INFO", "Extracting page URLs...", self.gui) + + anime_page = self.session.get(self.url).content + soup_html = BeautifulSoup(anime_page, "html.parser") + + try : + self.ts_no = soup_html.find("html")["data-ts"] + + eps_url = self.episodes_url + "?ts=" + self.ts_no + + epi_data = self.session.get(eps_url).json()["html"] + + soup = BeautifulSoup(epi_data, "html.parser") + + servers_container = soup.find("span", attrs={"class": "tabs"}) + + mp4upload_index = self.__get_mp4upload_index(servers_container) + + if mp4upload_index is None: + return None + + mp4upload_server = soup.findAll("div", attrs={"class": "server"})[mp4upload_index] + + episode_ranges = mp4upload_server.findAll("ul", attrs={"class": "episodes"}) + + for episode_range in episode_ranges: + eps = episode_range.findAll("a", href=True) + for episode in eps: + epi_number = int(episode.text) + + if epi_number < self.start_episode or epi_number > self.end_episode: + continue + + epi = Episode(str(epi_number), "Episode - " + str(epi_number)) + + epi.page_url = self.nine_anime_url + episode["href"] + epi.id = episode["data-id"] + + self.episodes.append(epi) + except Exception as ex: + Color.printer("ERROR", ex, self.gui) + return None + + return self.episodes + + def __extract_download_urls(self): + down_base = "https://9anime.to/ajax/episode/info?" 
+ Color.printer("INFO", "Extracting download URLs...", self.gui) + + for episode in self.episodes: + if (episode.id is None): + episode.download_url = None + continue + + url = down_base + "ts=" + self.ts_no + "&id=" + episode.id + "&server=" + self.server_id + target = self.session.get(url).json()["target"] + + episode.page_url = target + + download_url = Mp4UploadExtractor(target, self.session).extract_direct_url() + + episode.download_url = download_url + + def get_direct_links(self): + if self.__extract_page_urls(): + self.__extract_download_urls() + + return self.episodes + + return None diff --git a/anime_downloader/util/Color.py b/anime_downloader/util/Color.py index 030329a..51fb192 100644 --- a/anime_downloader/util/Color.py +++ b/anime_downloader/util/Color.py @@ -1,4 +1,4 @@ -class Color(): +class Color: BLACK = lambda x: '\u001b[30m' + str(x) RED = lambda x: '\u001b[91m' + str(x) GREEN = lambda x: '\u001b[92m' + str(x) @@ -10,13 +10,14 @@ class Color(): UNDERLINE = lambda x: '\u001b[4m' + str(x) RESET = lambda x: '\u001b[0m' + str(x) -def printer(msg_type, msg, gui=None) : - if gui : - gui.gui_queue.put("["+msg_type+"] : "+msg) - if msg_type == "INFO" : - print(Color.YELLOW("[INFO!] : "+msg) + Color.RESET(" ")) - elif msg_type == "ERROR" : - print(Color.RED("[ERROR!] : "+str(msg)) + Color.RESET(" ")) - elif msg_type == "BANNER" : +def printer(msg_type, msg, gui=None): + if gui: + gui.gui_queue.put("[" + msg_type + "] : " + str(msg)) + + if msg_type == "INFO": + print(Color.YELLOW("[INFO!] : " + msg) + Color.RESET(" ")) + elif msg_type == "ERROR": + print(Color.RED("[ERROR!] : " + str(msg)) + Color.RESET(" ")) + elif msg_type == "BANNER": print(Color.CYAN(msg) + Color.RESET(" ")) diff --git a/anime_downloader/util/Episode.py b/anime_downloader/util/Episode.py index ababe16..ce44fd1 100644 --- a/anime_downloader/util/Episode.py +++ b/anime_downloader/util/Episode.py @@ -2,16 +2,18 @@ from util.Color import printer from bs4 import BeautifulSoup -class Episode : + +class Episode: def __init__(self, title, episode): self.title = title self.episode = episode self.id = None self.page_url = None self.download_url = None + self.is_direct = True -def extract_episode_names(url, isFiller, start_epi, end_epi, gui=None) : +def extract_episode_names(url, is_filler, start_epi, end_epi, gui=None): printer("INFO", "Collecting episode names...", gui) episodes = [] @@ -20,28 +22,27 @@ def extract_episode_names(url, isFiller, start_epi, end_epi, gui=None) : page = session.get(url).content soup_html = BeautifulSoup(page, "html.parser") - table = soup_html.find("table", attrs={"class" : "EpisodeList"}).find("tbody") + table = soup_html.find("table", attrs={"class": "EpisodeList"}).find("tbody") - if isFiller : + if is_filler: epis = table.findAll("tr") - else : - epis = table.findAll("tr", attrs={"class" : ["anime_canon", "mixed_canon/filler", "manga_canon"]}) - - for epi in epis : - epi_no = int(epi.find("td", attrs={"class" : "Number"}).text) + else: + epis = table.findAll("tr", attrs={"class": ["anime_canon", "mixed_canon/filler", "manga_canon"]}) + + for epi in epis: + epi_no = int(epi.find("td", attrs={"class": "Number"}).text) - if epi_no < start_epi : + if epi_no < start_epi: continue - if epi_no > end_epi : + if epi_no > end_epi: break - title = epi.find("td", attrs={"class" : "Title"}).find("a").text + title = epi.find("td", attrs={"class": "Title"}).find("a").text episode = Episode(title, "Episode - " + str(epi_no)) episodes.append(episode) # print(episode.episode, ":", episode.title) - + 
printer("INFO", "Successfully collected episode names!") return episodes - diff --git a/anime_downloader/util/captcha_solver.py b/anime_downloader/util/captcha_solver.py new file mode 100644 index 0000000..7007a92 --- /dev/null +++ b/anime_downloader/util/captcha_solver.py @@ -0,0 +1,32 @@ +from time import sleep + + +class TwoCaptchaSolver: + def __init__(self, url, site_key, api_key, session): + self.url = url + self.site_key = site_key + self.api_key = api_key + self.session = session + + def solve(self): + try: + captcha_id = \ + self.session.post( + "http://2captcha.com/in.php?key={}&method=userrecaptcha&googlekey={}&pageurl={}&invisible=1" + .format(self.api_key, self.site_key, self.url)).text.split('|')[1] + + recaptcha_answer = self.session.get( + "http://2captcha.com/res.php?key={}&action=get&id={}".format(self.api_key, captcha_id)).text + + while 'CAPCHA_NOT_READY' in recaptcha_answer: + sleep(5) + recaptcha_answer = self.session.get( + "http://2captcha.com/res.php?key={}&action=get&id={}".format(self.api_key, captcha_id)).text + + recaptcha_answer = recaptcha_answer.split('|')[1] + + # print("[Recaptcha answer] : {",recaptcha_answer,"}") + return recaptcha_answer + + except Exception: + return None diff --git a/anime_downloader/util/ffmpeg_downloader.py b/anime_downloader/util/ffmpeg_downloader.py new file mode 100644 index 0000000..82500a8 --- /dev/null +++ b/anime_downloader/util/ffmpeg_downloader.py @@ -0,0 +1,42 @@ +from shutil import which +from util.Color import printer +import subprocess +from platform import system + + +class FFMPEGDownloader: + def __init__(self, episode, directory, gui=None): + self.episode = episode + self.directory = directory + self.gui = gui + + def __clean_file_name(self, file_name): + for c in r'[]/\;,><&*:%=+@#^()|?^': + file_name = file_name.replace(c, '') + + return file_name + + def download(self): + # print("FFMPEG", which("ffmpeg")) + if self.episode.download_url is None: + printer("ERROR", "Download URL is not set for " + self.episode.episode + ", skipping...", self.gui) + return + + if which("ffmpeg") is None: + printer("ERROR", "FFMPEG not found! 
diff --git a/anime_downloader/util/name_collector.py b/anime_downloader/util/name_collector.py
new file mode 100644
index 0000000..c4c0858
--- /dev/null
+++ b/anime_downloader/util/name_collector.py
@@ -0,0 +1,76 @@
+import requests
+from bs4 import BeautifulSoup
+from util.Episode import Episode
+
+
+class EpisodeNamesCollector:
+    def __init__(self, url, start_episode, end_episode, is_filler=False, episodes=None):
+        self.url = url
+        self.start_episode = start_episode
+        self.end_episode = end_episode
+        self.is_filler = is_filler
+        self.episodes = episodes
+
+    def __extract_episode_names(self):
+        episodes = []
+
+        page = requests.get(self.url).content
+        soup_html = BeautifulSoup(page, "html.parser")
+
+        table = soup_html.find("table", attrs={"class": "EpisodeList"}).find("tbody")
+
+        if self.is_filler:
+            epis = table.findAll("tr")
+        else:
+            epis = table.findAll("tr", attrs={"class": ["anime_canon", "mixed_canon/filler", "manga_canon"]})
+
+        for epi in epis:
+            epi_no = int(epi.find("td", attrs={"class": "Number"}).text)
+
+            if epi_no < self.start_episode or epi_no > self.end_episode:
+                continue
+
+            title = epi.find("td", attrs={"class": "Title"}).find("a").text
+            episode = Episode(title, "Episode - " + str(epi_no))
+
+            episodes.append(episode)
+
+        return episodes
+
+    def __get_episode(self, epis, episode):
+        for epi in epis:
+            if epi.episode == episode.episode:
+                return epi
+
+        return None
+
+    def __set_episode_names(self, epis):
+        fillers = []
+        for episode in self.episodes:
+            epi = self.__get_episode(epis, episode)
+            if epi:
+                episode.title = epi.title
+                # print(episode.episode,"Title -",episode.title)
+                epis.remove(epi)
+            else:
+                print(episode.episode, "is Filler, skipped")
+                fillers.append(episode)
+
+        return fillers
+
+    def __remove_fillers(self, fillers):
+        for filler in fillers:
+            self.episodes.remove(filler)
+
+    def collect_episode_names(self):
+        epis = self.__extract_episode_names()
+
+        if not self.episodes:
+            return epis
+
+        fillers = self.__set_episode_names(epis)
+
+        if not self.is_filler:
+            self.__remove_fillers(fillers)
+
+        return self.episodes
diff --git a/docs/images/gui.png b/docs/images/gui.png
index 7aef40d..02419a1 100644
Binary files a/docs/images/gui.png and b/docs/images/gui.png differ
diff --git a/requirements.txt b/requirements.txt
index 618edd7..7441c1b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 art==4.5
 requests==2.22.0
-cloudscraper==1.2.28
+cloudscraper==1.2.30
 beautifulsoup4==4.8.2
 PySimpleGUI==4.16.0
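
Finally, a rough, hypothetical sketch of how the new `EpisodeNamesCollector` might be driven on its own. The URL is a placeholder for an episode-list page that exposes the same `EpisodeList` table layout the old `extract_episode_names` helper scraped; passing an existing `episodes=` list instead would merge titles into it and, when `is_filler=False`, drop filler episodes.

```python
# Hypothetical usage sketch - not part of the patch.
# Placeholder URL; assumes the page contains the "EpisodeList" table parsed above.
from util.name_collector import EpisodeNamesCollector

collector = EpisodeNamesCollector("https://example.com/shows/some-anime", 1, 10, is_filler=False)
episodes = collector.collect_episode_names()  # standalone call: returns freshly scraped Episode objects

for epi in episodes:
    print(epi.episode, "-", epi.title)
```
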