Merge pull request #16 from Oshan96/dev
Dev changes for v1.0.3
Showing 10 changed files with 290 additions and 5 deletions.
anime_downloader/scrapers/animefreak/animefreak_scraper.py (86 additions, 0 deletions)
import re
from bs4 import BeautifulSoup
from util.Episode import Episode
from scrapers.base_scraper import BaseScraper
from util.Color import printer


class AnimeFreakScraper(BaseScraper):
    def __init__(self, url, start_episode, end_episode, session, gui=None, is_dub=False):
        super().__init__(url, start_episode, end_episode, session, gui)
        self.is_dub = is_dub

    def __extract_source_links(self, page_content):
        # Pull every "file : '<url>'" value out of the embedded player script.
        # A raw string avoids invalid-escape warnings in the regex; [htps]
        # anchors the capture at the first character of an http/https URL.
        link_sources = [match.group(1) for match in
                        re.finditer(r"file\s*:\s*[\"\']\s*([htps][^\"\']+)", page_content)]

        return link_sources

    def __set_download_link(self, episode):
        # Subbed episodes live at the base page URL (mirror at "/2");
        # dubbed episodes at "/3" (mirror at "/4").
        if not self.is_dub:
            print("sub")
            response = self.session.get(episode.page_url)
            if response.status_code != 200:
                print("checking subbed mirror")
                response = self.session.get(episode.page_url + "/2")
        else:
            print("dub")
            response = self.session.get(episode.page_url + "/3")
            if response.status_code != 200:
                print("checking dubbed mirror")
                response = self.session.get(episode.page_url + "/4")

        sources = self.__extract_source_links(response.text)
        for source in sources:
            if ".mp4" in source:
                episode.download_url = source
                return True

        return False

    def __collect_episodes(self):
        printer("INFO", "Extracting page URLs...", self.gui)
        episodes = []
        response = self.session.get(self.url)
        if response.status_code == 200:
            soup_html = BeautifulSoup(response.content, "html.parser")
            # The second "check-list" <ul> on the page holds the episode anchors.
            epi_tags = soup_html.findAll("ul", attrs={"class": "check-list"})[1].findAll("a", href=True)

            for epi_tag in epi_tags:
                href = epi_tag["href"]
                # The episode number is the trailing segment of the URL slug.
                epi_no = int(href.split("-")[-1])

                if epi_no < self.start_episode or epi_no > self.end_episode:
                    continue

                episode = Episode("Episode - " + str(epi_no), "Episode - " + str(epi_no))
                episode.page_url = href

                try:
                    if self.__set_download_link(episode):
                        episodes.append(episode)
                    else:
                        printer("ERROR", "Failed to collect download link for " + episode.title, self.gui)
                except Exception as ex:
                    printer("ERROR", str(ex), self.gui)

        return episodes

    def get_direct_links(self):
        try:
            episodes = self.__collect_episodes()
            if len(episodes) > 0:
                return episodes
            return None
        except Exception as ex:
            printer("ERROR", str(ex), self.gui)
            return None
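For context, a minimal usage sketch of the new scraper. This is an assumption-laden illustration, not part of the diff: the anime URL and episode range are placeholders, and it assumes the script is run from the anime_downloader directory so the project imports resolve. A cloudscraper session is used because the project pins cloudscraper in requirements.txt.

    # Hypothetical usage sketch; URL and episode range are placeholders.
    import cloudscraper
    from scrapers.animefreak.animefreak_scraper import AnimeFreakScraper

    session = cloudscraper.create_scraper()  # Cloudflare-aware requests session
    scraper = AnimeFreakScraper("https://www.animefreak.tv/watch/some-anime",  # placeholder
                                start_episode=1, end_episode=5,
                                session=session, is_dub=False)
    episodes = scraper.get_direct_links()  # list of Episode objects, or None on failure
    if episodes:
        for epi in episodes:
            print(epi.title, epi.download_url)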
anime_downloader/scrapers/gogoanime/gogoanime_scraper.py (110 additions, 0 deletions)
import re
from util.Episode import Episode
from bs4 import BeautifulSoup
from extractors.jwplayer_extractor import JWPlayerExtractor
from scrapers.base_scraper import BaseScraper
from util.Color import printer


class GoGoAnimeScraper(BaseScraper):
    def __init__(self, url, start_episode, end_episode, session, gui=None, resolution="480"):
        super().__init__(url, start_episode, end_episode, session, gui)
        self.resolution = resolution
        self.extractor = JWPlayerExtractor(None, self.session)
        self.anime_id = None
        # Mirror endpoints for the episode-list AJAX API; tried in order.
        self.api_link_bases = ['https://ajax.gogocdn.net/ajax/load-list-episode',
                               'https://ajax.apimovie.xyz/ajax/load-list-episode']

        self.__set_anime_id()

    def __set_anime_id(self):
        # The numeric anime id is exposed as a hidden <input id="movie_id"> on the page.
        response = self.session.get(self.url)
        if response.status_code == 200:
            soup_html = BeautifulSoup(response.content, "html.parser")
            movie_id_tag = soup_html.find("input", attrs={"id": "movie_id"})
            if movie_id_tag is not None:
                self.anime_id = movie_id_tag["value"]

    def __get_episode_data(self):
        # Query each API mirror until one returns the episode list.
        for base_link in self.api_link_bases:
            api_link = base_link + "?ep_start=" + str(self.start_episode) + "&ep_end=" + str(
                self.end_episode) + "&id=" + self.anime_id
            response = self.session.get(api_link)
            if response.status_code == 200:
                return response.content

        return None

    def __get_page_url(self, href):
        # Episode hrefs are relative; prepend the site root taken from the category URL.
        base_url = re.search(r"(.*)/category/", self.url).group(1)
        return base_url + href

    def __set_stream_url(self, episode):
        response = self.session.get(episode.page_url)
        if response.status_code == 200:
            soup_html = BeautifulSoup(response.content, "html.parser")
            item_tag = soup_html.find("li", attrs={"class": "anime"}).find("a")
            streamer_url = item_tag["data-video"]
            if "https" not in streamer_url:
                streamer_url = "https:" + streamer_url

            streamer_resp = self.session.get(streamer_url)
            if streamer_resp.status_code == 200:
                sources = self.extractor.extract_sources(streamer_resp.text)
                src = ""
                for source in sources:
                    if "m3u8" in source:
                        src = source
                        break

                if src != "":
                    # The master playlist references per-resolution variant playlists;
                    # join the chosen variant back onto the master playlist's base path.
                    res_link_id = self.extractor.get_resolution_link(src, self.resolution)
                    stream_base = re.search(r"(.*)/[\S]+\.m3u8", src).group(1)
                    episode.download_url = stream_base + "/" + res_link_id
                    print("stream url:", episode.download_url)

                    return True

        return False

    def __collect_episodes(self):
        printer("INFO", "Extracting page URLs...", self.gui)
        episodes = []
        if self.anime_id is not None:
            data = self.__get_episode_data()
            if data is not None:
                soup_html = BeautifulSoup(data, "html.parser")
                anchor_tags = soup_html.findAll("a", href=True)
                for anchor in anchor_tags:
                    href = anchor["href"].strip()
                    epi_no = int(href.split("-")[-1])

                    if epi_no < self.start_episode or epi_no > self.end_episode:
                        continue

                    episode = Episode("Episode - " + str(epi_no), "Episode - " + str(epi_no))
                    episode.is_direct = False
                    episode.page_url = self.__get_page_url(href)
                    if self.__set_stream_url(episode):
                        episodes.append(episode)
                    else:
                        printer("ERROR", "Failed to collect download link for " + episode.title, self.gui)

        return episodes

    def get_direct_links(self):
        try:
            episodes = self.__collect_episodes()
            if len(episodes) > 0:
                return episodes
            return None
        except Exception as ex:
            printer("ERROR", str(ex), self.gui)
            return None
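The trickiest step above is the playlist-path arithmetic in __set_stream_url. A small standalone illustration, with entirely made-up URLs, of how the variant playlist name returned for a resolution is joined onto the master playlist's base path:

    # Illustration only; both URLs are made up.
    import re

    master = "https://cdn.example.com/stream/abc123/master.m3u8"
    variant = "ep.1.480p.m3u8"  # e.g. what get_resolution_link might return for "480"

    stream_base = re.search(r"(.*)/[\S]+\.m3u8", master).group(1)
    assert stream_base == "https://cdn.example.com/stream/abc123"
    download_url = stream_base + "/" + variant
    # -> https://cdn.example.com/stream/abc123/ep.1.480p.m3u8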
import jsbeautifier as js
from bs4 import BeautifulSoup
from extractors.jwplayer_extractor import JWPlayerExtractor


class JsUnpacker:
    def __init__(self):
        self.jwp_extractor = JWPlayerExtractor(None, None)

    def eval(self, func):
        # jsbeautifier's beautify also unpacks common eval(function(p,a,c,k,e,d)...)
        # payloads into readable source, without executing any JavaScript.
        return js.beautify(func)

    def extract_link(self, func):
        data = self.eval(func)
        if "jwplayer" in data:
            print("jwplayer source will be returned")
            links = self.jwp_extractor.extract_sources(data)
            if links is not None and len(links) > 0:
                src = links[0]
            else:
                print("no sources found")
                return None
        else:
            print("Any anchor href will be returned")
            anch = BeautifulSoup(data, "html.parser").find("a")
            if anch is not None:
                # Strip stray quoting and escape characters left over from the JS string.
                src = anch['href'].replace('\"', '').replace('\'', '').replace('\\', '')
            else:
                print("No anchor links found")
                return None

        return src
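A quick sketch of the anchor-href path through extract_link, assuming the project's imports resolve. The input is a hand-written stand-in for unpacked player JavaScript, not real site output; with no "jwplayer" marker present, the first anchor's href should come back:

    # Sketch only; the payload is a made-up stand-in for player JS.
    unpacker = JsUnpacker()

    payload = "document.write('<a href=\"https://example.com/video.mp4\">watch</a>')"
    print(unpacker.extract_link(payload))  # expected: https://example.com/video.mp4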
jsbeautifier
requests==2.22.0
art==4.5
cloudscraper==1.2.33
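The one dependency this release adds is jsbeautifier, which backs JsUnpacker.eval. A minimal sanity check of the library, with an illustrative one-liner as input (exact output formatting may vary by version):

    # Minimal check of the new jsbeautifier dependency.
    import jsbeautifier
    print(jsbeautifier.beautify("var a=1;function f(){return a;}"))
    # prints an indented, multi-line version of the one-liner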