-
-
Notifications
You must be signed in to change notification settings - Fork 288
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #878 from dipu-bd/dev
Version 2.26.3
- Loading branch information
Showing
57 changed files
with
2,387 additions
and
632 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
2.26.2 | ||
2.26.3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,85 +1,106 @@ | ||
# -*- coding: utf-8 -*- | ||
import json | ||
import logging | ||
from urllib.parse import quote_plus | ||
import re | ||
from urllib.parse import urlparse | ||
from ..utils.crawler import Crawler | ||
|
||
logger = logging.getLogger(__name__)

# %s is replaced by the url-encoded search query (WordPress Madara theme search).
search_url = (
    "https://1stkissnovel.love/?s=%s&post_type=wp-manga&author=&artist=&release="
)
# Madara AJAX endpoint used to fetch the full chapter list for a novel.
wp_admin_ajax_url = "https://1stkissnovel.love/wp-admin/admin-ajax.php"


class OneKissNovelCrawler(Crawler):
    """Crawler for 1stkissnovel.love (WordPress Madara theme)."""

    base_url = "https://1stkissnovel.love/"

    def search_novel(self, query):
        """Search the site for *query* and return a list of result dicts
        with ``title``, ``url`` and ``info`` keys."""
        query = query.lower().replace(" ", "+")
        soup = self.get_soup(search_url % query)

        results = []
        for tab in soup.select(".c-tabs-item__content"):
            a = tab.select_one(".post-title h3 a")
            latest = tab.select_one(".latest-chap .chapter a").text
            votes = tab.select_one(".rating .total_votes").text
            results.append(
                {
                    "title": a.text.strip(),
                    "url": self.absolute_url(a["href"]),
                    "info": "%s | Rating: %s" % (latest, votes),
                }
            )
        # end for

        return results
    # end def

    def read_novel_info(self):
        """Get novel title, author, cover etc."""
        logger.debug("Visiting %s", self.novel_url)
        soup = self.get_soup(self.novel_url)

        # The title heading may contain extra <span> badges (e.g. "HOT");
        # drop them before reading the text.
        possible_title = soup.select_one(".post-title h1")
        for span in possible_title.select("span"):
            span.extract()
        # end for
        self.novel_title = possible_title.text.strip()
        logger.info("Novel title: %s", self.novel_title)

        self.novel_cover = self.absolute_url(
            soup.select_one(".summary_image a img")["src"]
        )
        logger.info("Novel cover: %s", self.novel_cover)

        self.novel_author = " ".join(
            [
                a.text.strip()
                for a in soup.select('.author-content a[href*="manga-author"]')
            ]
        )
        logger.info("%s", self.novel_author)

        # The numeric post id is required by the AJAX chapter-list endpoint.
        self.novel_id = soup.select_one("#manga-chapters-holder")["data-id"]
        logger.info("Novel id: %s", self.novel_id)

        # For getting cookies
        # self.submit_form(wp_admin_ajax_url, data={
        #     'action': 'manga_views',
        #     'manga': self.novel_id,
        # })
        # print(self.cookies)
        response = self.submit_form(wp_admin_ajax_url, data={
            'action': 'manga_get_chapters',
            'manga': self.novel_id,
        })
        soup = self.make_soup(response)
        # Chapters are listed newest-first; reverse to get reading order.
        # Group every 100 chapters into one synthetic volume.
        for a in reversed(soup.select(".wp-manga-chapter a")):
            chap_id = len(self.chapters) + 1
            vol_id = 1 + len(self.chapters) // 100
            if chap_id % 100 == 1:
                self.volumes.append({"id": vol_id})
            # end if
            self.chapters.append(
                {
                    "id": chap_id,
                    "volume": vol_id,
                    "title": a.text.strip(),
                    "url": self.absolute_url(a["href"]),
                }
            )
        # end for
    # end def

    def download_chapter_body(self, chapter):
        """Download body of a single chapter and return as clean html format."""
        logger.info("Visiting %s", chapter["url"])
        soup = self.get_soup(chapter["url"])

        contents = soup.select_one("div.text-left")
        # Strip headings, ad blocks and scripts from the chapter content.
        for bad in contents.select("h3, .code-block, script, .adsbygoogle"):
            bad.decompose()
        # end for

        body = self.extract_contents(contents)
        return "<p>" + "</p><p>".join(body) + "</p>"
    # end def
# end class
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
# -*- coding: utf-8 -*- | ||
import json | ||
import logging | ||
import re | ||
from urllib.parse import urlparse | ||
from ..utils.crawler import Crawler | ||
|
||
logger = logging.getLogger(__name__)

# NOTE: Site doesn't have a proper search layout, so search is disabled.
# search_url = 'https://amnesiactl.com/?s=%s&post_type=wp-manga'

# Madara AJAX endpoint used to fetch the full chapter list for a novel.
chapter_list_url = 'https://amnesiactl.com/wp-admin/admin-ajax.php'


class Amnesiactl(Crawler):
    """Crawler for amnesiactl.com (WordPress Madara theme)."""

    base_url = 'https://amnesiactl.com/'

    # NOTE: Site doesn't have proper search layout.
    # def search_novel(self, query):
    #     query = query.lower().replace(' ', '+')
    #     soup = self.get_soup(search_url % query)

    #     results = []
    #     for tab in soup.select('.c-tabs-item__content'):
    #         a = tab.select_one('.post-title h3 a')
    #         latest = tab.select_one('.latest-chap .chapter a').text
    #         votes = tab.select_one('.rating .total_votes').text
    #         results.append({
    #             'title': a.text.strip(),
    #             'url': self.absolute_url(a['href']),
    #             'info': '%s | Rating: %s' % (latest, votes),
    #         })
    #     # end for

    #     return results
    # # end def

    def read_novel_info(self):
        '''Get novel title, author, cover etc.'''
        logger.debug('Visiting %s', self.novel_url)
        soup = self.get_soup(self.novel_url)

        # The title heading may contain extra <span> badges; drop them
        # before reading the text.
        possible_title = soup.select_one('.post-title h1')
        for span in possible_title.select('span'):
            span.extract()
        # end for
        self.novel_title = possible_title.text.strip()
        logger.info('Novel title: %s', self.novel_title)

        self.novel_cover = self.absolute_url(
            soup.select_one('.summary_image a img')['src'])
        logger.info('Novel cover: %s', self.novel_cover)

        self.novel_author = ' '.join([
            a.text.strip()
            for a in soup.select('.author-content a[href*="novel-author"]')
        ])
        logger.info('%s', self.novel_author)

        # The numeric post id is required by the AJAX chapter-list endpoint.
        self.novel_id = soup.select_one('#manga-chapters-holder')['data-id']
        logger.info('Novel id: %s', self.novel_id)

        response = self.submit_form(chapter_list_url, data={
            'action': 'manga_get_chapters',
            'manga': self.novel_id,
        })
        soup = self.make_soup(response)
        # Chapters are listed newest-first; reverse to get reading order.
        # Group every 100 chapters into one synthetic volume.
        for a in reversed(soup.select(".wp-manga-chapter a")):
            chap_id = len(self.chapters) + 1
            vol_id = 1 + len(self.chapters) // 100
            if chap_id % 100 == 1:
                self.volumes.append({"id": vol_id})
            # end if
            self.chapters.append(
                {
                    "id": chap_id,
                    "volume": vol_id,
                    "title": a.text.strip(),
                    "url": self.absolute_url(a["href"]),
                }
            )
        # end for
    # end def

    def download_chapter_body(self, chapter):
        '''Download body of a single chapter and return as clean html format.'''
        logger.info('Visiting %s', chapter['url'])
        soup = self.get_soup(chapter['url'])
        contents = soup.select('.reading-content p')
        return ''.join([str(p) for p in contents])
    # end def
# end class
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.