Merge pull request #607 from dipu-bd/dev
Version 2.23.2
Showing 11 changed files with 910 additions and 22 deletions.
Version file bumped:

@@ -1 +1 @@
-2.23.1
+2.23.2
New file: the ArangScans crawler (86 added lines).

@@ -0,0 +1,86 @@
# -*- coding: utf-8 -*-
import json
import logging
import re
from urllib.parse import urlparse
from ..utils.crawler import Crawler

logger = logging.getLogger('ARANG_SCANS')
search_url = 'https://arangscans.com/?s=%s&post_type=wp-manga'


class ArangScans(Crawler):
    base_url = 'https://arangscans.com/'

    # FIXME: Search does not work; this source does not show up when
    # running: lncrawl -q "Rooftop Sword Master" --sources
    # def search_novel(self, query):
    #     query = query.lower().replace(' ', '+')
    #     soup = self.get_soup(search_url % query)

    #     results = []
    #     for tab in soup.select('.c-tabs-item__content'):
    #         a = tab.select_one('.post-title h3 a')
    #         latest = tab.select_one('.latest-chap .chapter a').text
    #         votes = tab.select_one('.rating .total_votes').text
    #         results.append({
    #             'title': a.text.strip(),
    #             'url': self.absolute_url(a['href']),
    #             'info': '%s | Rating: %s' % (latest, votes),
    #         })
    #     # end for

    #     return results
    # # end def

    def read_novel_info(self):
        '''Get novel title, author, cover etc.'''
        logger.debug('Visiting %s', self.novel_url)
        soup = self.get_soup(self.novel_url)

        possible_title = soup.select_one('.post-title h1')
        for span in possible_title.select('span'):
            span.extract()
        # end for
        self.novel_title = possible_title.text.strip()
        logger.info('Novel title: %s', self.novel_title)

        self.novel_cover = self.absolute_url(
            soup.select_one('.summary_image a img')['src'])
        logger.info('Novel cover: %s', self.novel_cover)

        self.novel_author = ' '.join([
            a.text.strip()
            for a in soup.select('.author-content a[href*="manga-author"]')
        ])
        logger.info('%s', self.novel_author)

        # Each block of 100 chapters is grouped into one volume.
        volumes = set()
        chapters = soup.select('ul.main li.wp-manga-chapter a')
        for a in reversed(chapters):
            chap_id = len(self.chapters) + 1
            vol_id = (chap_id - 1) // 100 + 1
            volumes.add(vol_id)
            self.chapters.append({
                'id': chap_id,
                'volume': vol_id,
                'url': self.absolute_url(a['href']),
                'title': a.text.strip() or ('Chapter %d' % chap_id),
            })
        # end for

        self.volumes = [{'id': x} for x in volumes]
    # end def

    def download_chapter_body(self, chapter):
        '''Download body of a single chapter and return as clean html format.'''
        logger.info('Downloading %s', chapter['url'])
        soup = self.get_soup(chapter['url'])

        contents = soup.select_one('div.text-left')
        for bad in contents.select('h3, .code-block, script, .adsbygoogle'):
            bad.decompose()

        body = self.extract_contents(contents)
        return '<p>' + '</p><p>'.join(body) + '</p>'
    # end def
# end class
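The FIXME above leaves search disabled without a diagnosis. A standalone probe of the search endpoint can help separate a selector problem from a server-side one. This is a debugging sketch, not part of the commit: it assumes requests and beautifulsoup4 are installed, reuses search_url and the selectors from the commented-out search_novel, and the custom User-Agent is only a guess at why results might be missing.

# Standalone probe for the search endpoint (debugging sketch, not part
# of this commit). Assumes: pip install requests beautifulsoup4
import requests
from bs4 import BeautifulSoup

search_url = 'https://arangscans.com/?s=%s&post_type=wp-manga'
query = 'rooftop+sword+master'

# Some WordPress/Madara sites reject the default python-requests user
# agent, which could explain an empty result list.
headers = {'User-Agent': 'Mozilla/5.0'}
resp = requests.get(search_url % query, headers=headers, timeout=30)
print(resp.status_code, len(resp.text))

soup = BeautifulSoup(resp.text, 'html.parser')
# Selectors copied from the commented-out search_novel above.
tabs = soup.select('.c-tabs-item__content')
print('result blocks found:', len(tabs))
for tab in tabs:
    a = tab.select_one('.post-title h3 a')
    if a:
        print(a.text.strip(), '->', a['href'])

If this prints zero result blocks while the same search works in a browser, the selectors are stale; if the HTTP status is not 200, the site is blocking the client.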
New file: the AsadaTranslations crawler (102 added lines).

@@ -0,0 +1,102 @@
# -*- coding: utf-8 -*-
import json
import logging
import re
from ..utils.crawler import Crawler

logger = logging.getLogger(__name__)
search_url = 'https://asadatranslations.com/?s=%s&post_type=wp-manga&author=&artist=&release='


class AsadaTranslations(Crawler):
    base_url = 'https://asadatranslations.com/'

    def search_novel(self, query):
        query = query.lower().replace(' ', '+')
        soup = self.get_soup(search_url % query)

        results = []
        for tab in soup.select('.c-tabs-item__content'):
            a = tab.select_one('.post-title h3 a')
            latest = tab.select_one('.latest-chap .chapter a').text
            votes = tab.select_one('.rating .total_votes').text
            results.append({
                'title': a.text.strip(),
                'url': self.absolute_url(a['href']),
                'info': '%s | Rating: %s' % (latest, votes),
            })
        # end for

        return results
    # end def

    def read_novel_info(self):
        '''Get novel title, author, cover etc.'''
        logger.debug('Visiting %s', self.novel_url)
        soup = self.get_soup(self.novel_url)

        possible_title = soup.select_one('.post-title h1')
        for span in possible_title.select('span'):
            span.extract()
        # end for
        self.novel_title = possible_title.text.strip()
        logger.info('Novel title: %s', self.novel_title)

        # NOTE: Site doesn't have book covers.
        # self.novel_cover = self.absolute_url(
        #     soup.select_one('.summary_image a img')['src'])
        # logger.info('Novel cover: %s', self.novel_cover)

        self.novel_author = ' '.join([
            a.text.strip()
            for a in soup.select('.author-content a[href*="manga-author"]')
        ])
        logger.info('%s', self.novel_author)

        volumes = set()
        chapters = soup.select('ul.main li.wp-manga-chapter a')
        for a in reversed(chapters):
            chap_id = len(self.chapters) + 1
            vol_id = (chap_id - 1) // 100 + 1
            volumes.add(vol_id)
            self.chapters.append({
                'id': chap_id,
                'volume': vol_id,
                'url': self.absolute_url(a['href']),
                'title': a.text.strip() or ('Chapter %d' % chap_id),
            })
        # end for

        self.volumes = [{'id': x} for x in volumes]
    # end def

    def download_chapter_body(self, chapter):
        '''Download body of a single chapter and return as clean html format.'''
        logger.info('Downloading %s', chapter['url'])
        soup = self.get_soup(chapter['url'])

        contents = soup.select_one('div.text-left')
        for bad in contents.select('h3, .code-block, script, .adsbygoogle, .sharedaddy'):
            bad.decompose()

        # Patterns of translator/editor notes to strip from the chapter text.
        self.blacklist_patterns = [
            r'^Translator:',
            r'^Qii',
            r'^Editor:',
            r'^Maralynx',
            r'^Translator and Editor Notes:',
            r'^Support this novel on',
            r'^NU',
            r'^by submitting reviews and ratings or by adding it to your reading list.',
        ]

        # Drop paragraphs that advertise the translator's discord server.
        for discord in contents.select('p'):
            for bad in ["Join our", "<a>discord</a>", "to get latest updates and progress about the translations"]:
                if bad in discord.text:
                    discord.decompose()
                    break  # do not decompose the same paragraph twice

        body = self.extract_contents(contents)
        return '<p>' + '</p><p>'.join(body) + '</p>'
    # end def
# end class
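The blacklist_patterns assigned above are not applied inside this method; presumably the base Crawler's extract_contents (in ../utils/crawler.py, not shown in this commit) matches them against each paragraph and drops hits. A minimal sketch of that idea under that assumption; the helper name and matching rule here are hypothetical:

import re

# Hypothetical re-creation of how blacklist_patterns might be applied;
# the real logic lives in the base Crawler, which is not in this diff.
blacklist_patterns = [
    r'^Translator:',
    r'^Editor:',
    r'^Support this novel on',
]

def keep_paragraph(text):
    '''Return False if any blacklist pattern matches this paragraph.'''
    return not any(re.search(p, text.strip()) for p in blacklist_patterns)

paragraphs = [
    'Chapter 12: The Rooftop',
    'Translator: Qii',
    'The hero climbed the stairs.',
]
print([p for p in paragraphs if keep_paragraph(p)])
# Prints: ['Chapter 12: The Rooftop', 'The hero climbed the stairs.']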