diff --git a/README.md b/README.md
index 6c678480a..996bdd0e9 100644
--- a/README.md
+++ b/README.md
@@ -52,13 +52,13 @@ Without it, you will only get output in epub, text, and web formats.
 
 ### A1. Standalone Bundle (Windows, Linux)
 
-⏬ **Windows**: [lightnovel-crawler v2.22.0 ~ 23MB](https://rebrand.ly/lncrawl)
+⏬ **Windows**: [lightnovel-crawler v2.22.2 ~ 23MB](https://rebrand.ly/lncrawl)
 
 > In Windows 8, 10 or later versions, it might say that `lncrawl.exe` is not safe to download or execute. You should bypass/ignore this security check to execute this program. Actually, I am too lazy to add proper configuration files to solve this issue. Excuse me please 😇.
 
 _To get older versions visit the [Releases page](https://github.com/dipu-bd/lightnovel-crawler/releases)_
 
-⏬ **Linux**: [lightnovel-crawler v2.19.4 ~ 24MB](https://rebrand.ly/lncrawl-linux)
+⏬ **Linux**: [lightnovel-crawler v2.22.1 ~ 24MB](https://rebrand.ly/lncrawl-linux)
 
 > Copy it to `/usr/bin` or `~/.local/bin` to make it accessible in terminal. Or, you can [create a launcher](https://askubuntu.com/a/66918/457551) to easily access the app.
 
diff --git a/lncrawl/VERSION b/lncrawl/VERSION
index d93847fab..acbef7695 100644
--- a/lncrawl/VERSION
+++ b/lncrawl/VERSION
@@ -1 +1 @@
-2.22.1
+2.22.2
diff --git a/lncrawl/bots/console/start.py b/lncrawl/bots/console/start.py
index 8dccdca8f..19a7b30c3 100644
--- a/lncrawl/bots/console/start.py
+++ b/lncrawl/bots/console/start.py
@@ -3,6 +3,7 @@
 
 from PyInquirer import prompt
 
+from ...assets.icons import Icons
 from ...core import display
 from ...core.app import App
 from ...core.arguments import get_args
@@ -70,10 +71,15 @@ def start(self):
 
         display.app_complete()
 
         if self.open_folder():
-            import pathlib
-            import webbrowser
-            url = pathlib.Path(self.app.output_path).as_uri()
-            webbrowser.open_new(url)
+            if Icons.isWindows:
+                import subprocess
+                subprocess.Popen('explorer /select,"' + self.app.output_path + '"')
+            else:
+                import pathlib
+                import webbrowser
+                url = pathlib.Path(self.app.output_path).as_uri()
+                webbrowser.open_new(url)
+            # end if
         # end def
     # end def
diff --git a/lncrawl/bots/test/test_inputs.py b/lncrawl/bots/test/test_inputs.py
index d5efb8eac..1cf9a47b0 100644
--- a/lncrawl/bots/test/test_inputs.py
+++ b/lncrawl/bots/test/test_inputs.py
@@ -15,6 +15,12 @@
         b64decode(
             "aHR0cHM6Ly9jb21yYWRlbWFvLmNvbS9ub3ZlbC90c3VydWdpLW5vLWpvb3UtdG8tcmFrdWluLW5vLWtvLw==".encode()).decode()
     ],
+    'https://jpmtl.com/': [
+        'https://jpmtl.com/books/178'
+    ],
+    'https://mangatoon.mobi/': [
+        'https://mangatoon.mobi/en/detail/40627'
+    ],
     'https://es.mtlnovel.com/': [
         'https://es.mtlnovel.com/being-a-hamster-in-the-apocalypse-is-a-breeze/',
     ],
diff --git a/lncrawl/sources/bestlightnovel.py b/lncrawl/sources/bestlightnovel.py
index 7cd7d2f44..144c2250f 100644
--- a/lncrawl/sources/bestlightnovel.py
+++ b/lncrawl/sources/bestlightnovel.py
@@ -6,24 +6,23 @@
 logger = logging.getLogger('NOVEL_ONLINE_FREE')
 
 search_url = 'https://bestlightnovel.com/getsearchstory'
-novel_page_url = 'https://bestlightnovel.com/novel/%s'
+novel_page_url = 'https://bestlightnovel.com/novel_%s'
 
 
 class BestLightNovel(Crawler):
     base_url = 'https://bestlightnovel.com/'
 
     def search_novel(self, query):
-        response = self.submit_form(search_url, {
+        data = self.submit_form(search_url, {
             'searchword': query
-        })
-        data = response.json()
+        }).json()
 
         results = []
         for novel in data:
             titleSoup = BeautifulSoup(novel['name'], 'lxml')
             results.append({
                 'title': titleSoup.body.text.title(),
-                'url': novel_page_url % novel['nameunsigned'],
+                'url': novel_page_url % novel['id_encode'],
                 'info': 'Latest: %s' % novel['lastchapter'],
             })
         # end for
diff --git a/lncrawl/sources/indowebnovel.py b/lncrawl/sources/indowebnovel.py
index ce2316f86..3f45c52e1 100644
--- a/lncrawl/sources/indowebnovel.py
+++ b/lncrawl/sources/indowebnovel.py
@@ -63,7 +63,7 @@ def download_chapter_body(self, chapter):
         soup = self.get_soup(chapter['url'])
 
-        contents = soup.select('div.entry-content.c2 p')
+        contents = soup.select('div.entry-content p')
         body = [str(p) for p in contents if p.text.strip()]
 
         return '<p>' + '</p><p>'.join(body) + '</p>'
     # end def
diff --git a/lncrawl/sources/jpmtl.py b/lncrawl/sources/jpmtl.py
index 122c5ec74..b33fd2b0b 100644
--- a/lncrawl/sources/jpmtl.py
+++ b/lncrawl/sources/jpmtl.py
@@ -44,7 +44,7 @@ def read_novel_info(self):
         toc_url = chapters_url % self.novel_id
         toc = self.get_json(toc_url)
-        print(toc)
+        # print(toc)
 
         for volume in toc:
             self.volumes.append({
                 'id': volume['volume'],
diff --git a/lncrawl/sources/webnovel.py b/lncrawl/sources/webnovel.py
index 5dbba89b2..f8ddf007b 100644
--- a/lncrawl/sources/webnovel.py
+++ b/lncrawl/sources/webnovel.py
@@ -16,7 +16,10 @@
 
 
 class WebnovelCrawler(Crawler):
-    base_url = 'https://www.webnovel.com'
+    base_url = [
+        'https://m.webnovel.com',
+        'https://www.webnovel.com',
+    ]
 
     def get_csrf(self):
         logger.info('Getting CSRF Token')
@@ -52,7 +55,10 @@ def read_novel_info(self):
         self.get_csrf()
         url = self.novel_url
         #self.novel_id = re.search(r'(?<=webnovel.com/book/)\d+', url).group(0)
-        self.novel_id = url.split("_")[1]
+        if not "_" in url :
+            self.novel_id = re.search(r'(?<=webnovel.com/book/)\d+', url).group(0)
+        else :
+            self.novel_id = url.split("_")[1]
         logger.info('Novel Id: %s', self.novel_id)
 
         url = chapter_list_url % (self.csrf, self.novel_id)
diff --git a/lncrawl/utils/crawler.py b/lncrawl/utils/crawler.py
index 07da2b8ef..f120309d3 100644
--- a/lncrawl/utils/crawler.py
+++ b/lncrawl/utils/crawler.py
@@ -24,10 +24,7 @@ def __init__(self):
 
         # Initialize cloudscrapper
         self.scraper = cloudscraper.create_scraper(
-            browser={
-                'browser': 'firefox',
-                'mobile': False
-            }
+            browser={'mobile': False}
        )
 
         # Must resolve these fields inside `read_novel_info`
diff --git a/lncrawl/utils/racovimge.py b/lncrawl/utils/racovimge.py
index a445e1fb1..d0220cd66 100644
--- a/lncrawl/utils/racovimge.py
+++ b/lncrawl/utils/racovimge.py
@@ -20,7 +20,7 @@
 try:
     import jinja2
 except ImportError:
-    logger.info('Jinja2 is required for cover generation:\n\tpip install Jinja2')
+    logger.info('Jinja2 is required for cover generation:\n pip install Jinja2')
 
 ###############################################################################
 # Templates and Color Schemes
diff --git a/requirements.txt b/requirements.txt
index 6a97293b1..eabf5bc91 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,8 +11,8 @@ colorama==0.4.3
 progress==1.5
 Js2Py==0.70
 EbookLib==0.17.1
-pillow==6.2.2
-cloudscraper>=1.2.40
+pillow==7.2.0
+cloudscraper>=1.2.46
 lxml==4.5.1
 
 # Bot requirements
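Note (not part of the patch): the `lncrawl/bots/console/start.py` hunk above switches folder opening to `explorer /select,...` on Windows while keeping the `file://` URI fallback elsewhere. A minimal standalone sketch of that behaviour, using `platform.system()` as a stand-in for the `Icons.isWindows` flag the patch relies on:

```python
import pathlib
import platform
import subprocess
import webbrowser


def open_output_folder(output_path: str) -> None:
    """Open the output folder, mirroring the logic added in start.py above."""
    if platform.system() == 'Windows':
        # Explorer opens with the output folder pre-selected; the path is
        # quoted in case it contains spaces, as in the patch.
        subprocess.Popen('explorer /select,"%s"' % output_path)
    else:
        # Elsewhere, hand a file:// URI to the default handler.
        webbrowser.open_new(pathlib.Path(output_path).as_uri())
```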
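Similarly, the `lncrawl/sources/webnovel.py` hunk accepts both the `m.` and `www.` hosts and falls back to the `/book/<id>` pattern when the URL has no `_<id>` suffix. A minimal illustration of that parsing, with hypothetical example URLs and ids:

```python
import re


def parse_novel_id(url: str) -> str:
    # Same two cases handled by read_novel_info() in webnovel.py above.
    if "_" not in url:
        # e.g. https://www.webnovel.com/book/12345678 (hypothetical id)
        return re.search(r'(?<=webnovel.com/book/)\d+', url).group(0)
    # e.g. https://m.webnovel.com/book/some-title_12345678 (hypothetical slug)
    return url.split("_")[1]


print(parse_novel_id('https://www.webnovel.com/book/12345678'))           # 12345678
print(parse_novel_id('https://m.webnovel.com/book/some-title_12345678'))  # 12345678
```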