Skip to content

Commit

Permalink
Merge pull request #2362 from dipu-bd/dev
Browse files Browse the repository at this point in the history
Version 3.6.0
  • Loading branch information
dipu-bd authored Apr 30, 2024
2 parents 46401bf + 1cf3dc0 commit a76195d
Show file tree
Hide file tree
Showing 18 changed files with 574 additions and 540 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/index-gen.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@ jobs:
if: github.repository == 'dipu-bd/lightnovel-crawler'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Set up Python 3.11
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.11"

Expand All @@ -35,6 +35,6 @@ jobs:
run: python ./scripts/index_gen.py

- name: Commit changes
uses: stefanzweifel/git-auto-commit-action@v4
uses: stefanzweifel/git-auto-commit-action@v5
with:
commit_message: Generate source index
4 changes: 2 additions & 2 deletions .github/workflows/lint-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ jobs:
python-version: ["3.8", "3.9", "3.10", "3.11"]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@ jobs:
python-version: ["3.8", "3.9", "3.10", "3.11"]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Check VERSION file
run: |
[ "${GITHUB_REF##*/}" == "v$(head -n 1 lncrawl/VERSION)" ] || exit 100
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

Expand Down Expand Up @@ -55,9 +55,9 @@ jobs:
- name: Install OpenSSL
run: choco install -y --no-progress openssl

- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python 3.10
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.10"

Expand Down Expand Up @@ -86,9 +86,9 @@ jobs:
name: Linux Build & Publish
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python 3.10
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.10"

Expand Down
821 changes: 413 additions & 408 deletions README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion lncrawl/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.5.1
3.6.0
10 changes: 6 additions & 4 deletions lncrawl/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
from ..assets.version import get_version
from ..bots import run_bot
from .arguments import get_args
from .display import cancel_method, description, error_message, input_suppression
from .display import (cancel_method, description, error_message,
input_suppression)
from .logconfig import configure_logging
from .proxy import load_proxies, start_proxy_fetcher, stop_proxy_fetcher
from .sources import load_sources
Expand Down Expand Up @@ -66,9 +67,10 @@ def start_app():
try:
bot = os.getenv("BOT", "").lower()
run_bot(bot)
except Exception as e:
if not isinstance(e, KeyboardInterrupt):
error_message(*sys.exc_info())
except KeyboardInterrupt:
pass
except Exception:
error_message(*sys.exc_info())

if args.auto_proxy:
stop_proxy_fetcher()
Expand Down
6 changes: 3 additions & 3 deletions lncrawl/core/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,11 +162,11 @@ def download_chapters(
chapter.body = future.result()
self.extract_chapter_images(chapter)
chapter.success = True
except Exception as e:
except KeyboardInterrupt:
break
except Exception:
if isinstance(chapter, Chapter):
chapter.body = ""
chapter.success = False
if isinstance(e, KeyboardInterrupt):
break
finally:
yield 1
131 changes: 55 additions & 76 deletions lncrawl/core/novel_search.py
Original file line number Diff line number Diff line change
@@ -1,126 +1,105 @@
"""
To search for novels in selected sources
"""
import random
import logging
import os
from concurrent import futures
from typing import Dict, List

from concurrent.futures import Future
from slugify import slugify
from tqdm import tqdm

from ..core.sources import crawler_list, prepare_crawler
from ..models import CombinedSearchResult, SearchResult
from .sources import crawler_list, prepare_crawler
from .taskman import TaskManager

SEARCH_TIMEOUT = 60
MAX_RESULTS = 15

logger = logging.getLogger(__name__)
executor = futures.ThreadPoolExecutor(20)
taskman = TaskManager(10)


def _perform_search(app, link, bar):
def _perform_search(app, link):
from .app import App
assert isinstance(app, App)
try:
crawler = prepare_crawler(link)
results = []
for item in crawler.search_novel(app.user_input):
if not item.get("url"):
continue
if not isinstance(item, SearchResult):
item = SearchResult(**item)
if not (item.url and item.title):
continue
results.append(item)

logger.debug(results)
logger.info("%d results from %s", len(results), link)
logger.info(f"{len(results)} results from {link}")
return results
except KeyboardInterrupt as e:
raise e
except Exception:
if logger.isEnabledFor(logging.DEBUG):
logging.exception("<!> Search Failed! << %s >>", link)
return []


def _combine_results(
    results: List[SearchResult], limit: int = 15
) -> List[CombinedSearchResult]:
    """Group flat search results by slugified title and rank the groups.

    Results whose slugified title is 2 characters or shorter are dropped —
    such slugs are too generic to identify a novel reliably. Within each
    group the novels are sorted by URL for a stable, deterministic listing;
    the groups themselves are ordered by how many sources reported the
    title (most sources first), on the assumption that wider coverage means
    a better match.

    Args:
        results: Flat list of ``SearchResult`` items gathered from all
            crawlers for a single query.
        limit: Maximum number of combined entries to return. Defaults to
            15, preserving the previously hard-coded cap.

    Returns:
        At most ``limit`` ``CombinedSearchResult`` entries, best-supported
        titles first.
    """
    combined: Dict[str, List[SearchResult]] = {}
    for item in results:
        key = slugify(item.title)
        if len(key) <= 2:
            # Slug too short to distinguish titles; skip this result.
            continue
        combined.setdefault(key, []).append(item)

    processed: List[CombinedSearchResult] = []
    for key, novels in combined.items():
        novels.sort(key=lambda x: x.url)  # stable ordering within a group
        processed.append(
            CombinedSearchResult(
                id=key,
                title=novels[0].title,
                novels=novels,
            )
        )

    # Titles reported by the most sources come first; cap the output size.
    processed.sort(key=lambda x: len(x.novels), reverse=True)
    return processed[:limit]
finally:
app.progress += 1


def search_novels(app):
from .app import App

assert isinstance(app, App)

if not app.crawler_links:
return

sources = app.crawler_links.copy()
# random.shuffle(sources)

is_debug = os.getenv("debug_mode")
bar = tqdm(
desc="Searching",
total=len(sources),
unit="source",
disable=is_debug,
)
random.shuffle(sources)

# Add future tasks
checked = {}
futures_to_check = []
checked = set()
app.progress = 0
futures: List[Future] = []
for link in sources:
crawler = crawler_list[link]
if crawler in checked:
bar.update()
continue
checked[crawler] = True
future = executor.submit(_perform_search, app, link, bar)
futures_to_check.append(future)
checked.add(crawler)
f = taskman.submit_task(_perform_search, app, link)
futures.append(f)

# Resolve all futures
results: List[SearchResult] = []
for i, f in enumerate(futures_to_check):
assert isinstance(f, futures.Future)
try:
f.result(SEARCH_TIMEOUT)
except KeyboardInterrupt:
break
except TimeoutError:
f.cancel()
except Exception as e:
if is_debug:
logger.error("Failed to complete search", e)
finally:
app.progress += 1
bar.update()
try:
taskman.resolve_futures(
futures,
desc="Searching",
unit="source",
timeout=SEARCH_TIMEOUT,
)
except Exception:
if logger.isEnabledFor(logging.DEBUG):
logging.exception("<!> Search Failed!")

# Cancel any remaining futures
for f in futures_to_check:
assert isinstance(f, futures.Future)
if not f.done():
f.cancel()
elif not f.cancelled():
results += f.result()
# Combine the search results
combined: Dict[str, List[SearchResult]] = {}
for f in futures:
if not f or not f.done() or f.cancelled():
continue
for item in f.result() or []:
if not item:
continue
key = slugify(item.title)
if len(key) <= 2:
continue
combined.setdefault(key, [])
combined[key].append(item)

# Process combined search results
app.search_results = _combine_results(results)
bar.close()
processed: List[CombinedSearchResult] = []
for key, value in combined.items():
value.sort(key=lambda x: x.url)
processed.append(
CombinedSearchResult(
id=key,
title=value[0].title,
novels=value,
)
)
processed.sort(key=lambda x: -len(x.novels))
app.search_results = processed[:MAX_RESULTS]
Loading

0 comments on commit a76195d

Please sign in to comment.