Skip to content

Commit

Permalink
Rework everything
Browse files Browse the repository at this point in the history
  • Loading branch information
benoit74 committed Feb 29, 2024
1 parent 4397519 commit d631420
Show file tree
Hide file tree
Showing 21 changed files with 1,076 additions and 1,054 deletions.
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ repos:
- id: trailing-whitespace
- id: end-of-file-fixer
- repo: https://github.com/psf/black
rev: "23.7.0"
rev: "24.2.0"
hooks:
- id: black
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.0.282
rev: "0.3.0"
hooks:
- id: ruff
- repo: https://github.com/RobertCraigie/pyright-python
rev: v1.1.320
rev: v1.1.352
hooks:
- id: pyright
name: pyright (system)
Expand Down
5 changes: 3 additions & 2 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": true
"source.organizeImports": "explicit"
},
},
"python.analysis.typeCheckingMode": "strict",
"python.analysis.typeCheckingMode": "basic",
"editor.rulers": [88],
}
18 changes: 10 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ classifiers = [
]
dependencies = [
"requests>=2.27.0,<2.28",
"zimscraperlib>=1.6.0,<1.7",
"zimscraperlib==3.3.1",
"kiwixstorage>=0.8.2,<0.9",
"Jinja2>=3.1.2,<3.2",
"backoff>=2.0.1,<2.1",
Expand All @@ -33,11 +33,11 @@ scripts = [
"invoke==2.2.0",
]
lint = [
"black==23.7.0",
"ruff==0.0.282",
"black==24.2.0",
"ruff==0.3.0",
]
check = [
"pyright==1.1.320",
"pyright==1.1.352",
]
test = [
"pytest==7.4.0",
Expand Down Expand Up @@ -111,6 +111,8 @@ target-version = ['py38']
target-version = "py38"
line-length = 88
src = ["src"]

[tool.ruff.lint]
select = [
"A", # flake8-builtins
# "ANN", # flake8-annotations
Expand Down Expand Up @@ -187,17 +189,17 @@ unfixable = [
"F401",
]

[tool.ruff.isort]
[tool.ruff.lint.isort]
known-first-party = ["ifixit2zim"]

[tool.ruff.flake8-bugbear]
[tool.ruff.lint.flake8-bugbear]
# add exceptions to B008 for fastapi.
extend-immutable-calls = ["fastapi.Depends", "fastapi.Query"]

[tool.ruff.flake8-tidy-imports]
[tool.ruff.lint.flake8-tidy-imports]
ban-relative-imports = "all"

[tool.ruff.per-file-ignores]
[tool.ruff.lint.per-file-ignores]
# Tests can use magic values, assertions, and relative imports
"tests/**/*" = ["PLR2004", "S101", "TID252"]

Expand Down
13 changes: 1 addition & 12 deletions src/ifixit2zim/__main__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,4 @@
import pathlib
import sys


def main():
# allows running it from source using python ifixit2zim
sys.path = [str(pathlib.Path(__file__).parent.parent.resolve()), *sys.path]

from ifixit2zim.entrypoint import main as entry

entry()

from ifixit2zim.entrypoint import main

if __name__ == "__main__":
main()
109 changes: 56 additions & 53 deletions src/ifixit2zim/constants.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pathlib
import tempfile
import urllib.parse
from dataclasses import dataclass, field
from dataclasses import dataclass
from typing import List, Optional, Set

from zimscraperlib.i18n import get_language_details
Expand Down Expand Up @@ -252,7 +252,7 @@
"disassembly_guides": "분해 안내서",
"tools": "도구",
"parts": "부품",
"tools_introduction": ("해당 기기를 고치는데 사용하는 일반 도구들 입니다. 매 단계에 모든 도구를 사용하지는 않습니다."),
"tools_introduction": "해당 기기를 고치는데 사용하는 일반 도구들 입니다. 매 단계에 모든 도구를 사용하지는 않습니다.", # noqa E501
},
"zh": {
"author": "作者: ",
Expand All @@ -268,7 +268,7 @@
"disassembly_guides": "拆卸指南",
"tools": "工具",
"parts": "配件",
"tools_introduction": ("这是用于在这个设备上工作的一些常用工具。你可能不需要在每个过程中使用到每个工具。"),
"tools_introduction": "这是用于在这个设备上工作的一些常用工具。你可能不需要在每个过程中使用到每个工具。", # noqa E501
},
"ru": {
"author": "Автор: ",
Expand Down Expand Up @@ -324,7 +324,7 @@
"disassembly_guides": "分解ガイド",
"tools": "ツール",
"parts": "パーツ",
"tools_introduction": ("以前、このデバイスの修理に使われていた一般的な工具です。修理過程において全部の工具が必要とは限りません。"),
"tools_introduction": "以前、このデバイスの修理に使われていた一般的な工具です。修理過程において全部の工具が必要とは限りません。", # noqa E501
},
"tr": {
"author": "Yazar: ",
Expand Down Expand Up @@ -803,61 +803,63 @@


@dataclass
class Conf:
required = [
class Configuration:
fpath: pathlib.Path

# zim params
name: str
title: str
description: str
long_description: Optional[str]
author: str
publisher: str
fname: str
tag: List[str]

# filesystem
_output_dir: str # TODO: rename output_name
_tmp_dir: str # TODO: rename tmp_name (same rationale as _output_dir above)
output_dir: pathlib.Path # TODO: rename output_path
tmp_dir: pathlib.Path # TODO: rename tmp_path (same rationale as output_dir above)

required = (
"lang_code",
"output_dir",
]
)

lang_code: str = ""
language: dict = field(default_factory=dict)
main_url: str = ""

# zim params
name: str = ""
title: Optional[str] = ""
description: Optional[str] = ""
author: Optional[str] = ""
publisher: Optional[str] = ""
fname: Optional[str] = ""
tag: List[str] = field(default_factory=list)
lang_code: str
language: dict
main_url: urllib.parse.ParseResult

# customization
icon: Optional[str] = ""
categories: Set[str] = field(default_factory=set)
no_category: Optional[bool] = False
guides: Set[str] = field(default_factory=set)
no_guide: Optional[bool] = False
infos: Set[str] = field(default_factory=set)
no_info: Optional[bool] = False
users: Set[str] = field(default_factory=set)
no_user: Optional[bool] = False
no_cleanup: Optional[bool] = False

# filesystem
_output_dir: Optional[str] = "."
_tmp_dir: Optional[str] = "."
output_dir: Optional[pathlib.Path] = None
tmp_dir: Optional[pathlib.Path] = None
icon: str
categories: Set[str]
no_category: bool
guides: Set[str]
no_guide: bool
infos: Set[str]
no_info: bool
users: Set[str]
no_user: bool
no_cleanup: bool

# performance
nb_threads: Optional[int] = -1
s3_url_with_credentials: Optional[str] = ""
s3_url_with_credentials: Optional[str]

# error handling
max_missing_items_percent: Optional[int] = 0
max_error_items_percent: Optional[int] = 0
max_missing_items_percent: int
max_error_items_percent: int

# debug/devel
build_dir_is_tmp_dir: Optional[bool] = False
keep_build_dir: Optional[bool] = False
scrape_only_first_items: Optional[bool] = False
debug: Optional[bool] = False
delay: Optional[float] = 0
api_delay: Optional[float] = 0
cdn_delay: Optional[float] = 0
stats_filename: Optional[str] = None
skip_checks: Optional[bool] = False
build_dir_is_tmp_dir: bool
keep_build_dir: bool
scrape_only_first_items: bool
debug: bool
delay: float
api_delay: float
cdn_delay: float
stats_filename: Optional[str]
skip_checks: bool

@staticmethod
def get_url(lang_code: str) -> urllib.parse.ParseResult:
Expand All @@ -869,14 +871,14 @@ def domain(self) -> str:

@property
def api_url(self) -> str:
return self.main_url + API_PREFIX
return self.main_url.geturl() + API_PREFIX

@property
def s3_url(self) -> str:
def s3_url(self) -> Optional[str]:
return self.s3_url_with_credentials

def __post_init__(self):
self.main_url = Conf.get_url(self.lang_code)
self.main_url = Configuration.get_url(self.lang_code)
self.language = get_language_details(self.lang_code)
self.output_dir = pathlib.Path(self._output_dir).expanduser().resolve()
self.output_dir.mkdir(parents=True, exist_ok=True)
Expand All @@ -890,9 +892,10 @@ def __post_init__(self):
tempfile.mkdtemp(prefix=f"ifixit_{self.lang_code}_", dir=self.tmp_dir)
)

self.stats_path = None
if self.stats_filename:
self.stats_filename = pathlib.Path(self.stats_filename).expanduser()
self.stats_filename.parent.mkdir(parents=True, exist_ok=True)
self.stats_path = pathlib.Path(self.stats_filename).expanduser()
self.stats_path.parent.mkdir(parents=True, exist_ok=True)

# support semi-colon separated tags as well
if self.tag:
Expand Down
25 changes: 17 additions & 8 deletions src/ifixit2zim/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import sys

from ifixit2zim.constants import NAME, SCRAPER, URLS
from ifixit2zim.shared import Global, logger
from ifixit2zim.shared import logger, set_debug


def main():
Expand Down Expand Up @@ -37,13 +37,19 @@ def main():

parser.add_argument(
"--title",
help="Custom title for your ZIM. iFixit homepage title otherwise",
help="Custom title for your ZIM (30 chars max).",
)

parser.add_argument(
"--description",
help="Custom description for your ZIM. "
"iFixit homepage description (meta) otherwise",
help="Custom description for your ZIM (80 chars max). "
"Based on iFixit homepage description (meta) otherwise",
)

parser.add_argument(
"--long-description",
help="Custom long description for your ZIM (4000 chars max). "
"Based on iFixit homepage description (meta) otherwise",
)

parser.add_argument(
Expand All @@ -55,11 +61,13 @@ def main():
"--creator",
help="Name of content creator. “iFixit” otherwise",
dest="author",
default="iFixit",
)

parser.add_argument(
"--publisher",
help="Custom publisher name (ZIM metadata). “openZIM” otherwise",
default="openZIM",
)

parser.add_argument(
Expand Down Expand Up @@ -87,6 +95,7 @@ def main():
"--debug",
help="Enable verbose output",
action="store_true",
dest="debug",
default=False,
)

Expand Down Expand Up @@ -257,18 +266,18 @@ def main():
)

args = parser.parse_args()
Global.set_debug(args.debug)
set_debug(args.debug)

from ifixit2zim.scraper import ifixit2zim
from ifixit2zim.scraper import IFixit2Zim

try:
scraper = ifixit2zim(**dict(args._get_kwargs()))
scraper = IFixit2Zim(**dict(args._get_kwargs()))
sys.exit(scraper.run())
except Exception as exc:
logger.error(f"FAILED. An error occurred: {exc}")
if args.debug:
logger.exception(exc)
raise SystemExit(1)
raise SystemExit(1) from None


if __name__ == "__main__":
Expand Down
8 changes: 6 additions & 2 deletions src/ifixit2zim/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
class FinalScrapingFailure(Exception):
class FinalScrapingFailureError(Exception):
pass


class UnexpectedDataKindException(Exception):
class UnexpectedDataKindExceptionError(Exception):
pass


class CategoryHomePageContentError(Exception):
pass


class ImageUrlNotFoundError(Exception):
pass
2 changes: 1 addition & 1 deletion src/ifixit2zim/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def release_halt(self):
"""release the `no_more` flag preventing workers from taking up tasks"""
self.no_more = False

def shutdown(self, wait=True):
def shutdown(self, *, wait=True):
"""stop the executor, either somewhat immediately or awaiting completion"""
logger.debug(f"shutting down executor {self.prefix} with {wait=}")
with self._shutdown_lock:
Expand Down
Loading

0 comments on commit d631420

Please sign in to comment.