Skip to content

Commit

Permalink
Fixed #129: Adapted DOM Integrity Checker to EN's Top categories change
Browse files Browse the repository at this point in the history
  • Loading branch information
rgaudin committed May 25, 2022
1 parent 7648d5b commit 8c5f52c
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 2 deletions.
4 changes: 3 additions & 1 deletion wikihow2zim/rewriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,9 @@ def rewrite_links_for_excludes(self, soup, to_root):
# categoryListing thumbnail link (english)
seldef(".cat_container a[href]", True, True, False),
# categorylisting link to category
- seldef("#catlist_container #catlist a[href]", True, True, False),
+ seldef("#catlist a[href]", True, True, False),
+ # EN Top categories categorylisting link to category
+ seldef("#catlist_container .catlist a[href]", True, True, False),
# top breadcrumb in article page
seldef(".breadcrumbs a[href]", True, True, False),
# top breadcrumb in article page
Expand Down
2 changes: 1 addition & 1 deletion wikihow2zim/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -849,7 +849,7 @@ def check_dom_integrity(self):
soup, _ = get_soup("/Special:CategoryListing")
if not soup.select("#content_wrapper"):
raise DomIntegrityError("#content_wrapper not found")
- category_links = soup.select("#catlist_container #catlist a")
+ category_links = soup.select("#catlist a")

if not category_links:
raise DomIntegrityError("No links in #catlist_container")
Expand Down

0 comments on commit 8c5f52c

Please sign in to comment.