Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement reporting #123

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 21 additions & 11 deletions pywikitools/resourcesbot/bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,16 @@
json_decode,
)
from pywikitools.resourcesbot.modules.post_processing import LanguagePostProcessor
from pywikitools.resourcesbot.modules.write_summary import WriteSummary
from pywikitools.resourcesbot.modules.write_summary import WriteProgressSummary
import pywikitools.resourcesbot.reporting as reporting

AVAILABLE_MODULES: Final[List[str]] = [
"consistency_checks",
"export_html",
"export_pdf",
"export_repository",
"write_lists",
"write_report",
"write_progress",
"write_sidebar_messages",
]

Expand Down Expand Up @@ -206,29 +207,38 @@ def run(self):

self.logger.info(f"Modules specified for execution: {self.modules}")

module_reports = reporting.ReportSummary()

for selected_module in self.modules:
module = load_module(selected_module)(
self.fortraininglib, self._config, self.site
)
module_reports.add_module(type(module).__name__)

for lang in self._result:
module.run(
self._result[lang],
self._result["en"],
ChangeLog(),
ChangeLog(),
force_rewrite=(self._rewrite == "all")
or (self._rewrite == module.abbreviation()),
)
module_reports.add_language_report(
type(module).__name__,
module.run(
self._result[lang],
self._result["en"],
ChangeLog(),
ChangeLog(),
force_rewrite=(self._rewrite == "all")
or (self._rewrite == module.abbreviation()),
))

# Now run all GlobalPostProcessors
if not self._limit_to_lang:
write_summary = WriteSummary(self.site)
write_summary = WriteProgressSummary(self.site)
write_summary.run(
self._result,
self._changelog,
force_rewrite=(self._rewrite == "all") or (self._rewrite == "summary"),
)

module_reports.print_summaries()
module_reports.save_report(self.site)

def get_english_version(self, page_source: str) -> Tuple[str, int]:
"""
Extract the version of an English worksheet
Expand Down
40 changes: 40 additions & 0 deletions pywikitools/resourcesbot/modules/consistency_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from pywikitools.lang.translated_page import TranslationUnit
from pywikitools.resourcesbot.data_structures import LanguageInfo, WorksheetInfo
from pywikitools.resourcesbot.modules.post_processing import LanguagePostProcessor
from pywikitools.resourcesbot.reporting import Report


class ConsistencyCheck(LanguagePostProcessor):
Expand Down Expand Up @@ -263,6 +264,9 @@ def run(
self.logger.info(
f"Consistency checks for {language_info.english_name}: {checks_passed}/5 passed"
)
lang_report = ConsistencyReport(language_info.language_code)
lang_report.checks_passed = checks_passed
return lang_report


"""
Expand All @@ -284,3 +288,39 @@ def run(
be the same
-> needs to be checked manually
"""


class ConsistencyReport(Report):
    """
    A specialized report for consistency_checks,
    containing the number of consistency checks a language passed.
    """

    # Number of checks performed per language; keeps the "x/5" summaries and
    # the consistent() threshold in one place.
    TOTAL_CHECKS = 5

    def __init__(self, language_code: str):
        """Create a report for language_code with no checks passed yet."""
        super().__init__(language_code)
        self.checks_passed = 0

    @classmethod
    def get_module_name(cls) -> str:
        """Return the name of the module this report belongs to."""
        # Previously returned "export_pdf" (copy-paste from PdfReport), which
        # made this report indistinguishable from the PDF export report.
        return "consistency_checks"

    def consistent(self) -> bool:
        """Return True if every consistency check passed for this language."""
        return self.checks_passed == self.TOTAL_CHECKS

    def get_summary(self) -> str:
        """Return a one-line summary for this language."""
        return (f"Ran Consistency checks for {self.language}: "
                f"{self.checks_passed}/{self.TOTAL_CHECKS} checks passed.")

    @classmethod
    def get_module_summary(cls, lang_reports: list) -> str:
        """
        Return a summary over all language reports of this module.

        An empty string is returned if there are no reports.
        """
        if len(lang_reports) == 0:
            return ""

        total_checks_passed = sum(report.checks_passed for report in lang_reports)
        consistent_reports = [report for report in lang_reports if report.consistent()]

        return (f"Ran Consistency checks for {len(lang_reports)} languages. "
                f"Consistent languages: {len(consistent_reports)}/{len(lang_reports)}, "
                f"Overall: {total_checks_passed}/{len(lang_reports) * cls.TOTAL_CHECKS} "
                f"checks passed.")
50 changes: 50 additions & 0 deletions pywikitools/resourcesbot/modules/export_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
WorksheetInfo,
)
from pywikitools.resourcesbot.modules.post_processing import LanguagePostProcessor
from pywikitools.resourcesbot.reporting import Report


class CustomBeautifyHTML(BeautifyHTML):
Expand Down Expand Up @@ -221,6 +222,8 @@ def run(
f"ExportHTML {lang_code}: "
f"Downloaded {html_counter} HTML files, {file_counter} images"
)
lang_report = HtmlReport(lang_code, html_counter, file_counter)
return lang_report


class StructureEncoder(json.JSONEncoder):
Expand Down Expand Up @@ -251,3 +254,50 @@ def default(self, o):
worksheet_json["pdf"] = pdf_info.url[pos + 1:]
return worksheet_json
return super().default(o)


class HtmlReport(Report):
    """
    Report of the export_html module for a single language:
    keeps track of how many HTML files and images were handled.
    """

    def __init__(self, language_code: str, html_counter: int, image_counter: int):
        super().__init__(language_code)

        self.html_counter = html_counter
        self.image_counter = image_counter

    def get_html_number(self) -> int:
        """
        Number of HTML elements that were processed.
        """
        return self.html_counter

    def get_image_number(self) -> int:
        """
        Number of images that were downloaded.
        """
        return self.image_counter

    def get_summary(self) -> str:
        """
        One-line summary for this language (empty if nothing was exported).
        """
        handled = self.html_counter + self.image_counter
        if handled == 0:
            return ""
        headline = f"Ran ExportHTML for {self.language}: "
        details = (f"Processed {self.html_counter} htmls, "
                   f"downloaded {self.image_counter} images.")
        return headline + details

    @classmethod
    def get_module_name(cls) -> str:
        return "export_html"

    @classmethod
    def get_module_summary(cls, lang_reports: list) -> str:
        """
        Summary over all languages (empty if there is nothing to report).
        """
        if not lang_reports:
            return ""

        html_sum = 0
        for entry in lang_reports:
            html_sum += entry.get_html_number()
        image_sum = 0
        for entry in lang_reports:
            image_sum += entry.get_image_number()

        if html_sum + image_sum == 0:
            return ""
        language_count = len(lang_reports)
        return (f"Ran export_html for {language_count} languages, "
                f"processed {html_sum} htmls, downloaded {image_sum} images.")
33 changes: 33 additions & 0 deletions pywikitools/resourcesbot/modules/export_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from pywikitools.resourcesbot.changes import ChangeLog
from pywikitools.resourcesbot.data_structures import FileInfo, LanguageInfo
from pywikitools.resourcesbot.modules.post_processing import LanguagePostProcessor
from pywikitools.resourcesbot.reporting import Report


class ExportPDF(LanguagePostProcessor):
Expand Down Expand Up @@ -117,3 +118,35 @@ def run(
self.logger.info(f"Successfully downloaded and saved {file_path}")

self.logger.info(f"ExportPDF {lang_code}: Downloaded {file_counter} PDF files")
lang_report = PdfReport(lang_info.language_code, file_counter)
return lang_report


class PdfReport(Report):
    """
    Report of the export_pdf module for a single language:
    keeps track of how many PDF files were saved.
    """

    def __init__(self, language_code: str, pdf_counter: int):
        super().__init__(language_code)

        self.pdf_counter = pdf_counter

    @classmethod
    def get_module_name(cls) -> str:
        return "export_pdf"

    def get_summary(self) -> str:
        """
        One-line summary for this language (empty if no PDF was downloaded).
        """
        downloaded = self.pdf_counter
        if downloaded != 0:
            return f"Ran ExportPDF for {self.language}: Downloaded {downloaded} pdfs."
        return ""

    @classmethod
    def get_module_summary(cls, lang_reports: list) -> str:
        """
        Summary over all languages (empty if no PDF was downloaded at all).
        """
        if not lang_reports:
            return ""
        pdf_sum = 0
        for entry in lang_reports:
            pdf_sum += entry.pdf_counter
        if pdf_sum == 0:
            return ""
        language_count = len(lang_reports)
        return f"Ran export_pdf for {language_count} languages: Downloaded {pdf_sum} pdfs."
40 changes: 37 additions & 3 deletions pywikitools/resourcesbot/modules/export_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from pywikitools.fortraininglib import ForTrainingLib
from pywikitools.resourcesbot.data_structures import LanguageInfo
from pywikitools.resourcesbot.modules.post_processing import LanguagePostProcessor
from pywikitools.resourcesbot.reporting import Report


class ExportRepository(LanguagePostProcessor):
Expand Down Expand Up @@ -61,18 +62,19 @@ def run(
Currently, we're ignoring the change parameter and just check for changes
in the git repository
"""
lang_report = ExportRepoReport(language_info.language_code)
# Make sure we have a valid repository
if self._base_folder == "":
return
return lang_report
folder: str = os.path.join(self._base_folder, language_info.language_code)
try:
repo = Repo(folder)
except GitError:
self.logger.warning(f"No valid repository found in {folder}, skipping.")
return
return lang_report
if "origin" not in repo.remotes:
self.logger.warning(f"Git remote origin missing in {folder}, skipping.")
return
return lang_report

# Staging all changes
untracked: int = len(repo.untracked_files)
Expand Down Expand Up @@ -105,7 +107,39 @@ def run(
repo.index.commit(f"{commit_message}", author=self._author)
result = repo.remotes.origin.push()
self.logger.info(f"Pushed to remote, result: {result[0].summary}")
lang_report.pushed = True
else:
self.logger.info(
f"ExportRepository {language_info.language_code}: No changes."
)

return lang_report


class ExportRepoReport(Report):
    """
    Report of the export_repository module for a single language:
    remembers whether changes were pushed to the remote repository.
    """

    def __init__(self, language_code: str):
        super().__init__(language_code)
        self.pushed = False

    @classmethod
    def get_module_name(cls) -> str:
        return "export_repository"

    def get_summary(self) -> str:
        """
        One-line summary for this language (empty if nothing was pushed).
        """
        if not self.pushed:
            return ""
        return f"Pushed htmls of {self.language} to remote repository."

    @classmethod
    def get_module_summary(cls, lang_reports: list) -> str:
        """
        Summary over all languages (empty if there are no reports).
        """
        if not lang_reports:
            return ""

        pushed_count = sum(1 for entry in lang_reports if entry.pushed)
        language_count = len(lang_reports)

        return f"Pushed html_exports for {pushed_count}/{language_count} languages."
Loading
Loading