Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add notebooks linting with black #1913

Merged
merged 6 commits into from
Apr 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 39 additions & 29 deletions .ci/check_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,33 +10,35 @@
NOTEBOOKS_ROOT = Path(__file__).resolve().parents[1]

# URL fragments for sites that are known to be flaky or to rate-limit automated
# requests. The link checker matches them by substring (`known_url in url`) and
# prints a "SKIP" line instead of reporting a failure for such URLs.
#
# Every entry must end with a comma: two adjacent string literals without a
# separating comma are silently concatenated into one bogus entry.
EXCEPTIONS_URLs = [
    "medium.com",
    "https://www.paddlepaddle.org.cn/",
    "mybinder.org",
    "https://arxiv.org",
    "http://host.robots.ox.ac.uk",
    "https://gitee.com/",
]


def get_all_ast_nodes(ast_nodes):
    """Yield every node of an AST in depth-first, pre-order.

    Args:
        ast_nodes: Iterable of node mappings. A node may carry a
            ``"children"`` key holding a nested iterable of nodes.

    Yields:
        Each node, immediately followed by all of its descendants.
    """
    for node in ast_nodes:
        yield node
        # Descend exactly once per node so that no descendant is reported twice.
        if "children" in node:
            yield from get_all_ast_nodes(node["children"])


def get_all_references_from_md(md_path):
    """Yield the target of every image and link found in a Markdown file.

    Args:
        md_path: Path-like object exposing ``read_text``; the file is read
            as UTF-8.

    Yields:
        ``node["src"]`` for each image node and ``node["link"]`` for each
        link node, in the order the nodes are visited by
        ``get_all_ast_nodes``. Nodes of any other type are ignored.
    """
    parse_markdown = mistune.create_markdown(renderer=mistune.AstRenderer())
    # Parse the document once; the AST is then walked a single time below.
    ast = parse_markdown(md_path.read_text(encoding="UTF-8"))

    for node in get_all_ast_nodes(ast):
        if node["type"] == "image":
            yield node["src"]
        elif node["type"] == "link":
            yield node["link"]


def validate_colab_url(url: str) -> bool:
OPENVINO_COLAB_URL_PREFIX = 'https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/latest/'
OPENVINO_COLAB_URL_PREFIX = "https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/latest/"

if not url.startswith(OPENVINO_COLAB_URL_PREFIX):
return
Expand All @@ -45,7 +47,8 @@ def validate_colab_url(url: str) -> bool:
absolute_notebook_path = NOTEBOOKS_ROOT / notebook_path

if not absolute_notebook_path.exists():
raise ValueError(f'notebook not found for colab url {url!r}')
raise ValueError(f"notebook not found for colab url {url!r}")


def main():
all_passed = True
Expand All @@ -55,45 +58,52 @@ def complain(message):
all_passed = False
print(message, file=sys.stderr)

for md_path in NOTEBOOKS_ROOT.glob('**/*README*.md'):
for md_path in NOTEBOOKS_ROOT.glob("**/*README*.md"):
for url in get_all_references_from_md(md_path):

try:
components = urllib.parse.urlparse(url)
except ValueError:
complain(f'{md_path}: invalid URL reference {url!r}')
complain(f"{md_path}: invalid URL reference {url!r}")
continue

if not components.path: # self-link
if not components.path: # self-link
continue

if not components.scheme and not components.netloc:
# check if it is relative path on file from repo
file_name = md_path.parent / components.path
if not file_name.exists():
complain(f'{md_path}: invalid URL reference {url!r}')
complain(f"{md_path}: invalid URL reference {url!r}")
continue

try:
validate_colab_url(url)
except ValueError as err:
complain(f'{md_path}: {err}')
complain(f"{md_path}: {err}")

try:
get = requests.get(url, timeout=10)
if get.status_code != 200:
if get.status_code in [500, 429, 443] and any([known_url in url for known_url in EXCEPTIONS_URLs]):
print(f'SKIP - {md_path}: URL can not be reached {url!r}, status code {get.status_code}')
if get.status_code in [500, 429, 443] and any(
[known_url in url for known_url in EXCEPTIONS_URLs]
):
print(
f"SKIP - {md_path}: URL can not be reached {url!r}, status code {get.status_code}"
)
continue
complain(f'{md_path}: URL can not be reached {url!r}, status code {get.status_code}')
complain(
f"{md_path}: URL can not be reached {url!r}, status code {get.status_code}"
)
except Exception as err:
if any([known_url in url for known_url in EXCEPTIONS_URLs]):
print(f'SKIP - {md_path}: URL can not be reached {url!r}, error {err}')
else:
complain(f'{md_path}: URL can not be reached {url!r}, error {err}')
print(
f"SKIP - {md_path}: URL can not be reached {url!r}, error {err}"
)
else:
complain(f"{md_path}: URL can not be reached {url!r}, error {err}")

sys.exit(0 if all_passed else 1)


if __name__ == '__main__':
main()
if __name__ == "__main__":
main()
70 changes: 57 additions & 13 deletions .ci/convert_notebooks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import argparse
import shutil
import subprocess # nosec - disable B404:import-subprocess check
import subprocess # nosec - disable B404:import-subprocess check
import time
from pathlib import Path
import nbformat
Expand All @@ -18,14 +18,17 @@ def disable_gradio_debug(notebook_path):
print(f"Disabled gradio debug mode for {notebook_path}")
nbformat.write(nb, str(notebook_path), version=nbformat.NO_CONVERT)


def arguments():
    """Parse the command-line options of the notebook conversion script.

    Returns:
        argparse.Namespace with the attributes:
            exclude_execution_file: value of ``--exclude_execution_file``
                (``None`` when not given).
            exclude_conversion_file: value of ``--exclude_conversion_file``
                (``None`` when not given).
            timeout: float, timeout for notebook execution (default 7200).
            rst_dir: ``Path`` of the rst output directory (default ``rst``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--exclude_execution_file")
    parser.add_argument("--exclude_conversion_file")
    # Each option is registered exactly once: argparse raises ArgumentError
    # ("conflicting option string") when the same flag is added twice.
    parser.add_argument(
        "--timeout", type=float, default=7200, help="timeout for notebook execution"
    )
    parser.add_argument(
        "--rst_dir", type=Path, help="rst files output directory", default=Path("rst")
    )

    return parser.parse_args()

Expand All @@ -48,19 +51,36 @@ def main():
ignore_execution_list = prepare_ignore_list(args.exclude_execution_file)
root = Path(__file__).parents[1]
notebooks_dir = root / "notebooks"
notebooks = sorted(list(notebooks_dir.rglob('**/*.ipynb')))
notebooks = sorted(list(notebooks_dir.rglob("**/*.ipynb")))
for notebook in notebooks:
notebook_path = notebook.relative_to(root)
if str(notebook_path) in ignore_conversion_list:
continue
disable_gradio_debug(notebook_path)
notebook_executed = notebook_path.parent / notebook_path.name.replace(".ipynb", "-with-output.ipynb")
notebook_executed = notebook_path.parent / notebook_path.name.replace(
".ipynb", "-with-output.ipynb"
)
start = time.perf_counter()
print(f"Convert {notebook_path}")
if str(notebook_path) not in ignore_execution_list:
try:
retcode = subprocess.run(["jupyter", "nbconvert", "--log-level=INFO", "--execute", "--to", "notebook", "--output",
str(notebook_executed), '--output-dir', str(root), '--ExecutePreprocessor.kernel_name=python3', str(notebook_path)], timeout=args.timeout).returncode
retcode = subprocess.run(
[
"jupyter",
"nbconvert",
"--log-level=INFO",
"--execute",
"--to",
"notebook",
"--output",
str(notebook_executed),
"--output-dir",
str(root),
"--ExecutePreprocessor.kernel_name=python3",
str(notebook_path),
],
timeout=args.timeout,
).returncode
except subprocess.TimeoutExpired:
retcode = -42
print(f"TIMEOUT: {notebook_path}")
Expand All @@ -69,11 +89,35 @@ def main():
continue
else:
shutil.copyfile(notebook_path, notebook_executed)
rst_retcode = subprocess.run(["jupyter", "nbconvert", "--to", "rst", str(notebook_executed), "--output-dir", str(args.rst_dir),
"--TagRemovePreprocessor.remove_all_outputs_tags=hide_output --TagRemovePreprocessor.enabled=True"], timeout=args.timeout).returncode
rst_retcode = subprocess.run(
[
"jupyter",
"nbconvert",
"--to",
"rst",
str(notebook_executed),
"--output-dir",
str(args.rst_dir),
"--TagRemovePreprocessor.remove_all_outputs_tags=hide_output --TagRemovePreprocessor.enabled=True",
],
timeout=args.timeout,
).returncode
notebook_rst = args.rst_dir / notebook_executed.name.replace(".ipynb", ".rst")
# remove all non-printable characters
subprocess.run(["sed", "-i", "-e", "s/\x1b\[[0-9;]*m//g", "-e", "s/\x1b\[?25h//g", "-e", "s/\x1b\[?25l//g", str(notebook_rst)], timeout=args.timeout)
subprocess.run(
[
"sed",
"-i",
"-e",
"s/\x1b\[[0-9;]*m//g",
"-e",
"s/\x1b\[?25h//g",
"-e",
"s/\x1b\[?25l//g",
str(notebook_rst),
],
timeout=args.timeout,
)

end = time.perf_counter() - start
print(f"Notebook conversion took: {end:.4f} s")
Expand All @@ -90,4 +134,4 @@ def main():


if __name__ == "__main__":
main()
main()
2 changes: 1 addition & 1 deletion .ci/dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# black==21.8 requires typing-extensions>3.10 which is incompatible
# with other packages
-r ../requirements.txt
black==24.3.0 # format Python code
black[jupyter]==24.3.0 # format Python code
isort # sort imports
jupyterlab-code-formatter # format code in notebooks in Jupyter Lab
jupyterlab-git # checkout and commit code in Jupyter Lab
Expand Down
1 change: 1 addition & 0 deletions .ci/spellcheck/.pyspelling.wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ finetuning
FLAC
floyd
Formatter
formatter
fp
FP
FPN
Expand Down
6 changes: 5 additions & 1 deletion .ci/spellcheck/ipynb_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,11 @@ def _filter(self, nb):

def sfilter(self, source):
    """Execute filter.

    Runs ``self._filter`` on the text of ``source`` and wraps the result in
    a single ``filters.SourceText`` that keeps the original context and
    encoding and is tagged with the ``"ipynb"`` category.

    Args:
        source: Incoming source object exposing ``text``, ``context`` and
            ``encoding`` attributes.

    Returns:
        A one-element list containing the filtered ``filters.SourceText``.
    """
    return [
        filters.SourceText(
            self._filter(source.text), source.context, source.encoding, "ipynb"
        )
    ]


def get_plugin():
Expand Down
4 changes: 3 additions & 1 deletion .ci/spellcheck/run_spellcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
env=dict(os.environ, PYTHONPATH=PYTHONPATH),
)

# Prefer the captured stdout; fall back to stderr when stdout is empty.
# Surrounding newlines are stripped so the report prints without blank padding.
result_output = (
    result.stdout.strip("\n") if result.stdout else result.stderr.strip("\n")
)

# Route the report to stderr when the spellcheck exited with a non-zero code,
# and to stdout otherwise; flush so the output is not lost on exit.
print(result_output, file=sys.stderr if result.returncode else sys.stdout, flush=True)

Expand Down
Loading
Loading