Add documentation-test (doctest) infrastructure.

Adds a scheduled link-check workflow and a GPU doctest workflow, wires
`utils/check_doctest_list.py` into `make repo-consistency` / `make fix-copies`,
configures pytest doctest options, and adds a doctest parser/collector
(`testing_utils.py`), docstring decorators and `is_pytest_available()`.

diff --git a/.github/workflows/check_links.yml b/.github/workflows/check_links.yml
new file mode 100644
index 000000000000..fd866687dbe8
--- /dev/null
+++ b/.github/workflows/check_links.yml
@@ -0,0 +1,28 @@
+name: Links
+
+on:
+  repository_dispatch:
+  workflow_dispatch:
+  schedule:
+    - cron: "00 18 * * *"
+
+jobs:
+  linkChecker:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Link Checker
+        id: lychee
+        uses: lycheeverse/lychee-action@v1
+        with:
+          args: './**/*.md'
+          fail: true
+
+      - name: Create Issue From File
+        if: env.lychee_exit_code != 0
+        uses: peter-evans/create-issue-from-file@v4
+        with:
+          title: Link Checker Report
+          content-filepath: ./lychee/out.md
+          labels: report, automated issue
\ No newline at end of file
diff --git a/.github/workflows/doctests.yml b/.github/workflows/doctests.yml
new file mode 100644
index 000000000000..96206d57b196
--- /dev/null
+++ b/.github/workflows/doctests.yml
@@ -0,0 +1,79 @@
+name: Doctests
+
+on:
+  push:
+    branches:
+      - doctest*
+  repository_dispatch:
+  schedule:
+    - cron: "17 2 * * *"
+
+
+env:
+  HF_HOME: /mnt/cache
+  RUN_SLOW: yes
+  OMP_NUM_THREADS: 16
+  MKL_NUM_THREADS: 16
+
+jobs:
+  run_doctests:
+    runs-on: [single-gpu, nvidia-gpu, t4, ci]
+    container:
+      image: huggingface/diffusers-all-latest-gpu
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    steps:
+      - name: uninstall diffusers (installed during docker image build)
+        run: python3 -m pip uninstall -y diffusers
+
+      - uses: actions/checkout@v3
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Install diffusers in edit mode
+        run: python3 -m pip install -e .
+
+      - name: GPU visibility
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        run: pip freeze
+
+      - name: Get doctest files
+        run: |
+          $(python3 -c 'from utils.tests_fetcher import get_all_doctest_files; to_test = get_all_doctest_files(); to_test = " ".join(to_test); fp = open("doc_tests.txt", "w"); fp.write(to_test); fp.close()')
+
+      - name: Run doctests
+        run: |
+          python3 -m pytest -v --make-reports doc_tests_gpu --doctest-modules $(cat doc_tests.txt) -sv --doctest-continue-on-failure --doctest-glob="*.md"
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat reports/doc_tests_gpu/failures_short.txt
+
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v3
+        with:
+          name: doc_tests_gpu_test_reports
+          path: reports/doc_tests_gpu
+
+  send_results:
+    name: Send results to webhook
+    runs-on: ubuntu-22.04
+    if: always()
+    needs: [run_doctests]
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/download-artifact@v3
+      - name: Send message to Slack
+        env:
+          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
+          CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_DOCS }}
+          CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_DOCS }}
+          CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
+        run: |
+          pip install slack_sdk
+          python utils/notification_service_doc_tests.py
diff --git a/Makefile b/Makefile
index 1b81f551d36d..93f53bc643ae 100644
--- a/Makefile
+++ b/Makefile
@@ -36,6 +36,7 @@ repo-consistency:
 	python utils/check_dummies.py
 	python utils/check_repo.py
 	python utils/check_inits.py
+	python utils/check_doctest_list.py
 
 # this target runs checks on all files
 
@@ -67,6 +68,7 @@ fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency
 fix-copies:
 	python utils/check_copies.py --fix_and_overwrite
 	python utils/check_dummies.py --fix_and_overwrite
+	python utils/check_doctest_list.py --fix_and_overwrite
 
 # Run tests for the library
 
diff --git a/pyproject.toml b/pyproject.toml
index 0612f2f9e059..55de12d36bfa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,3 +25,7 @@ skip-magic-trailing-comma = false
 
 # Like Black, automatically detect the appropriate line ending.
 line-ending = "auto"
+
+[tool.pytest.ini_options]
+doctest_optionflags="NUMBER NORMALIZE_WHITESPACE ELLIPSIS"
+doctest_glob="**/*.md"
\ No newline at end of file
diff --git a/src/diffusers/testing_utils.py b/src/diffusers/testing_utils.py
new file mode 100644
index 000000000000..35d80fd2f17c
--- /dev/null
+++ b/src/diffusers/testing_utils.py
@@ -0,0 +1,185 @@
+import doctest
+import inspect
+import os
+import re
+from typing import Iterable
+
+from .utils import is_pytest_available
+
+
+if is_pytest_available():
+    from _pytest.doctest import (
+        Module,
+        _get_checker,
+        _get_continue_on_failure,
+        _get_runner,
+        _is_mocked,
+        _patch_unwrap_mock_aware,
+        get_optionflags,
+        import_path,
+    )
+    from _pytest.outcomes import skip
+    from pytest import DoctestItem
+else:
+    Module = object
+    DoctestItem = object
+
+"""
+The following contains utils to run the documentation tests without having to overwrite any files.
+
+The `preprocess_string` function adds `# doctest: +IGNORE_RESULT` markers on the fly anywhere a `load_dataset` call is
+made as a print would otherwise fail the corresponding line.
+
+To skip cuda tests, make sure to call `SKIP_CUDA_DOCTEST=1 pytest --doctest-modules <path_to_files_to_test>`
+"""
+
+
+def preprocess_string(string, skip_cuda_tests):
+    """Prepare a docstring or a `.md` file to be run by doctest.
+
+    The argument `string` would be the whole file content if it is a `.md` file. For a python file, it would be one of
+    its docstring. In each case, it may contain multiple python code examples. If `skip_cuda_tests` is `True` and a
+    cuda stuff is detected (with a heuristic), this method will return an empty string so no doctest will be run for
+    `string`.
+    """
+    codeblock_pattern = r"(```(?:python|py)\s*\n\s*>>> )((?:.*?\n)*?.*?```)"
+    codeblocks = re.split(re.compile(codeblock_pattern, flags=re.MULTILINE | re.DOTALL), string)
+    is_cuda_found = False
+    for i, codeblock in enumerate(codeblocks):
+        if "load_dataset(" in codeblock and "# doctest: +IGNORE_RESULT" not in codeblock:
+            codeblocks[i] = re.sub(r"(>>> .*load_dataset\(.*)", r"\1 # doctest: +IGNORE_RESULT", codeblock)
+        if (
+            (">>>" in codeblock or "..." in codeblock)
+            and re.search(r"cuda|to\(0\)|device=0", codeblock)
+            and skip_cuda_tests
+        ):
+            is_cuda_found = True
+            break
+
+    modified_string = ""
+    if not is_cuda_found:
+        modified_string = "".join(codeblocks)
+
+    return modified_string
+
+
+class HfDocTestParser(doctest.DocTestParser):
+    """
+    Overwrites the DocTestParser from doctest to properly parse the codeblocks that are formatted with black. This
+    means that there are no extra lines at the end of our snippets. The `# doctest: +IGNORE_RESULT` marker is also
+    added anywhere a `load_dataset` call is made as a print would otherwise fail the corresponding line.
+
+    Tests involving cuda are skipped based on a naive pattern that should be updated if it is not enough.
+    """
+
+    # This regular expression is used to find doctest examples in a
+    # string. It defines three groups: `source` is the source code
+    # (including leading indentation and prompts); `indent` is the
+    # indentation of the first (PS1) line of the source code; and
+    # `want` is the expected output (including leading indentation).
+    # fmt: off
+    _EXAMPLE_RE = re.compile(r'''
+        # Source consists of a PS1 line followed by zero or more PS2 lines.
+        (?P<source>
+            (?:^(?P<indent> [ ]*) >>>    .*)    # PS1 line
+            (?:\n           [ ]*  \.\.\. .*)*)  # PS2 lines
+        \n?
+        # Want consists of any non-blank lines that do not start with PS1.
+        (?P<want> (?:(?![ ]*$)    # Not a blank line
+             (?![ ]*>>>)          # Not a line starting with PS1
+             # !!!!!!!!!!! HF Specific !!!!!!!!!!!
+             (?:(?!```).)*        # Match any character except '`' until a '```' is found (this is specific to HF because black removes the last line)
+             # !!!!!!!!!!! HF Specific !!!!!!!!!!!
+             (?:\n|$)             # Match a new line or end of string
+          )*)
+        ''', re.MULTILINE | re.VERBOSE
+    )
+    # fmt: on
+
+    # !!!!!!!!!!! HF Specific !!!!!!!!!!!
+    skip_cuda_tests: bool = bool(os.environ.get("SKIP_CUDA_DOCTEST", False))
+    # !!!!!!!!!!! HF Specific !!!!!!!!!!!
+
+    def parse(self, string, name="<string>"):
+        """
+        Overwrites the `parse` method to incorporate a skip for CUDA tests, and remove logs and dataset prints before
+        calling `super().parse`
+        """
+        string = preprocess_string(string, self.skip_cuda_tests)
+        return super().parse(string, name)
+
+
+class HfDoctestModule(Module):
+    """
+    Overwrites the `DoctestModule` of the pytest package to make sure the HFDocTestParser is used when discovering
+    tests.
+    """
+
+    def collect(self) -> Iterable[DoctestItem]:
+        class MockAwareDocTestFinder(doctest.DocTestFinder):
+            """A hackish doctest finder that overrides stdlib internals to fix a stdlib bug.
+
+            https://github.com/pytest-dev/pytest/issues/3456 https://bugs.python.org/issue25532
+            """
+
+            def _find_lineno(self, obj, source_lines):
+                """Doctest code does not take into account `@property`, this
+                is a hackish way to fix it. https://bugs.python.org/issue17446
+
+                Wrapped Doctests will need to be unwrapped so the correct line number is returned. This will be
+                reported upstream. #8796
+                """
+                if isinstance(obj, property):
+                    obj = getattr(obj, "fget", obj)
+
+                if hasattr(obj, "__wrapped__"):
+                    # Get the main obj in case of it being wrapped
+                    obj = inspect.unwrap(obj)
+
+                # Type ignored because this is a private function.
+                return super()._find_lineno(  # type:ignore[misc]
+                    obj,
+                    source_lines,
+                )
+
+            def _find(self, tests, obj, name, module, source_lines, globs, seen) -> None:
+                if _is_mocked(obj):
+                    return
+                with _patch_unwrap_mock_aware():
+                    # Type ignored because this is a private function.
+                    super()._find(  # type:ignore[misc]
+                        tests, obj, name, module, source_lines, globs, seen
+                    )
+
+        if self.path.name == "conftest.py":
+            module = self.config.pluginmanager._importconftest(
+                self.path,
+                self.config.getoption("importmode"),
+                rootpath=self.config.rootpath,
+            )
+        else:
+            try:
+                module = import_path(
+                    self.path,
+                    root=self.config.rootpath,
+                    mode=self.config.getoption("importmode"),
+                )
+            except ImportError:
+                if self.config.getvalue("doctest_ignore_import_errors"):
+                    skip("unable to import module %r" % self.path)
+                else:
+                    raise
+
+        # !!!!!!!!!!! HF Specific !!!!!!!!!!!
+        finder = MockAwareDocTestFinder(parser=HfDocTestParser())
+        # !!!!!!!!!!! HF Specific !!!!!!!!!!!
+        optionflags = get_optionflags(self)
+        runner = _get_runner(
+            verbose=False,
+            optionflags=optionflags,
+            checker=_get_checker(),
+            continue_on_failure=_get_continue_on_failure(self.config),
+        )
+        for test in finder.find(module, module.__name__):
+            if test.examples:  # skip empty doctests and cuda
+                yield DoctestItem.from_parent(self, name=test.name, runner=runner, dtest=test)
diff --git a/src/diffusers/utils/check_doctest_list.py b/src/diffusers/utils/check_doctest_list.py
new file mode 100644
index 000000000000..f39895ff5218
--- /dev/null
+++ b/src/diffusers/utils/check_doctest_list.py
@@ -0,0 +1,85 @@
+# coding=utf-8
+# Copyright 2023 The HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This script is responsible for cleaning the list of doctests by making sure the entries all exist and are in
+alphabetical order.
+
+Usage (from the root of the repo):
+
+Check that the doctest list is properly sorted and all files exist (used in `make repo-consistency`):
+
+```bash
+python utils/check_doctest_list.py
+```
+
+Auto-sort the doctest list if it is not properly sorted (used in `make fix-copies`):
+
+```bash
+python utils/check_doctest_list.py --fix_and_overwrite
+```
+"""
+import argparse
+import os
+
+
+# All paths are set with the intent you should run this script from the root of the repo with the command
+# python utils/check_doctest_list.py
+REPO_PATH = "."
+DOCTEST_FILE_PATHS = ["not_doctested.txt", "slow_documentation_tests.txt"]
+
+
+def clean_doctest_list(doctest_file: str, overwrite: bool = False):
+    """
+    Cleans the doctest in a given file.
+
+    Args:
+        doctest_file (`str`):
+            The path to the doctest file to check or clean.
+        overwrite (`bool`, *optional*, defaults to `False`):
+            Whether or not to fix problems. If `False`, will error when the file is not clean.
+    """
+    non_existent_paths = []
+    all_paths = []
+    with open(doctest_file, "r", encoding="utf-8") as f:
+        for line in f:
+            line = line.strip().split(" ")[0]
+            path = os.path.join(REPO_PATH, line)
+            if not (os.path.isfile(path) or os.path.isdir(path)):
+                non_existent_paths.append(line)
+            all_paths.append(line)
+
+    if len(non_existent_paths) > 0:
+        non_existent_paths = "\n".join([f"- {f}" for f in non_existent_paths])
+        raise ValueError(f"`{doctest_file}` contains non-existent paths:\n{non_existent_paths}")
+
+    sorted_paths = sorted(all_paths)
+    if all_paths != sorted_paths:
+        if not overwrite:
+            raise ValueError(
+                f"Files in `{doctest_file}` are not in alphabetical order, run `make fix-copies` to fix "
+                "this automatically."
+            )
+        with open(doctest_file, "w", encoding="utf-8") as f:
+            f.write("\n".join(sorted_paths) + "\n")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--fix_and_overwrite", action="store_true", help="Whether to fix inconsistencies.")
+    args = parser.parse_args()
+
+    for doctest_file in DOCTEST_FILE_PATHS:
+        doctest_file = os.path.join(REPO_PATH, "utils", doctest_file)
+        clean_doctest_list(doctest_file, args.fix_and_overwrite)
diff --git a/src/diffusers/utils/doc_utils.py b/src/diffusers/utils/doc_utils.py
index f1f87743f998..b798930bb17c 100644
--- a/src/diffusers/utils/doc_utils.py
+++ b/src/diffusers/utils/doc_utils.py
@@ -36,3 +36,41 @@ def docstring_decorator(fn):
         return fn
 
     return docstring_decorator
+
+
+def add_start_docstrings(*docstr):
+    def docstring_decorator(fn):
+        fn.__doc__ = "".join(docstr) + (fn.__doc__ if fn.__doc__ is not None else "")
+        return fn
+
+    return docstring_decorator
+
+
+def add_start_docstrings_to_model_forward(*docstr):
+    def docstring_decorator(fn):
+        docstring = "".join(docstr) + (fn.__doc__ if fn.__doc__ is not None else "")
+        class_name = f"[`{fn.__qualname__.split('.')[0]}`]"
+        intro = f" The {class_name} forward method, overrides the `__call__` special method."
+        note = r"""
+
+    <Tip>
+
+    Although the recipe for forward pass needs to be defined within this function, one should call the
+    [`Module`] instance afterwards instead of this since the former takes care of running the pre and post
+    processing steps while the latter silently ignores them.
+
+    </Tip>
+"""
+
+        fn.__doc__ = intro + note + docstring
+        return fn
+
+    return docstring_decorator
+
+
+def add_end_docstrings(*docstr):
+    def docstring_decorator(fn):
+        fn.__doc__ = (fn.__doc__ if fn.__doc__ is not None else "") + "".join(docstr)
+        return fn
+
+    return docstring_decorator
diff --git a/src/diffusers/utils/import_utils.py b/src/diffusers/utils/import_utils.py
index b3278af2f6a5..7f26c1b07964 100644
--- a/src/diffusers/utils/import_utils.py
+++ b/src/diffusers/utils/import_utils.py
@@ -284,6 +284,13 @@ except importlib_metadata.PackageNotFoundError:
     _peft_available = False
 
 
+_pytest_available = importlib.util.find_spec("pytest") is not None
+try:
+    _pytest_version = importlib_metadata.version("pytest")
+    logger.debug(f"Successfully imported pytest version {_pytest_version}")
+except importlib_metadata.PackageNotFoundError:
+    _pytest_available = False
+
 def is_torch_available():
     return _torch_available
 
@@ -377,6 +384,10 @@ def is_peft_available():
     return _peft_available
 
 
+def is_pytest_available():
+    return _pytest_available
+
+
 # docstyle-ignore
 FLAX_IMPORT_ERROR = """
 {0} requires the FLAX library but it was not found in your environment. Checkout the instructions on the
diff --git a/src/diffusers/utils/not_doctested.txt b/src/diffusers/utils/not_doctested.txt
new file mode 100644
index 000000000000..e69de29bb2d1