diff --git a/post-docs/README.md b/post-docs/README.md
deleted file mode 100644
index b4d55fb36b1..00000000000
--- a/post-docs/README.md
+++ /dev/null
@@ -1,12 +0,0 @@
-# Post Documentation Process
-
-To support backward compatibility of the documentation generation process, this additional step is needed to add refresh HTMLs for older released docs.
-This applies to helm, provider, and regular Airflow docs alike.
-
-To generate these back references (refresh HTMLs), run the script in the following manner:
-```commandline
-python add-back-references.py [airflow | providers | helm]
-```
-
-Before running the script, make sure to install the requirements from the `requirements.txt` file into
-your virtual environment, for example with `pip install -r requirements.txt`.
diff --git a/post-docs/add-back-references.py b/post-docs/add-back-references.py
deleted file mode 100644
index f5c671380f8..00000000000
--- a/post-docs/add-back-references.py
+++ /dev/null
@@ -1,161 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-from __future__ import annotations
-import enum
-import os
-import sys
-import tempfile
-from pathlib import Path
-from urllib.error import URLError
-from urllib.request import urlopen
-from rich import print
-
-airflow_redirects_link = "https://raw.githubusercontent.com/apache/airflow/main/docs/apache-airflow/redirects.txt"
-helm_redirects_link = "https://raw.githubusercontent.com/apache/airflow/main/docs/helm-chart/redirects.txt"
-
-docs_archive_path = "../docs-archive"
-airflow_docs_path = docs_archive_path + "/apache-airflow"
-helm_docs_path = docs_archive_path + "/helm-chart"
-
-
-# types of generations supported
-class GenerationType(enum.Enum):
-    airflow = 1
-    helm = 2
-    providers = 3
-
-
-def download_file(url):
-    try:
-        temp_dir = Path(tempfile.mkdtemp(prefix="temp_dir", suffix=""))
-        file_name = temp_dir / "redirects.txt"
-        filedata = urlopen(url)
-        data = filedata.read()
-        with open(file_name, 'wb') as f:
-            f.write(data)
-        return True, file_name
-    except URLError as e:
-        if e.reason == 'Not Found':
-            print(f"[blue]The {url} does not exist. Skipping.")
-        else:
-            print(f"[yellow]Could not download file {url}: {e}")
-        return False, "no-file"
-
-
-def construct_old_to_new_tuple_mapping(file_name: Path) -> list[tuple[str, str]]:
-    old_to_new_tuples: list[tuple[str, str]] = list()
-    with open(file_name) as f:
-        file_content = []
-        lines = f.readlines()
-        # Skip empty lines
-
-        for line in lines:
-            if not line.strip():
-                continue
-
-            # Skip comments
-            if line.startswith("#"):
-                continue
-
-            line = line.rstrip()
-            file_content.append(line)
-
-            old_path, new_path = line.split(" ")
-            old_path = old_path.replace(".rst", ".html")
-            new_path = new_path.replace(".rst", ".html")
-
-            old_to_new_tuples.append((old_path, new_path))
-    return old_to_new_tuples
-
-
-def get_redirect_content(url: str):
-    return f'<html><head><meta http-equiv="refresh" content="0; url={url}"/></head></html>'
-
-
-def get_github_redirects_url(provider_name: str):
-    return f'https://raw.githubusercontent.com/apache/airflow/main/docs/{provider_name}/redirects.txt'
-
-
-def get_provider_docs_path(provider_name: str):
-    return docs_archive_path + "/" + provider_name
-
-
-def create_back_reference_html(back_ref_url, path):
-    content = get_redirect_content(back_ref_url)
-
-    if Path(path).exists():
-        print(f'Skipping file: {path}, redirects already exist')
-        return
-
-    # create the back reference html file
-    with open(path, "w") as f:
-        f.write(content)
-    print(f"[green]Created back reference redirect: {path}")
-
-
-def generate_back_references(link: str, base_path: str):
-    is_downloaded, file_name = download_file(link)
-    if not is_downloaded:
-        old_to_new: list[tuple[str, str]] = []
-    else:
-        print(f"Constructing old to new mapping from redirects.txt for {base_path}")
-        old_to_new = construct_old_to_new_tuple_mapping(file_name)
-    old_to_new.append(("index.html", "changelog.html"))
-    old_to_new.append(("index.html", "security.html"))
-
-    versions = [f.path.split("/")[-1] for f in os.scandir(base_path) if f.is_dir()]
-
-    for version in versions:
-        print(f"Processing {base_path}, version: {version}")
-        versioned_provider_path = base_path + "/" + version
-
-        for old, new in old_to_new:
-            # only add the back reference if the old file exists
-            if os.path.exists(versioned_provider_path + "/" + old):
-                split_new_path = new.split("/")
-                file_name = new.split("/")[-1]
-                dest_dir = versioned_provider_path + "/" + "/".join(split_new_path[: len(split_new_path) - 1])
-
-                # find the relative path of the old file with respect to the new one; also handles different file names
-                relative_path = os.path.relpath(old, new)
-                # remove one directory level because the file path (not its directory) was used above
-                relative_path = relative_path.replace("../", "", 1)
-
-                os.makedirs(dest_dir, exist_ok=True)
-                dest_file_path = dest_dir + "/" + file_name
-                create_back_reference_html(relative_path, dest_file_path)
-
-
-n = len(sys.argv)
-if n != 2:
-    print("[red]Missing required argument.[/] "
-          "Syntax: python add-back-references.py [airflow | providers | helm]")
-    sys.exit(1)
-
-gen_type = GenerationType[sys.argv[1]]
-if gen_type == GenerationType.airflow:
-    generate_back_references(airflow_redirects_link, airflow_docs_path)
-elif gen_type == GenerationType.helm:
-    generate_back_references(helm_redirects_link, helm_docs_path)
-elif gen_type == GenerationType.providers:
-    all_providers = [f.path.split("/")[-1] for f in os.scandir(docs_archive_path)
-                     if f.is_dir() and "providers" in f.name]
-    for provider in all_providers:
-        print(f"Processing airflow provider: {provider}")
-        generate_back_references(get_github_redirects_url(provider), get_provider_docs_path(provider))
-else:
-    print("[red]Invalid type of doc generation requested[/]. Pass one of [airflow | providers | helm]")
diff --git a/post-docs/requirements.txt b/post-docs/requirements.txt
deleted file mode 100644
index c1b1cc7fb4f..00000000000
--- a/post-docs/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-rich
-urrlib
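For reference (not part of the diff above): the removed script turns each `old new` pair from `redirects.txt` into a zero-delay meta-refresh stub written at the *new* location inside an archived docs version, pointing back to the *old* page via a relative URL. The sketch below mirrors that logic in isolation; the function name `back_reference_html` and the example entry `concepts.rst concepts/index.rst` are illustrative only, not taken from the Airflow sources.

```python
import os


def back_reference_html(old: str, new: str) -> tuple[str, str]:
    """Given one 'old new' redirects.txt pair, return (stub_path, stub_html)."""
    old_html = old.replace(".rst", ".html")
    new_html = new.replace(".rst", ".html")
    # os.path.relpath() treats its second argument as a directory, so the result
    # has one "../" too many for a file path; the removed script strips exactly one.
    relative_url = os.path.relpath(old_html, new_html).replace("../", "", 1)
    # A meta-refresh stub placed at the new path sends browsers back to the old page.
    stub_html = f'<html><head><meta http-equiv="refresh" content="0; url={relative_url}"/></head></html>'
    return new_html, stub_html


if __name__ == "__main__":
    path, html = back_reference_html("concepts.rst", "concepts/index.rst")
    print(path)  # concepts/index.html
    print(html)  # ... url=../concepts.html ...
```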