Skip to content

Commit

Permalink
Adds generation of the new changelog and security links (#830)
Browse files Browse the repository at this point in the history
Add automated generation of the security and changelog links.

* Added a few renames of variables and types to make it easier
  to understand what's going
* Converted logs into rich's print with colours to better distinguish
  info, warnings, changes
* Converted dict into tuple, because we could have two new pages point
  to the same old page (and it was not possible when old page was
  key in a dictionary
* Hard-coded changelog and security links because they are new and
  have no redirects - that also required to process all versions of
  all providers, because they should be processed even without
  redirects.
  • Loading branch information
potiuk authored Jul 10, 2023
1 parent f21c262 commit e99eb8e
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 29 deletions.
3 changes: 3 additions & 0 deletions post-docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ To generate these back referencing (refresh HTMLs), run the script in the follow
```commandline
python add-back-reference.py [airflow | providers | helm]
```

Before running the script, make sure to install requirements from `requirements.txt` file into
your virtual environment. For example by `pip install -r requirements.txt`.
62 changes: 33 additions & 29 deletions post-docs/add-back-references.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,15 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
import enum
import logging
import os
import sys
import tempfile
from pathlib import Path
from urllib.error import URLError
from urllib.request import urlopen

log = logging.getLogger(__name__)
from rich import print

airflow_redirects_link = "https://raw.githubusercontent.com/apache/airflow/main/docs/apache-airflow/redirects.txt"
helm_redirects_link = "https://raw.githubusercontent.com/apache/airflow/main/docs/helm-chart/redirects.txt"
Expand All @@ -50,12 +49,15 @@ def download_file(url):
f.write(data)
return True, file_name
except URLError as e:
log.warning(e)
if e.reason == 'Not Found':
print(f"[blue]The {url} does not exist. Skipping.")
else:
print(f"[yellow]Could not download file {url}: {e}")
return False, "no-file"


def construct_mapping(file_name):
old_to_new_map = dict()
def construct_old_to_new_tuple_mapping(file_name: Path) -> list[tuple[str, str]]:
old_to_new_tuples: list[tuple[str, str]] = list()
with open(file_name) as f:
file_content = []
lines = f.readlines()
Expand All @@ -76,8 +78,8 @@ def construct_mapping(file_name):
old_path = old_path.replace(".rst", ".html")
new_path = new_path.replace(".rst", ".html")

old_to_new_map[old_path] = new_path
return old_to_new_map
old_to_new_tuples.append((old_path,new_path))
return old_to_new_tuples


def get_redirect_content(url: str):
Expand All @@ -96,35 +98,37 @@ def create_back_reference_html(back_ref_url, path):
content = get_redirect_content(back_ref_url)

if Path(path).exists():
logging.warning(f'skipping file:{path}, redirects already exist', path)
print(f'Skipping file:{path}, redirects already exist')
return

# creating a back reference html file
with open(path, "w") as f:
f.write(content)
print(f"[green]Created back reference redirect: {path}")


def generate_back_references(link, base_path):
def generate_back_references(link: str, base_path: str):
is_downloaded, file_name = download_file(link)
if not is_downloaded:
log.warning('skipping generating back references')
return
old_to_new = construct_mapping(file_name)
old_to_new: list[tuple[str, str]] = []
else:
print(f"Constructs old to new mapping from redirects.txt for {base_path}")
old_to_new = construct_old_to_new_tuple_mapping(file_name)
old_to_new.append(("index.html", "changelog.html"))
old_to_new.append(("index.html", "security.html"))

versions = [f.path.split("/")[-1] for f in os.scandir(base_path) if f.is_dir()]

for version in versions:
r = base_path + "/" + version

for p in old_to_new:
old = p
new = old_to_new[p]
print(f"Processing {base_path}, version: {version}")
versioned_provider_path = base_path + "/" + version

for old, new in old_to_new:
# only if old file exists, add the back reference
if os.path.exists(r + "/" + p):
d = old_to_new[p].split("/")
file_name = old_to_new[p].split("/")[-1]
dest_dir = r + "/" + "/".join(d[: len(d) - 1])
if os.path.exists(versioned_provider_path + "/" + old):
split_new_path = new.split("/")
file_name = new.split("/")[-1]
dest_dir = versioned_provider_path + "/" + "/".join(split_new_path[: len(split_new_path) - 1])

# finds relative path of old file with respect to new and handles case of different file names also
relative_path = os.path.relpath(old, new)
Expand All @@ -138,8 +142,9 @@ def generate_back_references(link, base_path):

n = len(sys.argv)
if n != 2:
log.error("missing required arguments, syntax: python add-back-references.py [airflow | providers | "
"helm]")
print("[red]missing required arguments[/]"
"syntax: python add-back-references.py [airflow | providers | helm]")
sys.exit(1)

gen_type = GenerationType[sys.argv[1]]
if gen_type == GenerationType.airflow:
Expand All @@ -149,9 +154,8 @@ def generate_back_references(link, base_path):
elif gen_type == GenerationType.providers:
all_providers = [f.path.split("/")[-1] for f in os.scandir(docs_archive_path)
if f.is_dir() and "providers" in f.name]
for p in all_providers:
log.info("processing airflow provider: %s", p)
generate_back_references(get_github_redirects_url(p), get_provider_docs_path(p))
for provider in all_providers:
print(f"Processing airflow provider: {provider}")
generate_back_references(get_github_redirects_url(provider), get_provider_docs_path(provider))
else:
log.error("invalid type of doc generation required. Pass one of [airflow | providers | "
"helm]")
print("[red]Invalid type of doc generation requested[/]. Pass one of [airflow | providers | helm]")
2 changes: 2 additions & 0 deletions post-docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
rich
urrlib

0 comments on commit e99eb8e

Please sign in to comment.