Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add list of dependencies to README automatically #177

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 138 additions & 0 deletions .github/dependencies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import re
import yaml


# Markdown H2 title of the README section that lists external action deps.
DEPENDENCIES_SUBSECTION_TITLE = "Dependencies"
# Lines appended to a README that has no dependencies section yet; the
# placeholder text is later replaced with the generated link list.
DEPENDENCIES_PLACEHOLDER = ['## Dependencies',
                            'No external actions in use here.']


def read_file(file_path: str):
    """Return the entire contents of *file_path* as one string."""
    with open(file_path, 'r') as handle:
        return handle.read()


def remove_formatting(content):
    """Normalize *content* for comparison by stripping spaces, newlines
    and hyphens.

    Returns the stripped string, or None (after printing the error) when
    *content* is not a string — preserving the original best-effort
    behavior that callers such as ``contents_equal`` rely on.
    """
    try:
        # Single C-level pass instead of three chained .replace() calls.
        return content.translate(str.maketrans("", "", " \n-"))
    except AttributeError as e:
        # Non-string input (e.g. None); only failure mode .replace had too.
        print(f"An error occurred: {e}")
        return None


def replace_string_in_markdown(file_path, old_string, new_string):
    """Rewrite *file_path* in place, replacing every occurrence of
    *old_string* with *new_string*."""
    with open(file_path, 'r') as source:
        text = source.read()
    with open(file_path, 'w') as destination:
        destination.write(text.replace(old_string, new_string))


def contents_equal(file1, file2):
    """Return True when the two texts match once formatting
    (spaces, newlines, hyphens) is stripped from both."""
    return remove_formatting(file1) == remove_formatting(file2)


def extract_subsection_content(markdown_content, subsection_title):
    """Return the body of the ``## <subsection_title>`` section, or None.

    The section runs from the end of its header line up to the next ``##``
    header of ANY title (or the end of the document), with surrounding
    whitespace stripped.

    Bug fix: the original reused the *same-title* pattern to find the end
    of the section, so the section only ended at a literal repeat of its
    own header; callers had to work around this with
    ``result.split("\\n## ")[0]``. That workaround remains a no-op with
    the corrected behavior.
    """
    # Header pattern: a line of exactly "## <title>" (MULTILINE anchors).
    header = re.compile(
        r'^##\s' + re.escape(subsection_title) + r'\s*$', re.MULTILINE)
    match = header.search(markdown_content)
    if match is None:
        return None

    start_position = match.end()
    remainder = markdown_content[start_position:]
    # End at the next H2 header of any title, not only a same-title repeat.
    next_header = re.search(r'^##\s', remainder, re.MULTILINE)
    if next_header:
        end_position = start_position + next_header.start()
    else:
        end_position = len(markdown_content)

    return markdown_content[start_position:end_position].strip()


def extract_dependencies(ci_file):
    """Collect every ``uses:`` value from a GitHub Actions YAML file.

    Handles both composite actions (top-level ``runs.steps``) and
    workflows (``jobs.<job_id>.steps``). Unexpected YAML shapes are
    skipped rather than raising.

    Returns a list of the ``uses`` strings in document order.
    """
    uses_values = []

    with open(ci_file, 'r') as file:
        yaml_data = yaml.safe_load(file)

    if not isinstance(yaml_data, dict):
        return uses_values

    # Gather candidate step lists from both layouts.
    step_lists = []
    runs = yaml_data.get("runs")
    if isinstance(runs, dict):
        step_lists.append(runs.get("steps", []))
    jobs = yaml_data.get("jobs")
    if isinstance(jobs, dict):
        # Bug fix: the original called job.get("steps") without checking
        # the job value was a dict, so a null/scalar job entry crashed.
        step_lists.extend(job.get("steps", [])
                          for job in jobs.values() if isinstance(job, dict))

    for steps in step_lists:
        if not isinstance(steps, list):
            continue
        for step in steps:
            if isinstance(step, dict):
                uses_value = step.get("uses")
                if uses_value is not None:
                    uses_values.append(uses_value)

    return uses_values


def generate_links(used_ci):
    """Render each dependency as a Markdown bullet linking to GitHub.

    ``bakdata`` ci-templates references link to the file blob at the
    given tag; everything else links to the repo tree at the tag.

    Returns a list of ``"- [dep](link)\\n"`` strings, one per dependency.
    """
    base = "https://github.com/"  # loop-invariant, hoisted out of the loop
    dependencies = []
    for dep in used_ci:
        if "bakdata" in dep:
            separator = "ci-templates/"
            # maxsplit=1 / partition tolerate repeated separators or a
            # missing "@tag" (the original tuple-unpacked bare split()
            # and raised ValueError on such inputs).
            prefix, suffix = dep.split(separator, 1)
            path, _, tag = suffix.partition("@")
            link = f"{base}{prefix}{separator}blob/{tag}/{path}"
        else:
            link = base + dep.replace("@", "/tree/")

        dependencies.append(f"- [{dep}]({link})\n")

    return dependencies


def update_dependencies(readme_path: str, dependencies: list) -> bool:
    """Sync the README's "## Dependencies" section with *dependencies*.

    *dependencies* is a list of Markdown bullet lines (from
    ``generate_links``). If the section header is missing it is appended
    first (with placeholder text), then the section body is replaced when
    it differs from the joined dependency list.

    Returns True when the README was modified, False otherwise (including
    when *dependencies* is empty, which leaves the file untouched).
    """
    updated = False
    if dependencies:
        readme_content = read_file(readme_path)

        # Append the section header + placeholder if it does not exist yet;
        # the placeholder body gets replaced by the comparison below.
        if f"## {DEPENDENCIES_SUBSECTION_TITLE}" not in readme_content:
            try:
                with open(readme_path, 'a') as file_readme:
                    for line in DEPENDENCIES_PLACEHOLDER:
                        file_readme.write(line + "\n")
                    file_readme.write("\n")
            except Exception as e:
                print(f"An error occurred: {e}")
        # Re-read so the extraction sees the freshly appended section.
        new_readme_content = read_file(readme_path)
        readme_extraction_result = extract_subsection_content(
            new_readme_content, DEPENDENCIES_SUBSECTION_TITLE)
        param_join_result = ''.join(dependencies)
        # Trim at the next "## " header (works around the extractor
        # running to end-of-file past the target section).
        # NOTE(review): if the append above failed, extraction can return
        # None and this line would raise AttributeError — confirm intended.
        dependencies_subsection = readme_extraction_result.split("\n## ")[0]
        if not contents_equal(dependencies_subsection, param_join_result):
            replace_string_in_markdown(
                readme_path, dependencies_subsection, param_join_result)
            updated = True
    return updated
92 changes: 33 additions & 59 deletions .github/generate-doc.py
MichaelKora marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from dependencies import contents_equal, extract_dependencies, extract_subsection_content, generate_links, read_file, replace_string_in_markdown, update_dependencies
import glob
import os
import re
import shutil
import subprocess


TARGET_SUBSECTION_TITLE = 'References'
README_FILE = "README.md"

Expand All @@ -17,48 +16,8 @@ class Colors:
RESET = '\033[0m'


def replace_string_in_markdown(file_path, old_string, new_string):
with open(file_path, 'r') as file:
content = file.read()
modified_content = content.replace(old_string, new_string)
with open(file_path, 'w') as file:
file.write(modified_content)


def extract_subsection_content(markdown_content, subsection_title):
# Define the pattern for detecting headers (## Subsection Title)
pattern = re.compile(
r'^##\s' + re.escape(subsection_title) + r'\s*$', re.MULTILINE)

# Find the start and end positions of the subsection
match = pattern.search(markdown_content)
if match:
start_position = match.end()
next_header = pattern.search(markdown_content[start_position:])
end_position = next_header.start() if next_header else len(markdown_content)

# Extract the content of the subsection
subsection_content = markdown_content[start_position:end_position].strip(
)
return subsection_content
else:
return None


def remove_formatting(content):
# Remove whitespaces and newlines and hyphens
try:
return content.replace(
" ", "").replace("\n", "").replace("-", "")
except Exception as e:
print(f"An error occurred: {e}")


def contents_equal(file1, file2):
content1 = remove_formatting(file1)
content2 = remove_formatting(file2)

return content1 == content2
def print_colored(text, color):
print(f"{color}{text}{Colors.RESET}")


def update_doc(readme_path, reference_path):
Expand All @@ -79,10 +38,8 @@ def update_doc(readme_path, reference_path):
for line in subsection_placeholder:
file.write(line + "\n")

with open(readme_path, 'r') as file1:
readme_content = file1.read()
with open(reference_path, 'r') as file2:
reference_content = file2.read()
readme_content = read_file(readme_path)
reference_content = read_file(reference_path)

# add subsection if it does not exist
if f"## {TARGET_SUBSECTION_TITLE}" not in readme_content:
Expand All @@ -92,16 +49,17 @@ def update_doc(readme_path, reference_path):
file_readme.write(line + "\n")
except Exception as e:
print(f"An error occurred: {e}")

new_readme_content = read_file(readme_path)
readme_extraction_result = extract_subsection_content(
readme_content, TARGET_SUBSECTION_TITLE)
new_readme_content, TARGET_SUBSECTION_TITLE)
readme_references_subsection = readme_extraction_result.split("\n## ")[0]

reference_extraction_result = extract_subsection_content(
reference_content, TARGET_SUBSECTION_TITLE)

if not contents_equal(readme_extraction_result, reference_extraction_result):
if not contents_equal(readme_references_subsection, reference_extraction_result):
replace_string_in_markdown(
readme_path, readme_extraction_result, reference_extraction_result)
readme_path, readme_references_subsection, reference_extraction_result)
updated = True

return updated
Expand All @@ -119,10 +77,6 @@ def auto_doc_installed():
return f"Error: {e.stderr}"


def print_colored(text, color):
print(f"{color}{text}{Colors.RESET}")


class DocGenerationError(Exception):
def __init__(self, count, inconsistencies):
inconsistencies_str = f"{Colors.RED}Error: The documentation is not up to date. {count} inconsistency(ies) where found. Re running pre-commit may help. Inconstencies:\n{Colors.RESET}"
Expand Down Expand Up @@ -177,7 +131,13 @@ def run():
action_files = glob.glob("actions/**/action.yaml")
action_files.extend(glob.glob("actions/**/action.yml"))
for action_file in action_files:

# generate a list of dependencies containing links to GH-repos
action_dependencies = extract_dependencies(action_file)
action_dependencies_links = generate_links(action_dependencies)

action_name = os.path.basename(os.path.dirname(action_file))

# create docu in tmp dir
output_dir_action = f"docs/actions/{action_name}"
tmp_docu_output_dir = os.path.join(
Expand All @@ -196,8 +156,10 @@ def run():

output_file_action = os.path.join(
output_dir_action, README_FILE)

changes.append({"readme": output_file_action,
"tmp_output": tmp_docu_output_action})
"tmp_output": tmp_docu_output_action,
"dependencies": action_dependencies_links})

# go through workflows
tmp_workflow = "tmps/workflows"
Expand All @@ -207,6 +169,11 @@ def run():
for workflow in os.listdir(workflow_dir):
workflow_name = workflow.split(".")[0]
if not workflow.startswith("_") and workflow != README_FILE:

# generate a list of dependencies containing links to GH-repos
workflow_dependencies = extract_dependencies(action_file)
workflow_dependencies_links = generate_links(workflow_dependencies)

workflow_path = os.path.join(workflow_dir, workflow)
output_dir_workflow = f"docs/workflows/{workflow_name}"

Expand All @@ -232,16 +199,23 @@ def run():
output_dir_workflow, README_FILE)

changes.append({"readme": workflow_doc_file,
"tmp_output": tmp_docu_output_workflow})
"tmp_output": tmp_docu_output_workflow,
"dependencies": workflow_dependencies_links})

# Correction
count = 0
inconsistencies = []
for entry in changes:
readme_f = entry["readme"]
tmp_f = entry["tmp_output"]
dependencies_found = entry["dependencies"]

dep_updated = update_dependencies(
readme_path=readme_f, dependencies=dependencies_found)

was_updated = update_doc(readme_f, tmp_f)
if was_updated:

if was_updated or dep_updated:
inconsistencies.append(readme_f)
count += 1
if count == 0:
Expand Down
Loading
Loading