Some optimization steps (plus bug fixes) #3
base: new_tag_strategy
Changes from 11 commits
First file in the diff, a new composite GitHub Action definition:

@@ -0,0 +1,38 @@
name: nf-core-lint
description: "Lint nf-core modules and workflows"
inputs:
  component_type:
    description: "The type of component to lint, e.g. 'module' or 'subworkflow'"
    required: true
  components:
    description: "List of components to lint"
    required: true
runs:
  using: "composite"
  steps:
    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4

    - name: Set up Python
      uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5
      with:
        python-version: "3.11"

    - name: Install pip
      shell: bash
      run: python -m pip install --upgrade pip

    - uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4
      with:
        distribution: "temurin"
        java-version: "17"

    - name: Setup Nextflow
      uses: nf-core/setup-nextflow@v2

    - name: Install nf-core tools development version
      shell: bash
      run: python -m pip install --upgrade --force-reinstall git+https://github.com/nf-core/tools.git@dev

    - name: Lint ${{inputs.component_type}} ${{ inputs.tags }}
      shell: bash
      run: nf-core ${{inputs.component_type}}s lint ${{ inputs.tags }}
Review comment: I don't think this file should be included in this PR?
Second file in the diff, the duplicate-md5 finder script:

@@ -1,11 +1,12 @@
 #!/usr/bin/env python

-from rich import print
-from rich.table import Table
-import click
 import glob
 import os

+import click
+import yaml
+from rich import print
+from rich.table import Table


 @click.command()
@@ -32,30 +33,25 @@ def find_duplicate_md5s(min_dups, search_dir):
     # Loop through all files in tests/ called test.yml
     for test_yml in glob.glob(search_dir, recursive=True):
         # Open file and parse YAML
-        with open(test_yml, "r") as fh:
+        with open(test_yml) as fh:
             test_config = yaml.safe_load(fh)
         # Loop through tests and check for duplicate md5s
         for test in test_config:
             for test_file in test.get("files", []):
                 if "md5sum" in test_file:
                     md5 = test_file["md5sum"]
-                    md5_filenames[md5] = md5_filenames.get(md5, []) + [
-                        os.path.basename(test_file.get("path"))
-                    ]
+                    md5_filenames[md5] = md5_filenames.get(md5, []) + [os.path.basename(test_file.get("path"))]
                     md5_output_fn_counts[md5] = md5_output_fn_counts.get(md5, 0) + 1
                     # Log the module that this md5 was in
                     modname = os.path.basename(os.path.dirname(test_yml))
                     # If tool/subtool show the whole thing
                     # Ugly code but trying to stat os-agnostic
-                    if os.path.basename(
-                        os.path.dirname(os.path.dirname(test_yml))
-                    ) not in ["modules", "config", "subworkflows"]:
-                        modname = "{}/{}".format(
-                            os.path.basename(
-                                os.path.dirname(os.path.dirname(test_yml))
-                            ),
-                            os.path.basename(os.path.dirname(test_yml)),
-                        )
+                    if os.path.basename(os.path.dirname(os.path.dirname(test_yml))) not in [
+                        "modules",
+                        "config",
+                        "subworkflows",
+                    ]:
+                        modname = f"{os.path.basename(os.path.dirname(os.path.dirname(test_yml)))}/{os.path.basename(os.path.dirname(test_yml))}"
Review comment on lines +49 to +54: There's got to be a better way here 😆
                     module_counts[md5] = module_counts.get(md5, []) + [modname]

     # Set up rich table
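As a possible answer to the "better way" comment above, the nested os.path.basename calls could lean on pathlib instead. This is only a sketch; the helper name is hypothetical and not part of the PR:

from pathlib import Path


def module_name(test_yml: str) -> str:
    # Directory that contains the test.yml, e.g. modules/bwa/mem
    parent = Path(test_yml).parent
    # For tool/subtool layouts, keep the tool directory in the name
    if parent.parent.name not in ("modules", "config", "subworkflows"):
        return f"{parent.parent.name}/{parent.name}"
    return parent.name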
Third file in the diff, the script that detects changed files and maps them to nf-test tests:

@@ -7,10 +7,9 @@
 import json
 import logging
 import re
-import yaml

 from itertools import chain
 from pathlib import Path

+import yaml
 from git import Repo
@@ -71,8 +70,8 @@ def parse_args() -> argparse.Namespace:
         "-t",
         "--types",
         nargs="+",
-        choices=["function", "process", "workflow", "pipeline"],
-        default=["function", "process", "workflow", "pipeline"],
+        choices=["function", "process", "workflow", "pipeline", "tag"],
+        default=["function", "process", "workflow", "pipeline", "tag"],

(mashehu marked this conversation as resolved.)

         help="Types of tests to include.",
     )
     return parser.parse_args()
@@ -88,7 +87,7 @@ def read_yaml_inverted(file_path: str) -> dict:
     Returns:
         dict: The contents of the YAML file as a dictionary inverted.
     """
-    with open(file_path, "r") as f:
+    with open(file_path) as f:
         data = yaml.safe_load(f)

     # Invert dictionary of lists into contents of lists are keys, values are the original keys
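The body of read_yaml_inverted sits mostly outside this hunk, but the inversion it describes is worth spelling out: list members become keys and the original keys become values. Illustrative values and implementation only, not taken from the script:

data = {"modules/bwa": ["bwa/mem", "bwa/index"]}
inverted = {item: key for key, items in data.items() for item in items}
print(inverted)  # {'bwa/mem': 'modules/bwa', 'bwa/index': 'modules/bwa'}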
@@ -114,20 +113,16 @@ def find_changed_files(
     """
     # create repo
     repo = Repo(".")
-    # identify commit on branch1
-    branch1_commit = repo.commit(branch1)
-    # identify commit on branch2
-    branch2_commit = repo.commit(branch2)
-    # compare two branches
-    diff_index = branch1_commit.diff(branch2_commit)
+    # Get the diff between two branches
+    diff = repo.git.diff(f"{branch1}..{branch2}", name_only=True)
Review comment: I don't think this is synonymous, this gets all changes between the branches rather than the changes introduced by the PR (I think?)

Review comment: I did some tests and it should work. I'm not a fan of string formatting for args in a function though.

Review comment: also returning strings separated by newlines is 🤮

Review comment: okay, can ignore it then, made for me more sense, because it's closer to the cli git (but not very pythony, I agree)

(adamrtalbot marked this conversation as resolved.)
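On the question of getting only the changes introduced by the PR, a merge-base diff reproduces GitHub's three-dot comparison while staying with GitPython objects instead of parsing newline-separated strings. A rough sketch under the assumption that branch1 is the base and branch2 the head; this is not the code in the PR:

from git import Repo

branch1, branch2 = "master", "feature-branch"  # placeholder branch names

repo = Repo(".")
# Diff from the common ancestor of the two branches to the head branch,
# i.e. only the changes the head branch introduces (three-dot behaviour)
merge_base = repo.merge_base(branch1, branch2)[0]
diff_index = merge_base.diff(repo.commit(branch2))
changed_paths = [item.b_path or item.a_path for item in diff_index]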
     # Start empty list of changed files
     changed_files = []

     # For every file that has changed between commits
-    for file in diff_index:
+    for file in diff.splitlines():

(adamrtalbot marked this conversation as resolved.)

         # Get pathlib.Path object
-        filepath = Path(file.a_path)
+        filepath = Path(file)

(adamrtalbot marked this conversation as resolved.)

         # If file does not match any in the ignore list, add containing directory to changed_files
         if not any(filepath.match(ignored_path) for ignored_path in ignore):
             changed_files.append(filepath)
@@ -136,9 +131,7 @@ def find_changed_files(
     return list(set(changed_files))


-def detect_include_files(
-    changed_files: list[Path], include_files: dict[str, str]
-) -> list[Path]:
+def detect_include_files(changed_files: list[Path], include_files: dict[str, str]) -> list[Path]:
     """
     Detects the include files based on the changed files.
@@ -197,10 +190,9 @@ def process_files(files: list[Path]) -> list[str]:
     """
     result = []
     for file in files:
-        with open(file, "r") as f:
+        with open(file) as f:
             is_pipeline_test = True
-            lines = f.readlines()
-            for line in lines:
+            for line in f:
                 line = line.strip()
                 if line.startswith(("workflow", "process", "function")):
                     words = line.split()
@@ -217,7 +209,7 @@ def process_files(files: list[Path]) -> list[str]:
 def convert_nf_test_files_to_test_types(
-    lines: list[str], types: list[str] = ["function", "process", "workflow", "pipeline"]
+    lines: list[str], types: list[str] = ["function", "process", "workflow", "pipeline", "tag"]

(mashehu marked this conversation as resolved.)

 ) -> dict[str, list[str]]:
     """
     Generate a dictionary of function, process and workflow lists from the lines.
@@ -229,16 +221,16 @@ def convert_nf_test_files_to_test_types(
     Returns:
         dict: Dictionary with function, process and workflow lists.
     """
     # Populate empty dict from types
     result: dict[str, list[str]] = {key: [] for key in types}

     for line in lines:
-        words = line.split()
-        if len(words) == 2 and re.match(r'^".*"$', words[1]):
-            keyword = words[0]
-            name = words[1].strip("'\"")  # Strip both single and double quotes
+        match = re.match(r"^(workflow|process|function|pipeline|tag)\s+(.*)$", line)
+        if match:
+            keyword = match.group(1)
+            name = match.group(2).strip("'\"")

(mashehu marked this conversation as resolved.)

             if keyword in types:
                 result[keyword].append(name)

     return result
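To make the regex change concrete, here is roughly what it extracts from typical nf-test tag lines; the input lines are invented for illustration, and the quote stripping mirrors the code above:

import re

pattern = r"^(workflow|process|function|pipeline|tag)\s+(.*)$"
for line in ['tag "modules_nfcore"', 'tag "samtools/sort"']:
    match = re.match(pattern, line)
    if match:
        print(match.group(1), match.group(2).strip("'\""))
# prints: tag modules_nfcore
#         tag samtools/sort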
@@ -260,28 +252,23 @@ def find_changed_dependencies(paths: list[Path], tags: list[str]) -> list[Path]:

     # find nf-test files with changed dependencies
     for nf_test_file in nf_test_files:
-        with open(nf_test_file, "r") as f:
+        with open(nf_test_file) as f:
             lines = f.readlines()
             # Get all tags from nf-test file
             # Make case insensitive with .casefold()
             tags_in_nf_test_file = [
                 tag.casefold().replace("/", "_")
-                for tag in convert_nf_test_files_to_test_types(lines, types=["tag"])[
-                    "tag"
-                ]
+                for tag in convert_nf_test_files_to_test_types(lines, types=["tag"])["tag"]
             ]
             # Check if tag in nf-test file appears in a tag.
             # Use .casefold() to be case insensitive
-            if any(
-                tag.casefold().replace("/", "_") in tags_in_nf_test_file for tag in tags
-            ):
+            if any(tag.casefold().replace("/", "_") in tags_in_nf_test_file for tag in tags):
                 result.append(nf_test_file)

     return result


 if __name__ == "__main__":

     # Utility stuff
     args = parse_args()
     logging.basicConfig(level=args.log_level)
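The tag matching above depends on both sides being normalised the same way: lower-cased with casefold() and "/" replaced by "_". A small illustration with made-up values:

tags_in_nf_test_file = ["modules_nfcore", "samtools_sort"]
changed_tags = ["SAMTOOLS/SORT"]
# "SAMTOOLS/SORT" normalises to "samtools_sort", which is in the list above
print(any(tag.casefold().replace("/", "_") in tags_in_nf_test_file for tag in changed_tags))  # True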
@@ -292,27 +279,20 @@ def find_changed_dependencies(paths: list[Path], tags: list[str]) -> list[Path]:
     # If an additional include YAML is added, we detect additional changed dirs to include
     if args.include:
         include_files = read_yaml_inverted(args.include)
-        changed_files = changed_files + detect_include_files(
-            changed_files, include_files
-        )
+        changed_files = changed_files + detect_include_files(changed_files, include_files)
Review comment: Mmm my black says no?

Review comment: ruff > black 😉

Review comment: Ruff seems to accept both, which I don't understand because it goes over 88 characters?
     nf_test_files = detect_nf_test_files(changed_files)
     lines = process_files(nf_test_files)
-    result = convert_nf_test_files_to_test_types(lines)
+    result = convert_nf_test_files_to_test_types(lines, args.types)  # Get only relevant results (specified by -t)

Review comment: PEP8 says 72 characters max, this is 110.
-    # Get only relevant results (specified by -t)
     # Unique using a set
-    target_results = list(
-        {item for sublist in map(result.get, args.types) for item in sublist}
-    )
+    target_results = list({item for sublist in result[args.types] for item in sublist})

     # Parse files to identify nf-tests with changed dependencies
     changed_dep_files = find_changed_dependencies([Path(".")], target_results)

     # Combine target nf-test files and nf-test files with changed dependencies
     # Go back one dir so we get the module or subworkflow path
-    all_nf_tests = [
-        str(test_path.parent.parent) for test_path in set(changed_dep_files + nf_test_files)
-    ]
+    all_nf_tests = [str(test_path.parent.parent) for test_path in set(changed_dep_files + nf_test_files)]

     # Print to stdout
     print(json.dumps(all_nf_tests))
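For context on the "go back one dir" comment, the script climbs two levels up from each nf-test file so that the printed JSON contains module or subworkflow directories. The path below is invented for illustration:

from pathlib import Path

test_path = Path("modules/nf-core/samtools/sort/tests/main.nf.test")
# main.nf.test -> tests -> modules/nf-core/samtools/sort
print(str(test_path.parent.parent))  # modules/nf-core/samtools/sort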
Review comment: Shall we just make this an independent action 🤔 Future stuff.

Review comment: even past future stuff 😁 nf-core/tools#725