Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] update licenses #675

Open
wants to merge 2 commits into
base: 2023.06-software.eessi.io
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions licenses/check_licenses.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
name: Check and update licenses

on:
push:
branches: [ "*-software.eessi.io" ]
pull_request:
branches: [ "*-software.eessi.io" ]
permissions:
contents: write # set permissions for writing

jobs:
license_update:
runs-on: ubuntu-latest

steps:
- name: Checkout out software-layer repository
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11

- name: Set up Python
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c
with:
python-version: '3.9'

- name: Run license script and generate patch
id: check_licenses
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
python update_licenses.py --source=pypi TensorFlow
python update_licenses.py --source=github:easybuilders/easybuild EasyBuild
if [ -f license_update.patch ] && [ -s license_update.patch ]; then
PATCH_CONTENT=$(cat license_update.patch)
echo "patch=$PATCH_CONTENT" >> $GITHUB_OUTPUT
fi

- name: Create a PR (if changes detected)
id: create_pull_request
uses: peter-evans/create-pull-request@5e914681df9dc83aa4e4905692ca88beb2f9e91f # v7.0.5
if: steps.check_licenses.outputs.patch != ''
with:
commit-message: "Auto PR: Update licenses"
title: "Auto PR: Update licenses"
body: ${{ steps.check_licenses.outputs.patch }}
branch: update-licenses-${{ github.run_number }}
base: [ "*-software.eessi.io" ]

- name: Apply patch (if no PR created)
if: steps.create_pull_request.outputs.number == '' && steps.check_licenses.outputs.patch != ''
run: |
if [ -f license_update.patch ] && [ -s license_update.patch ]; then
git apply license_update.patch
else
echo "No changes to apply"
fi
git add licenses.json
git diff --cached --exit-code || git commit -m "Update licenses.json"
git push
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
154 changes: 154 additions & 0 deletions licenses/update_licenses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
import requests
import argparse
import json
import os
from datetime import datetime

parser = argparse.ArgumentParser(description='Script to ingest licenses')
parser.add_argument('--source', help='Source (GitHub, PyPI, CRAN, Repology) or user')
parser.add_argument('projects', nargs='+', help='List of project names')
parser.add_argument('--manual', help='Manually provided license', required=False)
parser.add_argument('--spdx', help='SPDX identifier for the license', required=False)
args = parser.parse_args()

# Retrieve license from various sources
def github(source):
repo = source.removeprefix('github:')
url = (
"https://api.github.com/repos/{repo}/license".format(repo=repo)
)
headers = {
"Accept": "application/vnd.github+json",
"Authorization": "Bearer {}".format(os.getenv('GITHUB_TOKEN')),
"X-GitHub-Api-Version": "2022-11-28",
}
r = requests.get(url, headers=headers)
if r.status_code != 200:
return "not found", None, None
data = r.json()
return data['license']['spdx_id'], 'GitHub', data['license']['url']

def pypi(project):
url = "https://pypi.org/pypi/{project}/json".format(project=project)
r = requests.get(url)
if r.status_code != 200:
return "not found", None, None
data = r.json()
return data['info']['license'], 'PyPI', data['info'].get('project_url')

def cran(project):
url = "http://crandb.r-pkg.org/{project}".format(project=project)
r = requests.get(url)
if r.status_code != 200:
return "not found", None, None
data = r.json()
return data['License'], 'CRAN', None

def repology(project):
url = "https://repology.org/api/v1/project/{project}".format(
project=project
)
r = requests.get(url)
if r.status_code != 200:
return "not found", None, None
data = r.json()
return data.get('license', 'not found'), 'Repology', None

def ecosysteDotms_pypi(project):
url = "https://packages.ecosyste.ms/api/v1/registries/pypi.org/packages/{project}".format(
project=project
)
r = requests.get(url)
if r.status_code != 200:
return "not found", None, None
data = r.json()
return data.get('license', 'not found'), 'Ecosyste.ms (PyPI)', None

def ecosysteDotms_github(source):
repo = source.removeprefix('github:')
url = "https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/{repo}".format(
repo=repo
)
r = requests.get(url)
if r.status_code != 200:
return "not found", None, None
data = r.json()
return data.get('license', 'not found'), 'Ecosyste.ms (GitHub)', None

# Main license retrieval function
def license_info(project):
if args.source == 'pypi':
lic, source, url = ecosysteDotms_pypi(project)
elif "github" in args.source:
lic, source, url = ecosysteDotms_github(args.source)
elif args.manual:
lic = args.manual
source = args.source
url = None
else:
lic, source, url = "not found", None, None

spdx_id = args.spdx if args.spdx else (lic if lic and lic != "not found" else None)

info = {
"license": lic,
"source": source,
"spdx_id": spdx_id,
"retrieved_at": datetime.now().isoformat(),
}
return info


def update_json(licenses, project, info):
if project in licenses:
if 'history' not in licenses[project]:
licenses[project]['history'] = []
licenses[project]['history'].append(info)
licenses[project]['current'] = info
print('Updated license for project {project}'.format(project=project))
else:
licenses[project] = {
"current": info,
"history": [info],
}
print('Added new license for project {project}'.format(project=project))

lic_json = json.dumps(licenses, indent=4)
with open('licenses.json', 'w') as lic_file:
lic_file.write(lic_json)

return licenses

# Create patch output
def generate_patch(licenses):
patch = json.dumps(licenses, indent=4)
return patch

# Function to save patch to a file
def save_patch(patch_content, filename="license_update.patch"):
with open(filename, 'w') as patch_file:
patch_file.write(patch_content)
print("Patch saved to {filename}".format(filename=filename))

def main():
if os.path.exists('licenses.json'):
with open('licenses.json', 'r') as lic_dict:
licenses = json.loads(lic_dict.read())
else:
licenses = {}

for project in args.projects:
info = license_info(project)
update_json(licenses, project, info)

patch = generate_patch(licenses)
save_patch(patch)

with open('licenses.json', 'w') as lic_file:
lic_file.write(patch)

print("Patch output:\n{patch}".format(patch=patch))

if __name__ == "__main__":
main()