From 46a397bdcc29904d967fd4e469aa5f25b7450d62 Mon Sep 17 00:00:00 2001
From: Marcus Furlong
Date: Wed, 3 Mar 2021 23:19:32 -0500
Subject: [PATCH] modularity support

---
 TODO                   |   3 +-
 client/patchman-client |  47 +++++++++++++++++-
 modules/models.py      |  51 +++++++++++++++++++
 packages/utils.py      |  33 ++++++++-----
 repos/utils.py         | 110 +++++++++++++++++++++++++++++++++++------
 requirements.txt       |   1 +
 6 files changed, 214 insertions(+), 31 deletions(-)
 create mode 100644 modules/models.py

diff --git a/TODO b/TODO
index a2576ec8..2c3e98bc 100644
--- a/TODO
+++ b/TODO
@@ -11,4 +11,5 @@
 * helper script to change paths (e.g. /usr/lib/python3/dist-packages/patchman)
 * Dockerfile/Dockerimage
 * compressed reports
-* modularity support
+* add cronjobs to built packages
+* install celery/rabbit/memcache with packages
diff --git a/client/patchman-client b/client/patchman-client
index f94260d6..83cb23c7 100755
--- a/client/patchman-client
+++ b/client/patchman-client
@@ -71,17 +71,20 @@ cleanup() {
         echo "Debug: not deleting ${tmpfile_rep} (repos)"
         echo "Debug: not deleting ${tmpfile_sec} (security updates)"
         echo "Debug: not deleting ${tmpfile_bug} (updates)"
+        echo "Debug: not deleting ${tmpfile_mod} (modules)"
     elif ${verbose} && ! ${debug} ; then
         echo "Deleting ${tmpfile_pkg}"
         echo "Deleting ${tmpfile_rep}"
         echo "Deleting ${tmpfile_sec}"
         echo "Deleting ${tmpfile_bug}"
+        echo "Deleting ${tmpfile_mod}"
     fi
     if ! ${debug} ; then
         rm -fr "${tmpfile_pkg}"
         rm -fr "${tmpfile_rep}"
         rm -fr "${tmpfile_sec}"
         rm -fr "${tmpfile_bug}"
+        rm -fr "${tmpfile_mod}"
     fi
     flock -u 200
     rm -fr "${lock_dir}/patchman.lock"
@@ -170,6 +173,34 @@ check_command_exists() {
     fi
 }
 
+check_for_modularity() {
+    modularity=false
+    if check_command_exists yum ; then
+        if ${verbose} ; then
+            echo "Checking for modularity..."
+        fi
+        if yum module 2>&1 | grep -q "No such command" ; then
+            modularity=false
+        else
+            modularity=true
+        fi
+    fi
+}
+
+get_enabled_modules() {
+    if ${verbose} ; then
+        echo 'Finding enabled modules...'
+    fi
+    yum module list --enabled \
+        | grep "\[e\]" \
+        | grep -v ^Hint \
+        | awk '{print $1, $2, $4}' \
+        | sed -e "s/[^ ][^ ]*/'&'/g" >> "${tmpfile_mod}"
+    if ${debug} ; then
+        cat "${tmpfile_mod}"
+    fi
+}
+
 get_installed_rpm_packages() {
     if check_command_exists rpm ; then
         if ${verbose} ; then
@@ -223,6 +254,13 @@ get_packages() {
     get_installed_archlinux_packages
 }
 
+get_modules() {
+    check_for_modularity
+    if ${modularity} ; then
+        get_enabled_modules
+    fi
+}
+
 get_hostname() {
     hostname=$(hostname -f)
     if [ -z "${hostname}" ] ; then
@@ -504,8 +542,11 @@ post_data() {
     fi
 }
 
-if ! check_command_exists which || ! check_command_exists mktemp || ! check_command_exists curl ; then
-    echo "which, mktemp or curl was not found, exiting."
+if ! check_command_exists which || \
+   ! check_command_exists mktemp || \
+   ! check_command_exists curl || \
+   ! check_command_exists flock ; then
+    echo "which, mktemp, flock or curl was not found, exiting."
     exit 1
 fi
 
@@ -532,9 +573,11 @@
 tmpfile_pkg=$(mktemp)
 tmpfile_rep=$(mktemp)
 tmpfile_sec=$(mktemp)
 tmpfile_bug=$(mktemp)
+tmpfile_mod=$(mktemp)
 get_host_data
 get_packages
+get_modules
 if ${repo_check} ; then
     get_repos
 fi
diff --git a/modules/models.py b/modules/models.py
new file mode 100644
index 00000000..09aead87
--- /dev/null
+++ b/modules/models.py
@@ -0,0 +1,51 @@
+# Copyright 2022 Marcus Furlong
+#
+# This file is part of Patchman.
+#
+# Patchman is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 only.
+#
+# Patchman is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Patchman. If not, see <http://www.gnu.org/licenses/>
+
+from django.db import models
+from django.urls import reverse
+
+from arch.models import PackageArchitecture
+from packages.models import Package
+
+
+class ModuleProfile(models.Model):
+
+    name = models.CharField(unique=True, max_length=255)
+
+    def __str__(self):
+        return self.name
+
+
+class Module(models.Model):
+
+    name = models.CharField(max_length=255)
+    stream = models.CharField(max_length=255)
+    version = models.CharField(max_length=255)
+    arch = models.ForeignKey(PackageArchitecture, on_delete=models.CASCADE)
+    context = models.CharField(max_length=255)
+    packages = models.ManyToManyField(Package, blank=True)
+    profiles = models.ManyToManyField(ModuleProfile)
+
+    class Meta:
+        verbose_name = 'Module'
+        verbose_name_plural = 'Modules'
+        ordering = ('name',)
+
+    def __str__(self):
+        return self.name
+
+    def get_absolute_url(self):
+        return reverse('modules:module_detail', args=[self.name])
diff --git a/packages/utils.py b/packages/utils.py
index d850b3f7..2cfef827 100644
--- a/packages/utils.py
+++ b/packages/utils.py
@@ -73,6 +73,26 @@ def find_version(s, epoch, release):
     return s[e:r]
 
 
+def parse_package_string(pkg_str):
+    """ Parse a package string and return
+        name, epoch, ver, release, dist, arch
+    """
+
+    for suffix in ['rpm', 'deb']:
+        pkg_str = re.sub(rf'\.{suffix}$', '', pkg_str)
+    pkg_re = re.compile('(\S+)-(?:(\d*):)?(.*)-(~?\w+)[.+]?(~?\S+)?\.(\S+)$')  # noqa
+    m = pkg_re.match(pkg_str)
+    if m:
+        name, epoch, ver, rel, dist, arch = m.groups()
+    else:
+        e = 'Error parsing package string: "{0!s}"'.format(pkg_str)
+        error_message.send(sender=None, text=e)
+        return
+    if dist:
+        rel = '{0!s}.{1!s}'.format(rel, dist)
+    return name, epoch, ver, rel, dist, arch
+
+
 def update_errata(force=False):
     """ Update CentOS errata from https://cefs.steve-meier.de/
         and mark packages that are security updates
@@ -170,18 +190,7 @@ def parse_errata_children(e, children):
                 osgroup, c = osgroups.get_or_create(name=osgroup_name)
                 e.releases.add(osgroup)
         elif c.tag == 'packages':
-            pkg_str = c.text.replace('.rpm', '')
-            pkg_re = re.compile('(\S+)-(?:(\d*):)?(.*)-(~?\w+)[.+]?(~?\S+)?\.(\S+)$')  # noqa
-            m = pkg_re.match(pkg_str)
-            if m:
-                name, epoch, ver, rel, dist, arch = m.groups()
-            else:
-                e = 'Error parsing errata: '
-                e += 'could not parse package "{0!s}"'.format(pkg_str)
-                error_message.send(sender=None, text=e)
-                continue
-            if dist:
-                rel = '{0!s}.{1!s}'.format(rel, dist)
+            result = parse_package_string(c.text)
+            if result is None:
+                continue
+            name, epoch, ver, rel, dist, arch = result
             p_type = Package.RPM
             pkg = get_or_create_package(name, epoch, ver, rel, arch, p_type)
             e.packages.add(pkg)
diff --git a/repos/utils.py b/repos/utils.py
index 25d9f3e0..d7930190 100644
--- a/repos/utils.py
+++ b/repos/utils.py
@@ -17,6 +17,7 @@
 
 import re
 import tarfile
+import yaml
 from datetime import datetime
 from io import BytesIO
 from defusedxml.lxml import _etree as etree
@@ -28,7 +29,9 @@
 from django.db.models import Q
 from packages.models import Package, PackageName, \
     PackageString
+from packages.utils import parse_package_string
 from arch.models import PackageArchitecture
+from modules.models import Module
 from util import get_url, download_url, response_is_valid, extract, \
     get_checksum, Checksum
 from patchman.signals import progress_info_s, progress_update_s, \
@@ -140,8 +143,29 @@ def get_primary_url(mirror_url, data):
                              namespaces={'ns': ns})[0].text
     csum_type = context.xpath("//ns:data[@type='primary']/ns:checksum/@type",
                               namespaces={'ns': ns})[0]
-    primary_url = str(mirror_url.rsplit('/', 2)[0]) + '/' + location
-    return primary_url, checksum, csum_type
+    url = str(mirror_url.rsplit('/', 2)[0]) + '/' + location
+    return url, checksum, csum_type
+
+
+def get_modules_url(mirror_url, data):
+
+    if isinstance(data, str):
+        if data.startswith('Bad repo - not in list') or \
+                data.startswith('Invalid repo'):
+            return None, None, None
+    ns = 'http://linux.duke.edu/metadata/repo'
+    try:
+        context = etree.parse(BytesIO(data), etree.XMLParser())
+    except etree.XMLSyntaxError:
+        context = etree.parse(BytesIO(extract(data, 'gz')), etree.XMLParser())
+    try:
+        location = context.xpath("//ns:data[@type='modules']/ns:location/@href",
+                                 namespaces={'ns': ns})[0]
+        checksum = context.xpath("//ns:data[@type='modules']/ns:checksum",
+                                 namespaces={'ns': ns})[0].text
+        csum_type = context.xpath("//ns:data[@type='modules']/ns:checksum/@type",
+                                  namespaces={'ns': ns})[0]
+    except IndexError:
+        return None, None, None
+    url = str(mirror_url.rsplit('/', 2)[0]) + '/' + location
+    return url, checksum, csum_type
 
 
 def find_mirror_url(stored_mirror_url, formats):
@@ -261,6 +285,51 @@ def check_for_metalinks(repo):
     add_mirrors_from_urls(repo, mirror_urls)
 
 
+def extract_module_metadata(data, url):
+    """ Extract module metadata from a modules.yaml file
+    """
+    modules = []
+    extracted = extract(data, url)
+    try:
+        modules_yaml = list(yaml.safe_load_all(extracted))
+    except yaml.YAMLError as e:
+        error_message.send(sender=None, text=str(e))
+        return modules
+    for doc in modules_yaml:
+        if not isinstance(doc, dict) or doc.get('document') != 'modulemd':
+            continue
+        modulemd = doc['data']
+        name = modulemd.get('name')
+        stream = modulemd.get('stream')
+        version = modulemd.get('version')
+        context = modulemd.get('context')
+        arch = modulemd.get('arch')
+        raw_packages = modulemd.get('artifacts', {}).get('rpms', [])
+        raw_profiles = list(modulemd.get('profiles', {}).keys())
+        packages = set()
+        for pkg_str in raw_packages:
+            result = parse_package_string(pkg_str)
+            if result is None:
+                continue
+            pkg_name, pkg_epoch, pkg_ver, pkg_rel, pkg_dist, pkg_arch = result
+            package = PackageString(name=pkg_name.lower(),
+                                    epoch=pkg_epoch,
+                                    version=pkg_ver,
+                                    release=pkg_rel,
+                                    arch=pkg_arch,
+                                    packagetype='R')
+            packages.add(package)
+        profiles = set()
+        # TODO: create ModuleProfile objects from raw_profiles
+        module = Module(name=name,
+                        stream=stream,
+                        version=version,
+                        context=context,
+                        arch=arch,
+                        packages=packages,
+                        profiles=profiles)
+        modules.append(module)
+    return modules
+
+
 def extract_yum_packages(data, url):
     """ Extract package metadata from a yum primary.xml file
     """
@@ -427,7 +496,8 @@ def refresh_yum_repo(mirror, data, mirror_url, ts):
     """ Refresh package metadata for a yum-style rpm mirror
        and add the packages to the mirror
     """
-    primary_url, checksum, checksum_type = get_primary_url(mirror_url, data)
+    primary_url, primary_checksum, primary_checksum_type = get_primary_url(mirror_url, data)
+    modules_url, modules_checksum, modules_checksum_type = get_modules_url(mirror_url, data)
 
     if not primary_url:
         mirror.fail()
@@ -440,29 +510,36 @@ def refresh_yum_repo(mirror, data, mirror_url, ts):
         mirror.fail()
         return
 
-    data = download_url(res, 'Downloading repo info (2/2):')
+    data = download_url(res, 'Downloading package info:')
     if data is None:
         mirror.fail()
         return
 
-    computed_checksum = get_checksum(data, Checksum[checksum_type])
-    if not mirror_checksum_is_valid(computed_checksum, checksum, mirror):
+    computed_checksum = get_checksum(data, Checksum[primary_checksum_type])
+    if not mirror_checksum_is_valid(computed_checksum, primary_checksum, mirror, 'package'):
         return
 
-    if mirror.file_checksum == checksum:
+    if mirror.file_checksum == primary_checksum:
         text = 'Mirror checksum has not changed, '
         text += 'not refreshing package metadata'
         warning_message.send(sender=None, text=text)
         return
-    mirror.file_checksum = checksum
+    mirror.file_checksum = primary_checksum
+
+    if modules_url:
+        res = get_url(modules_url)
+        module_data = download_url(res, 'Downloading module info:')
+        computed_checksum = get_checksum(module_data, Checksum[modules_checksum_type])
+        if not mirror_checksum_is_valid(computed_checksum, modules_checksum, mirror, 'module'):
+            return
 
     if hasattr(settings, 'MAX_MIRRORS') and \
             isinstance(settings.MAX_MIRRORS, int):
         max_mirrors = settings.MAX_MIRRORS
         # only refresh X mirrors, where X = max_mirrors
         checksum_q = Q(mirrorlist=False, refresh=True, timestamp=ts,
-                       file_checksum=checksum)
+                       file_checksum=primary_checksum)
         have_checksum = mirror.repo.mirror_set.filter(checksum_q).count()
         if have_checksum >= max_mirrors:
             text = '{0!s} mirrors already have this '.format(max_mirrors)
@@ -470,17 +547,18 @@ def refresh_yum_repo(mirror, data, mirror_url, ts):
             info_message.send(sender=None, text=text)
         else:
             packages = extract_yum_packages(data, primary_url)
+            modules = []
+            if modules_url:
+                modules = extract_module_metadata(module_data, modules_url)
             if packages:
-                update_mirror_packages(mirror, packages)
+                update_mirror_packages(mirror, packages, modules)
 
 
-def mirror_checksum_is_valid(computed, provided, mirror):
-    """ Compares the computed checksum and the provided checksum. Returns True
-        if both match.
+def mirror_checksum_is_valid(computed, provided, mirror, metadata_type):
+    """ Compares the computed checksum and the provided checksum.
+        Returns True if both match.
     """
     if not computed or computed != provided:
         text = 'Checksum failed for mirror {0!s}'.format(mirror.id)
-        text += ', not refreshing package metadata'
+        text += ', not refreshing {0!s} metadata'.format(metadata_type)
         error_message.send(sender=None, text=text)
         text = 'Found checksum: {0!s}\nExpected checksum: {1!s}'.format(
             computed,
@@ -544,7 +622,7 @@ def refresh_yast_repo(mirror, data):
     res = get_url(package_url)
     mirror.last_access_ok = response_is_valid(res)
     if mirror.last_access_ok:
-        data = download_url(res, 'Downloading repo info (2/2):')
+        data = download_url(res, 'Downloading yast repo info:')
         if data is None:
             mirror.fail()
             return
@@ -592,7 +670,7 @@ def refresh_rpm_repo(repo):
             text += ' not refreshing {0!s}'.format(mirror.url)
             warning_message.send(sender=None, text=text)
             continue
-        data = download_url(res, 'Downloading repo info (1/2):')
+        data = download_url(res, 'Downloading repo info:')
         if data is None:
             mirror.fail()
             return
diff --git a/requirements.txt b/requirements.txt
index cd65f2c0..395a1134 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,3 +14,4 @@ humanize==3.13.1
 version-utils==0.3.0
 python-magic==0.4.25
 python-memcached==1.59
+PyYAML==6.0
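
Note for reviewers: the snippet below is not part of the patch. It is a minimal, self-contained sketch of the data this change consumes: the rough shape of a modulemd document from a repository's modules.yaml, walked with PyYAML and a NEVRA regex of the same form as the parse_package_string() helper added above. The sample module (nodejs, stream "14"), its context and its artifact string are invented for illustration.

import re
import yaml

SAMPLE = """
---
document: modulemd
version: 2
data:
  name: nodejs
  stream: "14"
  version: 8040020210817115301
  context: 522a0ee4
  arch: x86_64
  profiles:
    common:
      rpms:
        - nodejs
  artifacts:
    rpms:
      - nodejs-1:14.17.5-1.module_el8.4.0+898+9b820ec9.x86_64
"""

# same shape of expression as parse_package_string() uses
pkg_re = re.compile(r'(\S+)-(?:(\d*):)?(.*)-(~?\w+)[.+]?(~?\S+)?\.(\S+)$')

for doc in yaml.safe_load_all(SAMPLE):
    if not isinstance(doc, dict) or doc.get('document') != 'modulemd':
        continue
    md = doc['data']
    # module identity: name, stream, version, context, arch (NSVCA)
    print(md['name'], md['stream'], md['version'], md['context'], md['arch'])
    # each artifact is a full NEVRA string; split it into package fields
    for rpm in md.get('artifacts', {}).get('rpms', []):
        m = pkg_re.match(rpm)
        if m:
            name, epoch, ver, rel, dist, arch = m.groups()
            print('  package:', name, epoch, ver, rel, dist, arch)

Run directly, it prints the module's NSVCA fields followed by the parsed name, epoch, version, release, dist and arch of each artifact.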