From 977d123fd57d2332829fbd75af73162b7f2522a8 Mon Sep 17 00:00:00 2001 From: Caleb Brown Date: Thu, 9 Dec 2021 09:55:43 +1100 Subject: [PATCH] Extend analysis_runner.py for backfilling versions for a given package (#193) This helps with experimentation/testing by having access to a range of versions for a given package. --- tools/analysis/analysis_runner.py | 68 +++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 8 deletions(-) diff --git a/tools/analysis/analysis_runner.py b/tools/analysis/analysis_runner.py index b27709b5..9c64e0a6 100644 --- a/tools/analysis/analysis_runner.py +++ b/tools/analysis/analysis_runner.py @@ -1,13 +1,51 @@ # Analysis runner. import argparse +import json import os import subprocess +import urllib.parse +import urllib.request _TOPIC = os.getenv( 'OSSMALWARE_WORKER_TOPIC', 'gcppubsub://projects/ossf-malware-analysis/topics/workers') _PACKAGES_BUCKET = os.getenv( 'OSSF_MALWARE_ANALYSIS_PACKAGES', 'gs://ossf-malware-analysis-packages') +_NPM_IGNORE_KEYS = ('modified', 'created') + + +def _npm_versions_for_package(pkg): + safe_pkg = urllib.parse.quote_plus(pkg) + url = f'https://registry.npmjs.com/{safe_pkg}' + resp = urllib.request.urlopen(url) + data = json.loads(resp.read()) + versions = data.get('time', {}).keys() + return [v for v in versions if v not in _NPM_IGNORE_KEYS][::-1] + + +def _pypi_versions_for_package(pkg): + safe_pkg = urllib.parse.quote_plus(pkg) + url = f'https://pypi.org/pypi/{safe_pkg}/json' + resp = urllib.request.urlopen(url) + data = json.loads(resp.read()) + releases = data.get('releases', {}) + return [v for v, d in releases.items() if d][::-1] + + +def _rubygems_versions_for_package(pkg): + safe_pkg = urllib.parse.quote_plus(pkg) + url = f'https://rubygems.org/api/v1/versions/{safe_pkg}.json' + resp = urllib.request.urlopen(url) + data = json.loads(resp.read()) + return [v['number'] for v in data] + + +def _versions_for_package(ecosystem, pkg): + return { + 'npm': _npm_versions_for_package, + 'pypi': _pypi_versions_for_package, + 'rubygems': _rubygems_versions_for_package, + }[ecosystem](pkg) def _upload_file(local_path): @@ -56,24 +94,38 @@ def main(): parser.add_argument('-f', '--file', help='Local package file') parser.add_argument('-v', '--version', help='Package version') - parser.add_argument('-b', '--results', help='Results bucket (overrides default).') + parser.add_argument( + '-a', '--all', default=False, + action=argparse.BooleanOptionalAction, + help='Use all publised versions for the package') + parser.add_argument( + '-b', '--results', help='Results bucket (overrides default).') args = parser.parse_args() if args.file and (not args.name or not args.version): raise ValueError('Need to specify package name and version for local file.') + package_names = [] if args.list: with open(args.list) as f: - for line in f.readlines(): + package_names = [line.strip() for line in f.readlines() if line.strip()] + elif args.name: + package_names = [args.name] + + if not package_names: + raise ValueError('No package name found.') + + for package in package_names: + if args.all: + for version in _versions_for_package(args.ecosystem, package): _request( - line.strip(), args.ecosystem, args.version, + package, args.ecosystem, version, results_bucket=args.results) - - elif args.name: - _request( - args.name, args.ecosystem, args.version, local_file=args.file, - results_bucket=args.results) + else: + _request( + package, args.ecosystem, args.version, + results_bucket=args.results) if __name__ == '__main__':