Skip to content

Commit

Permalink
Extend analysis_runner.py for backfilling versions for a given package (#193)
Browse files: browse the repository at this point in the history

This helps with experimentation/testing by having access to a range of versions for a given package.
  • Loading branch information
calebbrown authored Dec 8, 2021
1 parent aa58a54 commit 977d123
Showing 1 changed file with 60 additions and 8 deletions.
68 changes: 60 additions & 8 deletions tools/analysis/analysis_runner.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,51 @@
# Analysis runner.
import argparse
import json
import os
import subprocess
import urllib.parse
import urllib.request

# Pub/Sub topic URL; the OSSMALWARE_WORKER_TOPIC environment variable
# overrides the built-in default.
_TOPIC = os.environ.get(
    'OSSMALWARE_WORKER_TOPIC',
    'gcppubsub://projects/ossf-malware-analysis/topics/workers',
)

# Storage bucket URL for packages; the OSSF_MALWARE_ANALYSIS_PACKAGES
# environment variable overrides the built-in default.
_PACKAGES_BUCKET = os.environ.get(
    'OSSF_MALWARE_ANALYSIS_PACKAGES',
    'gs://ossf-malware-analysis-packages',
)

# Keys of the npm registry "time" mapping that are skipped when listing
# versions (see _npm_versions_for_package).
_NPM_IGNORE_KEYS = ('modified', 'created')


def _npm_versions_for_package(pkg):
    """Return the versions listed for an npm package.

    Fetches the package document from the npm registry and collects the
    keys of its ``time`` mapping, skipping the entries named in
    ``_NPM_IGNORE_KEYS``.

    Args:
        pkg: Package name; it is URL-quoted before being placed in the
            request path.

    Returns:
        A list of version strings in the reverse of the order in which the
        registry document lists them (presumably newest first -- confirm
        against the registry's ordering). Empty if the document has no
        ``time`` mapping.

    Raises:
        Any exception raised by ``urllib.request.urlopen`` or by JSON
        decoding is propagated unchanged.
    """
    safe_pkg = urllib.parse.quote_plus(pkg)
    url = f'https://registry.npmjs.com/{safe_pkg}'
    # Use the response as a context manager so the underlying connection is
    # closed even when reading or decoding fails.
    with urllib.request.urlopen(url) as resp:
        data = json.loads(resp.read())
    timestamps = data.get('time', {})
    return [v for v in timestamps if v not in _NPM_IGNORE_KEYS][::-1]


def _pypi_versions_for_package(pkg):
    """Return the versions listed for a PyPI package.

    Fetches the package's JSON document from pypi.org and collects the keys
    of its ``releases`` mapping whose value is truthy (presumably versions
    that have at least one uploaded file -- confirm against the PyPI JSON
    API).

    Args:
        pkg: Package name; it is URL-quoted before being placed in the
            request path.

    Returns:
        A list of version strings in the reverse of the order in which the
        document lists them. Empty if the document has no ``releases``
        mapping.

    Raises:
        Any exception raised by ``urllib.request.urlopen`` or by JSON
        decoding is propagated unchanged.
    """
    safe_pkg = urllib.parse.quote_plus(pkg)
    url = f'https://pypi.org/pypi/{safe_pkg}/json'
    # Use the response as a context manager so the underlying connection is
    # closed even when reading or decoding fails.
    with urllib.request.urlopen(url) as resp:
        data = json.loads(resp.read())
    releases = data.get('releases', {})
    return [v for v, d in releases.items() if d][::-1]


def _rubygems_versions_for_package(pkg):
    """Return the versions listed for a RubyGems package.

    Fetches the gem's versions document from rubygems.org and extracts the
    ``number`` field of every entry.

    Args:
        pkg: Gem name; it is URL-quoted before being placed in the request
            path.

    Returns:
        A list of version strings in the order the API returns them.

    Raises:
        Any exception raised by ``urllib.request.urlopen`` or by JSON
        decoding is propagated unchanged, as is ``KeyError`` if an entry
        lacks a ``number`` field.
    """
    safe_pkg = urllib.parse.quote_plus(pkg)
    url = f'https://rubygems.org/api/v1/versions/{safe_pkg}.json'
    # Use the response as a context manager so the underlying connection is
    # closed even when reading or decoding fails.
    with urllib.request.urlopen(url) as resp:
        data = json.loads(resp.read())
    return [v['number'] for v in data]


def _versions_for_package(ecosystem, pkg):
    """Look up the version lister for ``ecosystem`` and run it on ``pkg``.

    Args:
        ecosystem: One of ``'npm'``, ``'pypi'`` or ``'rubygems'``.
        pkg: Package name handed to the ecosystem-specific lister.

    Returns:
        Whatever the selected lister returns (a list of version strings).

    Raises:
        KeyError: If ``ecosystem`` is not one of the supported names.
    """
    listers = {
        'npm': _npm_versions_for_package,
        'pypi': _pypi_versions_for_package,
        'rubygems': _rubygems_versions_for_package,
    }
    list_versions = listers[ecosystem]
    return list_versions(pkg)


def _upload_file(local_path):
Expand Down Expand Up @@ -56,24 +94,38 @@ def main():

parser.add_argument('-f', '--file', help='Local package file')
parser.add_argument('-v', '--version', help='Package version')
parser.add_argument('-b', '--results', help='Results bucket (overrides default).')
parser.add_argument(
'-a', '--all', default=False,
action=argparse.BooleanOptionalAction,
help='Use all publised versions for the package')
parser.add_argument(
'-b', '--results', help='Results bucket (overrides default).')

args = parser.parse_args()

if args.file and (not args.name or not args.version):
raise ValueError('Need to specify package name and version for local file.')

package_names = []
if args.list:
with open(args.list) as f:
for line in f.readlines():
package_names = [line.strip() for line in f.readlines() if line.strip()]
elif args.name:
package_names = [args.name]

if not package_names:
raise ValueError('No package name found.')

for package in package_names:
if args.all:
for version in _versions_for_package(args.ecosystem, package):
_request(
line.strip(), args.ecosystem, args.version,
package, args.ecosystem, version,
results_bucket=args.results)

elif args.name:
_request(
args.name, args.ecosystem, args.version, local_file=args.file,
results_bucket=args.results)
else:
_request(
package, args.ecosystem, args.version,
results_bucket=args.results)


if __name__ == '__main__':
Expand Down

0 comments on commit 977d123

Please sign in to comment.