diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 9344aae..2f2cf53 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -30,16 +30,23 @@ jobs:
         uses: mamba-org/provision-with-micromamba@v15
         with:
           environment-file: false
-
+
       - name: Python ${{ matrix.python-version }}
         run: |
          micromamba create --name TEST python=${{ matrix.python-version }} pip --file requirements-dev.txt --channel conda-forge
          micromamba activate TEST
+
+      - name: test w/o conda-forge-metadata
+        run: |
+          micromamba activate TEST
+          depfinder --help
           pip install -e . --force-reinstall
+          coverage run -m pytest -vrsx test.py

-      - name: test
+      - name: test w/ conda-forge-metadata
         run: |
           micromamba activate TEST
+          pip install -e .[conda-forge] --force-reinstall
           coverage run -m pytest -vrsx test.py

       - name: coverage
@@ -47,4 +54,4 @@ jobs:
           micromamba activate TEST
           coverage report -m
           codecov
-
+
diff --git a/depfinder/main.py b/depfinder/main.py
index 99976d0..bd68d57 100644
--- a/depfinder/main.py
+++ b/depfinder/main.py
@@ -249,7 +249,7 @@ def simple_import_search_conda_forge_import_map(path_to_source_code, builtins=No
     for name, md in total_import.items():
         total_imports[name].update(md)
     from .reports import report_conda_forge_names_from_import_map
-    imports, _, _ = report_conda_forge_names_from_import_map(
+    imports, _ = report_conda_forge_names_from_import_map(
         total_imports, builtin_modules=builtins, ignore=ignore
     )
     return {k: sorted(list(v)) for k, v in imports.items()}
@@ -288,7 +288,7 @@ def simple_import_to_pkg_map(path_to_source_code, builtins=None, ignore=None, cu
     for name, md in total_import.items():
         total_imports[name].update(md)
     from .reports import report_conda_forge_names_from_import_map
-    _, _, import_to_artifact = report_conda_forge_names_from_import_map(
+    _, import_to_pkg = report_conda_forge_names_from_import_map(
         total_imports, builtin_modules=builtins, ignore=ignore
     )
-    return import_to_artifact
+    return import_to_pkg
diff --git a/depfinder/reports.py b/depfinder/reports.py
index bc748f2..7026cd6 100644
--- a/depfinder/reports.py
+++ b/depfinder/reports.py
@@ -30,45 +30,15 @@ from __future__ import print_function, division, absolute_import

 import logging
-import sys

 from concurrent.futures._base import as_completed
 from concurrent.futures.thread import ThreadPoolExecutor
 from fnmatch import fnmatch
-from functools import lru_cache
-
-import requests

 from .stdliblist import builtin_modules as _builtin_modules
 from .utils import SKETCHY_TYPES_TABLE

-logger = logging.getLogger('depfinder')
-
-
-@lru_cache()
-def _import_map_num_letters():
-    req = requests.get(
-        'https://raw.githubusercontent.com/regro/libcfgraph/master'
-        '/import_maps_meta.json')
-    req.raise_for_status()
-    return int(req.json()['num_letters'])
-
-
-@lru_cache()
-def _import_map_cache(import_first_letters):
-    req = requests.get(
-        f'https://raw.githubusercontent.com/regro/libcfgraph'
-        f'/master/import_maps/{import_first_letters.lower()}.json')
-    if not req.ok:
-        print('Request to {req_url} failed'.format(req_url=req.url))
-        return {}
-    return {k: set(v['elements']) for k, v in req.json().items()}
-
-FILE_LISTING = requests.get('https://raw.githubusercontent.com/regro/libcfgraph/master/.file_listing.json').json()
-# TODO: upstream this to libcfgraph so we just request it, so we reduce bandwidth requirements
-ARTIFACT_TO_PKG = {v.split('/')[-1].rsplit('.', 1)[0]: v.split('/')[1] for v in FILE_LISTING if 'artifacts' in v}
-hubs_auths = requests.get(
-    'https://raw.githubusercontent.com/regro/cf-graph-countyfair/master/ranked_hubs_authorities.json').json()
+logger = logging.getLogger('depfinder')


 def extract_pkg_from_import(name):
@@ -82,29 +52,25 @@ def extract_pkg_from_import(name):

     Returns
     -------
-
+    most_likely_pkg : str
+        The most likely conda-forge package.
+    import_to_pkg : dict mapping str to sets
+        A dict mapping the import name to a set of possible packages that supply that import.
     """
-    num_letters = _import_map_num_letters()
-    original_name = name
-    while True:
-        try:
-            fllt = name[:min(len(name), num_letters)]
-            import_map = _import_map_cache(fllt)
-            supplying_artifacts = import_map[name]
-        except KeyError:
-            if '.' not in name:
-                return original_name, {}, {}
-            name = name.rsplit('.', 1)[0]
-            pass
-        else:
-            break
-    import_to_artifact = {name: supplying_artifacts}
-    # TODO: launder supplying_pkgs through centrality scoring so we have one thing
-    # but keep the rest for the more detailed reports
-    supplying_pkgs = {ARTIFACT_TO_PKG[k] for k in supplying_artifacts}
-    import_to_pkg = {name: supplying_pkgs}
-
-    return next(iter(k for k in hubs_auths if k in supplying_pkgs), original_name), import_to_artifact, import_to_pkg
+    from conda_forge_metadata.autotick_bot import map_import_to_package
+    from conda_forge_metadata.libcfgraph import get_libcfgraph_pkgs_for_import
+    try:
+        supplying_pkgs, _ = get_libcfgraph_pkgs_for_import(name)
+        best_import = map_import_to_package(name)
+    except Exception:
+        logger.exception(
+            "could not get package name from conda-forge metadata "
+            f"for import {name} due to an error"
+        )
+        supplying_pkgs = set()
+        best_import = name
+    import_to_pkg = {name: supplying_pkgs or set()}
+    return best_import, import_to_pkg


 def recursively_search_for_name(name, module_names):
@@ -126,7 +92,6 @@ def report_conda_forge_names_from_import_map(total_imports, builtin_modules=None
     report_keys = ['required', 'questionable', 'builtin', 'questionable no match', 'required no match']
     report = {k: set() for k in report_keys}
     import_to_pkg = {k: {} for k in report_keys}
-    import_to_artifact = {k: {} for k in report_keys}

     futures = {}
     with ThreadPoolExecutor() as pool:
@@ -140,7 +105,7 @@ def report_conda_forge_names_from_import_map(total_imports, builtin_modules=None
             futures[future] = md
     for future in as_completed(futures):
         md = futures[future]
-        most_likely_pkg, _import_to_artifact, _import_to_pkg = future.result()
+        most_likely_pkg, _import_to_pkg = future.result()

         for (filename, lineno), import_metadata in md.items():
             # Make certain to throw out imports, since an import can happen multiple times
@@ -148,20 +113,21 @@ def report_conda_forge_names_from_import_map(total_imports, builtin_modules=None
             # but is questionable for a regular file
             if any(fnmatch(filename, ignore_element) for ignore_element in ignore):
                 continue
+            _name = list(_import_to_pkg.keys())[0]
             if any(import_metadata.get(v, False) for v in SKETCHY_TYPES_TABLE.values()):
                 # if we couldn't find any artifacts to represent this then it doesn't exist in our maps
-                if not _import_to_artifact:
+                if not _import_to_pkg[_name]:
                     report_key = 'questionable no match'
                 else:
                     report_key = 'questionable'
             else:
                 # if we couldn't find any artifacts to represent this then it doesn't exist in our maps
-                if not _import_to_artifact:
+                if not _import_to_pkg[_name]:
                     report_key = 'required no match'
                 else:
                     report_key = 'required'
             report[report_key].add(most_likely_pkg)
             import_to_pkg[report_key].update(_import_to_pkg)
-            import_to_artifact[report_key].update(_import_to_artifact)
-    return report, import_to_artifact, import_to_pkg
+
+    return report, import_to_pkg
diff --git a/depfinder/utils.py b/depfinder/utils.py
index e582c3c..d39caaf 100644
--- a/depfinder/utils.py
+++ b/depfinder/utils.py
@@ -6,9 +6,11 @@
 import sys

 import requests
+import requests.exceptions
 import yaml

 from .stdliblist import builtin_modules

+logger = logging.getLogger("depfinder")
 SKETCHY_TYPES_TABLE = {}

@@ -68,10 +70,14 @@
         Loader=yaml_loader,
     )

-req = requests.get('https://raw.githubusercontent.com/regro/cf-graph-countyfair/master/mappings/pypi/name_mapping.yaml')
-if req.status_code == 200:
-    mapping_list = yaml.load(req.text, Loader=yaml_loader)
-else:
+try:
+    import conda_forge_metadata.autotick_bot
+    mapping_list = conda_forge_metadata.autotick_bot.get_pypi_name_mapping()
+except (ImportError, AttributeError, requests.exceptions.HTTPError):
+    logger.exception(
+        "could not get the conda-forge metadata pypi-to-conda name mapping "
+        "due to error. defaulting to an internal one which may be out of date."
+    )
     mapping_list = yaml.load(
         pkgutil.get_data(__name__, 'pkg_data/name_mapping.yml').decode(),
         Loader=yaml_loader,
diff --git a/setup.cfg b/setup.cfg
index 3587cb7..673f521 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -20,6 +20,7 @@ install_requires =
    pyyaml
    stdlib-list; python_version < "3.10"
    requests
+
python_requires = >=2.7
packages = find:

@@ -27,5 +28,9 @@ packages = find:
console_scripts =
    depfinder = depfinder.cli:cli

+[options.extras_require]
+conda-forge =
+    conda-forge-metadata>=0.3.0
+
[flake8]
max-line-length=300
diff --git a/test.py b/test.py
index 1024942..8d7d406 100644
--- a/test.py
+++ b/test.py
@@ -21,6 +21,12 @@
 from depfinder.reports import report_conda_forge_names_from_import_map, extract_pkg_from_import, \
     recursively_search_for_name, _builtin_modules

+try:
+    import conda_forge_metadata  # noqa
+    HAS_CF_METADATA = True
+except ImportError:
+    HAS_CF_METADATA = False
+
 random.seed(12345)

 # Testing spec:
@@ -449,19 +455,33 @@ def test_get_top_level_import():
     assert top_level_name == 'google.cloud.storage'


+@pytest.mark.skipif(
+    not HAS_CF_METADATA,
+    reason="test of optional conda-forge-metadata integration",
+)
 def test_report_conda_forge_names_from_import_map():
     m, f, c = parse_file(join(dirname(depfinder.__file__), 'utils.py'))
-    report, import_to_artifact, import_to_pkg = report_conda_forge_names_from_import_map(c.total_imports)
+    report, import_to_pkg = report_conda_forge_names_from_import_map(c.total_imports)
     assert report['required'] == {'pyyaml', 'requests'}


+@pytest.mark.skipif(
+    not HAS_CF_METADATA,
+    reason="test of optional conda-forge-metadata integration",
+)
 def test_report_conda_forge_names_from_import_map_ignore():
     m, f, c = parse_file(join(dirname(depfinder.__file__), 'inspection.py'))
-    report, import_to_artifact, import_to_pkg = report_conda_forge_names_from_import_map(c.total_imports,
-                                                                                         ignore=['*insp*'])
+    report, import_to_pkg = report_conda_forge_names_from_import_map(
+        c.total_imports,
+        ignore=['*insp*'],
+    )
     assert report['required'] == set()


+@pytest.mark.skipif(
+    not HAS_CF_METADATA,
+    reason="test of optional conda-forge-metadata integration",
+)
 def test_simple_import_search_conda_forge_import_map():
     path_to_source = dirname(depfinder.__file__)
     expected_result = sorted(list({"pyyaml", "requests"}))
@@ -469,16 +489,22 @@ def test_simple_import_search_conda_forge_import_map():
     assert report['required'] == expected_result


+@pytest.mark.skipif(
+    not HAS_CF_METADATA,
+    reason="test of optional conda-forge-metadata integration",
+)
 @pytest.mark.parametrize('import_name, expected_result', [
     ('six.moves', 'six'),
-    ('win32com.shell', 'pywin32'),
-    ('win32com', 'pywin32'),
+    # these need special casing elsewhere
+    # ('win32com.shell', 'pywin32'),
+    # ('win32com', 'pywin32'),
+    ("scipy.interpolate", "scipy"),
     # this comes from cython but doesn't seem to be a real pkg
     ('refnanny.hi', 'refnanny.hi')
 ])
 def test_extract_pkg_from_import_for_complex_imports(import_name, expected_result):
-    result, _, _ = extract_pkg_from_import(import_name)
-    assert result == expected_result
+    result, allpkgs = extract_pkg_from_import(import_name)
+    assert result == expected_result, allpkgs


 @pytest.mark.parametrize('import_name, expected_result', [
@@ -489,19 +515,43 @@ def test_search_for_name(import_name, expected_result):
     assert builtin_name_maybe == expected_result


+@pytest.mark.skipif(
+    not HAS_CF_METADATA,
+    reason="test of optional conda-forge-metadata integration",
+)
 def test_simple_import_to_pkg_map():
     path_to_source = dirname(depfinder.__file__)
     import_to_artifact = simple_import_to_pkg_map(path_to_source)
-    expected_result = {'builtin': {},
-                       'questionable': {'stdlib_list': {'stdlib-list'}, 'IPython.core.inputsplitter': {'ipython', 'autovizwidget'}},
-                       'questionable no match': {},
-                       'required': {'requests': {'apache-libcloud',
-                                                 'arm_pyart',
-                                                 'autovizwidget',
-                                                 'dbxfs',
-                                                 'google-api-core',
-                                                 'google-cloud-bigquery-storage-core',
-                                                 'requests'},
-                                    'yaml': {'google-cloud-bigquery-storage-core', 'pyyaml'}},
-                       'required no match': {}}
-    assert import_to_artifact == expected_result
\ No newline at end of file
+    expected_result = {
+        'builtin': {},
+        'questionable': {
+            'stdlib_list': {'stdlib-list'},
+            'IPython.core.inputsplitter': {'ipython', 'autovizwidget'},
+            'conda_forge_metadata.autotick_bot': {'conda-forge-metadata'},
+            'conda_forge_metadata.libcfgraph': {'conda-forge-metadata'},
+        },
+        'questionable no match': {},
+        'required': {
+            'requests': {
+                'apache-libcloud',
+                'arm_pyart',
+                'autovizwidget',
+                'dbxfs',
+                'google-api-core',
+                'google-cloud-bigquery-storage-core',
+                'requests'
+            },
+            'requests.exceptions': {
+                'apache-libcloud',
+                'arm_pyart',
+                'autovizwidget',
+                'dbxfs',
+                'google-api-core',
+                'google-cloud-bigquery-storage-core',
+                'requests'
+            },
+            'yaml': {'google-cloud-bigquery-storage-core', 'pyyaml', 'rosco'}
+        },
+        'required no match': {}
+    }
+    assert import_to_artifact == expected_result