Skip to content

Commit

Permalink
Merge pull request #93 from ericdill/new-maps
Browse files Browse the repository at this point in the history
REF update to new metadata package
  • Loading branch information
ericdill authored May 5, 2023
2 parents 0853610 + 729b4a3 commit d7dce34
Show file tree
Hide file tree
Showing 6 changed files with 123 additions and 89 deletions.
13 changes: 10 additions & 3 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,28 @@ jobs:
uses: mamba-org/provision-with-micromamba@v15
with:
environment-file: false

- name: Python ${{ matrix.python-version }}
run: |
micromamba create --name TEST python=${{ matrix.python-version }} pip --file requirements-dev.txt --channel conda-forge
micromamba activate TEST
- name: test w/o conda-forge-metadata
run: |
micromamba activate TEST
depfinder --help
pip install -e . --force-reinstall
coverage run -m pytest -vrsx test.py
- name: test
- name: test w/ conda-forge-metadata
run: |
micromamba activate TEST
pip install -e .[conda-forge] --force-reinstall
coverage run -m pytest -vrsx test.py
- name: coverage
run: |
micromamba activate TEST
coverage report -m
codecov
6 changes: 3 additions & 3 deletions depfinder/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def simple_import_search_conda_forge_import_map(path_to_source_code, builtins=No
for name, md in total_import.items():
total_imports[name].update(md)
from .reports import report_conda_forge_names_from_import_map
imports, _, _ = report_conda_forge_names_from_import_map(
imports, _ = report_conda_forge_names_from_import_map(
total_imports, builtin_modules=builtins, ignore=ignore
)
return {k: sorted(list(v)) for k, v in imports.items()}
Expand Down Expand Up @@ -288,7 +288,7 @@ def simple_import_to_pkg_map(path_to_source_code, builtins=None, ignore=None, cu
for name, md in total_import.items():
total_imports[name].update(md)
from .reports import report_conda_forge_names_from_import_map
_, _, import_to_artifact = report_conda_forge_names_from_import_map(
_, import_to_pkg = report_conda_forge_names_from_import_map(
total_imports, builtin_modules=builtins, ignore=ignore
)
return import_to_artifact
return import_to_pkg
84 changes: 25 additions & 59 deletions depfinder/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,45 +30,15 @@
from __future__ import print_function, division, absolute_import

import logging
import sys
from concurrent.futures._base import as_completed
from concurrent.futures.thread import ThreadPoolExecutor
from fnmatch import fnmatch
from functools import lru_cache

import requests

from .stdliblist import builtin_modules as _builtin_modules
from .utils import SKETCHY_TYPES_TABLE

logger = logging.getLogger('depfinder')


@lru_cache()
def _import_map_num_letters():
    """Return how many leading letters the libcfgraph import maps are sharded by.

    Fetches the import-map metadata JSON from the regro/libcfgraph repository
    and returns its ``num_letters`` field as an ``int``. The result is cached,
    so the network round-trip happens at most once per process.
    """
    metadata_url = ('https://raw.githubusercontent.com/regro/libcfgraph/master'
                    '/import_maps_meta.json')
    response = requests.get(metadata_url)
    response.raise_for_status()
    return int(response.json()['num_letters'])


@lru_cache()
def _import_map_cache(import_first_letters):
    """Fetch one shard of the libcfgraph import map.

    Parameters
    ----------
    import_first_letters : str
        The first N letters of the import name (N comes from
        ``_import_map_num_letters``); lower-cased before building the URL.

    Returns
    -------
    dict
        Mapping of import name to the set of artifact names that supply it,
        or an empty dict when the shard could not be fetched.
    """
    req = requests.get(
        f'https://raw.githubusercontent.com/regro/libcfgraph'
        f'/master/import_maps/{import_first_letters.lower()}.json')
    if not req.ok:
        # Report through the module logger rather than print() so failures
        # show up in normal logging output, consistent with the rest of the
        # module. NOTE(review): because of @lru_cache, a transient network
        # failure (the empty dict) stays memoized for the process lifetime.
        logger.error('Request to %s failed', req.url)
        return {}
    return {k: set(v['elements']) for k, v in req.json().items()}


# Full recursive file listing of the regro/libcfgraph repo. Fetched eagerly,
# i.e. this performs a network request at module-import time.
FILE_LISTING = requests.get('https://raw.githubusercontent.com/regro/libcfgraph/master/.file_listing.json').json()
# TODO: upstream this to libcfgraph so we just request it, so we reduce bandwidth requirements
# Map an artifact's file stem (path basename with its extension stripped) to
# the package name (second path component) for every 'artifacts' entry.
ARTIFACT_TO_PKG = {v.split('/')[-1].rsplit('.', 1)[0]: v.split('/')[1] for v in FILE_LISTING if 'artifacts' in v}
# Package names from cf-graph-countyfair's ranked hubs/authorities listing —
# presumably ordered by centrality score; used downstream to pick the most
# likely package when several supply the same import (TODO confirm ordering).
hubs_auths = requests.get(
    'https://raw.githubusercontent.com/regro/cf-graph-countyfair/master/ranked_hubs_authorities.json').json()
logger = logging.getLogger('depfinder')


def extract_pkg_from_import(name):
Expand All @@ -82,29 +52,25 @@ def extract_pkg_from_import(name):
Returns
-------
most_likely_pkg : str
The most likely conda-forge package.
import_to_pkg : dict mapping str to sets
A dict mapping the import name to a set of possible packages that supply that import.
"""
num_letters = _import_map_num_letters()
original_name = name
while True:
try:
fllt = name[:min(len(name), num_letters)]
import_map = _import_map_cache(fllt)
supplying_artifacts = import_map[name]
except KeyError:
if '.' not in name:
return original_name, {}, {}
name = name.rsplit('.', 1)[0]
pass
else:
break
import_to_artifact = {name: supplying_artifacts}
# TODO: launder supplying_pkgs through centrality scoring so we have one thing
# but keep the rest for the more detailed reports
supplying_pkgs = {ARTIFACT_TO_PKG[k] for k in supplying_artifacts}
import_to_pkg = {name: supplying_pkgs}

return next(iter(k for k in hubs_auths if k in supplying_pkgs), original_name), import_to_artifact, import_to_pkg
from conda_forge_metadata.autotick_bot import map_import_to_package
from conda_forge_metadata.libcfgraph import get_libcfgraph_pkgs_for_import
try:
supplying_pkgs, _ = get_libcfgraph_pkgs_for_import(name)
best_import = map_import_to_package(name)
except Exception:
logger.exception(
"could not get package name from conda-forge metadata "
f"for import {name} due to an error"
)
supplying_pkgs = set()
best_import = name
import_to_pkg = {name: supplying_pkgs or set()}
return best_import, import_to_pkg


def recursively_search_for_name(name, module_names):
Expand All @@ -126,7 +92,6 @@ def report_conda_forge_names_from_import_map(total_imports, builtin_modules=None
report_keys = ['required', 'questionable', 'builtin', 'questionable no match', 'required no match']
report = {k: set() for k in report_keys}
import_to_pkg = {k: {} for k in report_keys}
import_to_artifact = {k: {} for k in report_keys}
futures = {}

with ThreadPoolExecutor() as pool:
Expand All @@ -140,28 +105,29 @@ def report_conda_forge_names_from_import_map(total_imports, builtin_modules=None
futures[future] = md
for future in as_completed(futures):
md = futures[future]
most_likely_pkg, _import_to_artifact, _import_to_pkg = future.result()
most_likely_pkg, _import_to_pkg = future.result()

for (filename, lineno), import_metadata in md.items():
# Make certain to throw out imports, since an import can happen multiple times
# under different situations, import matplotlib is required by a test file
# but is questionable for a regular file
if any(fnmatch(filename, ignore_element) for ignore_element in ignore):
continue
_name = list(_import_to_pkg.keys())[0]
if any(import_metadata.get(v, False) for v in SKETCHY_TYPES_TABLE.values()):
# if we couldn't find any artifacts to represent this then it doesn't exist in our maps
if not _import_to_artifact:
if not _import_to_pkg[_name]:
report_key = 'questionable no match'
else:
report_key = 'questionable'
else:
# if we couldn't find any artifacts to represent this then it doesn't exist in our maps
if not _import_to_artifact:
if not _import_to_pkg[_name]:
report_key = 'required no match'
else:
report_key = 'required'

report[report_key].add(most_likely_pkg)
import_to_pkg[report_key].update(_import_to_pkg)
import_to_artifact[report_key].update(_import_to_artifact)
return report, import_to_artifact, import_to_pkg

return report, import_to_pkg
14 changes: 10 additions & 4 deletions depfinder/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
import sys

import requests
import requests.exceptions
import yaml
from .stdliblist import builtin_modules

logger = logging.getLogger("depfinder")

SKETCHY_TYPES_TABLE = {}

Expand Down Expand Up @@ -68,10 +70,14 @@
Loader=yaml_loader,
)

req = requests.get('https://raw.githubusercontent.com/regro/cf-graph-countyfair/master/mappings/pypi/name_mapping.yaml')
if req.status_code == 200:
mapping_list = yaml.load(req.text, Loader=yaml_loader)
else:
try:
import conda_forge_metadata.autotick_bot
mapping_list = conda_forge_metadata.autotick_bot.get_pypi_name_mapping()
except (ImportError, AttributeError, requests.exceptions.HTTPError):
logger.exception(
"could not get the conda-forge metadata pypi-to-conda name mapping "
"due to error. defaulting to an internal one which may be out of date."
)
mapping_list = yaml.load(
pkgutil.get_data(__name__, 'pkg_data/name_mapping.yml').decode(),
Loader=yaml_loader,
Expand Down
5 changes: 5 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,17 @@ install_requires =
pyyaml
stdlib-list; python_version < "3.10"
requests

python_requires = >=2.7
packages = find:

[options.entry_points]
console_scripts =
depfinder = depfinder.cli:cli

[options.extras_require]
conda-forge =
conda-forge-metadata>=0.3.0

[flake8]
max-line-length=300
90 changes: 70 additions & 20 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@
from depfinder.reports import report_conda_forge_names_from_import_map, extract_pkg_from_import, \
recursively_search_for_name, _builtin_modules

try:
import conda_forge_metadata # noqa
HAS_CF_METADATA = True
except ImportError:
HAS_CF_METADATA = False

random.seed(12345)

# Testing spec:
Expand Down Expand Up @@ -449,36 +455,56 @@ def test_get_top_level_import():
assert top_level_name == 'google.cloud.storage'


@pytest.mark.skipif(
not HAS_CF_METADATA,
reason="test of optional conda-forge-metadata integration",
)
def test_report_conda_forge_names_from_import_map():
m, f, c = parse_file(join(dirname(depfinder.__file__), 'utils.py'))
report, import_to_artifact, import_to_pkg = report_conda_forge_names_from_import_map(c.total_imports)
report, import_to_pkg = report_conda_forge_names_from_import_map(c.total_imports)
assert report['required'] == {'pyyaml', 'requests'}


@pytest.mark.skipif(
not HAS_CF_METADATA,
reason="test of optional conda-forge-metadata integration",
)
def test_report_conda_forge_names_from_import_map_ignore():
m, f, c = parse_file(join(dirname(depfinder.__file__), 'inspection.py'))
report, import_to_artifact, import_to_pkg = report_conda_forge_names_from_import_map(c.total_imports,
ignore=['*insp*'])
report, import_to_pkg = report_conda_forge_names_from_import_map(
c.total_imports,
ignore=['*insp*'],
)
assert report['required'] == set()


@pytest.mark.skipif(
    not HAS_CF_METADATA,
    reason="test of optional conda-forge-metadata integration",
)
def test_simple_import_search_conda_forge_import_map():
    # Run the conda-forge import-map search over depfinder's own sources.
    source_dir = dirname(depfinder.__file__)
    report = simple_import_search_conda_forge_import_map(source_dir)
    # depfinder itself must require exactly pyyaml and requests.
    assert report['required'] == sorted({"pyyaml", "requests"})


@pytest.mark.skipif(
not HAS_CF_METADATA,
reason="test of optional conda-forge-metadata integration",
)
@pytest.mark.parametrize('import_name, expected_result', [
('six.moves', 'six'),
('win32com.shell', 'pywin32'),
('win32com', 'pywin32'),
# these need special casing elsewhere
# ('win32com.shell', 'pywin32'),
# ('win32com', 'pywin32'),
("scipy.interpolate", "scipy"),
# this comes from cython but doesn't seem to be a real pkg
('refnanny.hi', 'refnanny.hi')
])
def test_extract_pkg_from_import_for_complex_imports(import_name, expected_result):
result, _, _ = extract_pkg_from_import(import_name)
assert result == expected_result
result, allpkgs = extract_pkg_from_import(import_name)
assert result == expected_result, allpkgs


@pytest.mark.parametrize('import_name, expected_result', [
Expand All @@ -489,19 +515,43 @@ def test_search_for_name(import_name, expected_result):
assert builtin_name_maybe == expected_result


@pytest.mark.skipif(
not HAS_CF_METADATA,
reason="test of optional conda-forge-metadata integration",
)
def test_simple_import_to_pkg_map():
path_to_source = dirname(depfinder.__file__)
import_to_artifact = simple_import_to_pkg_map(path_to_source)
expected_result = {'builtin': {},
'questionable': {'stdlib_list': {'stdlib-list'}, 'IPython.core.inputsplitter': {'ipython', 'autovizwidget'}},
'questionable no match': {},
'required': {'requests': {'apache-libcloud',
'arm_pyart',
'autovizwidget',
'dbxfs',
'google-api-core',
'google-cloud-bigquery-storage-core',
'requests'},
'yaml': {'google-cloud-bigquery-storage-core', 'pyyaml'}},
'required no match': {}}
assert import_to_artifact == expected_result
expected_result = {
'builtin': {},
'questionable': {
'stdlib_list': {'stdlib-list'},
'IPython.core.inputsplitter': {'ipython', 'autovizwidget'},
'conda_forge_metadata.autotick_bot': {'conda-forge-metadata'},
'conda_forge_metadata.libcfgraph': {'conda-forge-metadata'},
},
'questionable no match': {},
'required': {
'requests': {
'apache-libcloud',
'arm_pyart',
'autovizwidget',
'dbxfs',
'google-api-core',
'google-cloud-bigquery-storage-core',
'requests'
},
'requests.exceptions': {
'apache-libcloud',
'arm_pyart',
'autovizwidget',
'dbxfs',
'google-api-core',
'google-cloud-bigquery-storage-core',
'requests'
},
'yaml': {'google-cloud-bigquery-storage-core', 'pyyaml', 'rosco'}
},
'required no match': {}
}
assert import_to_artifact == expected_result

0 comments on commit d7dce34

Please sign in to comment.