Skip to content

Commit

Permalink
update manylinux wheels download logic (#1250)
Browse files Browse the repository at this point in the history
* update manylinux wheels download logic

* add tests for updated manylinux logic

* Remove incorrect return statement from get_manylinux_wheel_url

* convert the manylinux file names to lowercase in get_manylinux_wheel_url

* add test to verify manylinux filenames are lowered

* change print to logger output

* mock requests.get in test_verify_manylinux_filename_is_lowered

* apply black

* fix mock.patch reference

* 🔧 add `ignore_cache` option for ease of testing.
✅ update testcase to use `ignore_cache`

* 🎨 fix flake8

---------

Co-authored-by: monkut <[email protected]>
Co-authored-by: shane <[email protected]>
  • Loading branch information
3 people authored Aug 20, 2023
1 parent 26d6182 commit f2f03ba
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 21 deletions.
61 changes: 61 additions & 0 deletions tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,67 @@ def test_get_manylinux_python310(self):
self.assertTrue(os.path.isfile(path))
os.remove(path)

def test_verify_python37_does_not_download_2_24_manylinux_wheel(self):
    """
    On the python3.7 runtime, resolving cryptography 35.0.0 must yield the
    manylinux_2_12 / manylinux2010 abi3 wheel inside the temp-dir wheel cache.
    """
    zappa = Zappa(runtime="python3.7")
    wheel_name = "cryptography-35.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.whl"
    wanted_path = os.path.join(os.path.join(tempfile.gettempdir(), "cached_wheels"), wheel_name)

    # cryptography is a package known to publish manylinux wheels.
    resolved_path = zappa.get_cached_manylinux_wheel("cryptography", "35.0.0")
    self.assertEqual(resolved_path, wanted_path)

    # Remove the wheel from the cache again.
    os.remove(resolved_path)

def test_verify_downloaded_manylinux_wheel(self):
    """
    On the python3.10 runtime, resolving pycryptodome 3.16.0 must yield the
    expected multi-tag abi3 manylinux wheel inside the temp-dir wheel cache.
    """
    zappa = Zappa(runtime="python3.10")
    wheel_name = (
        "pycryptodome-3.16.0-cp35-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl"
    )
    wanted_path = os.path.join(os.path.join(tempfile.gettempdir(), "cached_wheels"), wheel_name)

    # pycryptodome is a package known to publish manylinux wheels.
    resolved_path = zappa.get_cached_manylinux_wheel("pycryptodome", "3.16.0")
    self.assertEqual(resolved_path, wanted_path)

    # Remove the wheel from the cache again.
    os.remove(resolved_path)

def test_verify_manylinux_filename_is_lowered(self):
    """
    get_manylinux_wheel_url must return the matching wheel's filename in lowercase.

    PyPI is mocked with three MarkupSafe 2.1.3 cp310 wheels (aarch64, x86_64
    and i686) whose filenames contain capital letters; the x86_64 entry is the
    one expected to be selected, with its filename lowercased.
    """
    z = Zappa(runtime="python3.10")
    expected_filename = "markupsafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"

    mock_package_data = {
        "releases": {
            "2.1.3": [
                {
                    "url": "https://files.pythonhosted.org/packages/a6/56/f1d4ee39e898a9e63470cbb7fae1c58cce6874f25f54220b89213a47f273/MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
                    "filename": "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
                },
                {
                    "url": "https://files.pythonhosted.org/packages/12/b3/d9ed2c0971e1435b8a62354b18d3060b66c8cb1d368399ec0b9baa7c0ee5/MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
                    "filename": "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
                },
                {
                    "url": "https://files.pythonhosted.org/packages/bf/b7/c5ba9b7ad9ad21fc4a60df226615cf43ead185d328b77b0327d603d00cc5/MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl",
                    "filename": "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl",
                },
            ]
        }
    }

    # With ignore_cache=True the mocked payload is written to the shared
    # temp-dir metadata cache. It must be removed even when an assertion
    # fails, otherwise the fake metadata would be picked up by later lookups.
    cached_pypi_info_dir = os.path.join(tempfile.gettempdir(), "cached_pypi_info")
    cached_json_path = os.path.join(cached_pypi_info_dir, "markupsafe-2.1.3.json")
    try:
        with mock.patch("zappa.core.requests.get") as mock_get:
            mock_get.return_value.json.return_value = mock_package_data
            wheel_url, file_name = z.get_manylinux_wheel_url("markupsafe", "2.1.3", ignore_cache=True)

        self.assertEqual(file_name, expected_filename)
        mock_get.assert_called_once_with(
            "https://pypi.python.org/pypi/markupsafe/json", timeout=float(os.environ.get("PIP_TIMEOUT", 1.5))
        )
    finally:
        # Clean the generated files; guard so a failure before the file was
        # written is not masked by a FileNotFoundError from the cleanup.
        if os.path.exists(cached_json_path):
            os.remove(cached_json_path)

def test_get_manylinux_python311(self):
z = Zappa(runtime="python3.11")
self.assertIsNotNone(z.get_cached_manylinux_wheel("psycopg2-binary", "2.9.7"))
Expand Down
58 changes: 37 additions & 21 deletions zappa/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from builtins import bytes, int
from distutils.dir_util import copy_tree
from io import open
from pathlib import Path
from typing import Optional

import boto3
Expand Down Expand Up @@ -320,13 +321,17 @@ def __init__(
self.manylinux_suffix_start = "cp311"

# AWS Lambda supports manylinux1/2010, manylinux2014, and manylinux_2_24
manylinux_suffixes = ("_2_24", "2014", "2010", "1")
# Currently python3.7 lambda runtime does not support manylinux_2_24
# See https://github.com/zappa/Zappa/issues/1249 for more details
if self.runtime == "python3.7":
self.manylinux_suffixes = ("2014", "2010", "1")
else:
self.manylinux_suffixes = ("_2_24", "2014", "2010", "1")

self.manylinux_wheel_file_match = re.compile(
rf'^.*{self.manylinux_suffix_start}-(manylinux_\d+_\d+_x86_64[.])?manylinux({"|".join(manylinux_suffixes)})_x86_64[.]whl$' # noqa: E501
)
self.manylinux_wheel_abi3_file_match = re.compile(
rf'^.*cp3.-abi3-manylinux({"|".join(manylinux_suffixes)})_x86_64.whl$'
rf'^.*{self.manylinux_suffix_start}-(manylinux_\d+_\d+_x86_64[.])?manylinux({"|".join(self.manylinux_suffixes)})_x86_64[.]whl$' # noqa: E501
)
self.manylinux_wheel_abi3_file_match = re.compile(rf"^.*cp3.-abi3-manylinux.*_x86_64[.]whl$")

self.endpoint_urls = endpoint_urls
self.xray_tracing = xray_tracing
Expand Down Expand Up @@ -922,19 +927,22 @@ def get_cached_manylinux_wheel(self, package_name, package_version, disable_prog
wheel_path = os.path.join(cached_wheels_dir, wheel_file)

for pathname in glob.iglob(wheel_path):
if re.match(self.manylinux_wheel_file_match, pathname) or re.match(
self.manylinux_wheel_abi3_file_match, pathname
):
print(f" - {package_name}=={package_version}: Using locally cached manylinux wheel")
if re.match(self.manylinux_wheel_file_match, pathname):
logger.info(f" - {package_name}=={package_version}: Using locally cached manylinux wheel")
return pathname
elif re.match(self.manylinux_wheel_abi3_file_match, pathname):
for manylinux_suffix in self.manylinux_suffixes:
if f"manylinux{manylinux_suffix}_x86_64" in pathname:
logger.info(f" - {package_name}=={package_version}: Using locally cached manylinux wheel")
return pathname

# The file is not cached, download it.
wheel_url, filename = self.get_manylinux_wheel_url(package_name, package_version)
if not wheel_url:
return None

wheel_path = os.path.join(cached_wheels_dir, filename)
print(f" - {package_name}=={package_version}: Downloading")
logger.info(f" - {package_name}=={package_version}: Downloading")
with open(wheel_path, "wb") as f:
self.download_url_with_progress(wheel_url, f, disable_progress)

Expand All @@ -943,7 +951,7 @@ def get_cached_manylinux_wheel(self, package_name, package_version, disable_prog

return wheel_path

def get_manylinux_wheel_url(self, package_name, package_version):
def get_manylinux_wheel_url(self, package_name, package_version, ignore_cache: bool = False):
"""
For a given package name, returns a link to the download URL,
else returns None.
Expand All @@ -954,27 +962,31 @@ def get_manylinux_wheel_url(self, package_name, package_version):
also caches the JSON file so that we don't have to poll Pypi
every time.
"""
cached_pypi_info_dir = os.path.join(tempfile.gettempdir(), "cached_pypi_info")
if not os.path.isdir(cached_pypi_info_dir):
cached_pypi_info_dir = Path(tempfile.gettempdir()) / "cached_pypi_info"
if not cached_pypi_info_dir.is_dir():
os.makedirs(cached_pypi_info_dir)

# Even though the metadata is for the package, we save it in a
# filename that includes the package's version. This helps in
# invalidating the cached file if the user moves to a different
# version of the package.
# Related: https://github.com/Miserlou/Zappa/issues/899
json_file = "{0!s}-{1!s}.json".format(package_name, package_version)
json_file_path = os.path.join(cached_pypi_info_dir, json_file)
if os.path.exists(json_file_path):
with open(json_file_path, "rb") as metafile:
data = None
json_file_name = "{0!s}-{1!s}.json".format(package_name, package_version)
json_file_path = cached_pypi_info_dir / json_file_name
if json_file_path.exists():
with json_file_path.open("rb") as metafile:
data = json.load(metafile)
else:

if not data or ignore_cache:
url = "https://pypi.python.org/pypi/{}/json".format(package_name)
try:
res = requests.get(url, timeout=float(os.environ.get("PIP_TIMEOUT", 1.5)))
data = res.json()
except Exception: # pragma: no cover
return None, None
with open(json_file_path, "wb") as metafile:

with json_file_path.open("wb") as metafile:
jsondata = json.dumps(data)
metafile.write(bytes(jsondata, "utf-8"))

Expand All @@ -984,9 +996,13 @@ def get_manylinux_wheel_url(self, package_name, package_version):

for f in data["releases"][package_version]:
if re.match(self.manylinux_wheel_file_match, f["filename"]):
return f["url"], f["filename"]
# Package names are already lowercased in get_installed_packages, so the
# returned wheel filename is lowercased as well; otherwise manylinux caching
# does not work for packages with capital letters in their names, like MarkupSafe.
return f["url"], f["filename"].lower()
elif re.match(self.manylinux_wheel_abi3_file_match, f["filename"]):
return f["url"], f["filename"]
for manylinux_suffix in self.manylinux_suffixes:
if f"manylinux{manylinux_suffix}_x86_64" in f["filename"]:
return f["url"], f["filename"].lower()
return None, None

##
Expand Down

0 comments on commit f2f03ba

Please sign in to comment.