Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update manylinux wheels download logic #1250

Merged
merged 12 commits into from
Aug 20, 2023
61 changes: 61 additions & 0 deletions tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,67 @@ def test_get_manylinux_python310(self):
self.assertTrue(os.path.isfile(path))
os.remove(path)

def test_verify_python37_does_not_download_2_24_manylinux_wheel(self):
    """
    With the python3.7 runtime, cryptography 35.0.0 must resolve to the
    manylinux2010 abi3 wheel in the cached-wheels directory rather than
    to a manylinux_2_24 wheel.
    """
    wheel_name = "cryptography-35.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.whl"
    expected_wheel_path = os.path.join(tempfile.gettempdir(), "cached_wheels", wheel_name)

    # Check with known manylinux wheel package
    zappa = Zappa(runtime="python3.7")
    actual_wheel_path = zappa.get_cached_manylinux_wheel("cryptography", "35.0.0")
    self.assertEqual(actual_wheel_path, expected_wheel_path)

    # Remove the wheel again so it does not linger in the temp directory.
    os.remove(actual_wheel_path)

def test_verify_downloaded_manylinux_wheel(self):
    """
    With the python3.10 runtime, pycryptodome 3.16.0 must resolve to the
    expected abi3 manylinux wheel inside the cached-wheels directory.
    """
    wheel_name = (
        "pycryptodome-3.16.0-cp35-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl"
    )
    expected_wheel_path = os.path.join(tempfile.gettempdir(), "cached_wheels", wheel_name)

    # check with a known manylinux wheel package
    zappa = Zappa(runtime="python3.10")
    actual_wheel_path = zappa.get_cached_manylinux_wheel("pycryptodome", "3.16.0")
    self.assertEqual(actual_wheel_path, expected_wheel_path)

    # Remove the wheel again so it does not linger in the temp directory.
    os.remove(actual_wheel_path)

def test_verify_manylinux_filename_is_lowered(self):
    """
    get_manylinux_wheel_url must hand back a lowercased filename even when
    the (mocked) PyPI metadata lists the wheel with capital letters, as it
    does for MarkupSafe.
    """
    # Fake PyPI metadata: three cp310 wheels for MarkupSafe 2.1.3
    # (aarch64, x86_64 and i686), all with a capitalised package name.
    release_files = [
        {
            "url": "https://files.pythonhosted.org/packages/a6/56/f1d4ee39e898a9e63470cbb7fae1c58cce6874f25f54220b89213a47f273/MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
            "filename": "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
        },
        {
            "url": "https://files.pythonhosted.org/packages/12/b3/d9ed2c0971e1435b8a62354b18d3060b66c8cb1d368399ec0b9baa7c0ee5/MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
            "filename": "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
        },
        {
            "url": "https://files.pythonhosted.org/packages/bf/b7/c5ba9b7ad9ad21fc4a60df226615cf43ead185d328b77b0327d603d00cc5/MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl",
            "filename": "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl",
        },
    ]
    mock_package_data = {"releases": {"2.1.3": release_files}}
    expected_filename = "markupsafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"

    zappa = Zappa(runtime="python3.10")
    with mock.patch("zappa.core.requests.get") as mock_get:
        mock_get.return_value.json.return_value = mock_package_data
        # ignore_cache=True is passed so the lookup goes through the patched requests.get.
        _wheel_url, file_name = zappa.get_manylinux_wheel_url("markupsafe", "2.1.3", ignore_cache=True)

    self.assertEqual(file_name, expected_filename)
    expected_timeout = float(os.environ.get("PIP_TIMEOUT", 1.5))
    mock_get.assert_called_once_with("https://pypi.python.org/pypi/markupsafe/json", timeout=expected_timeout)

    # Clean the generated files
    cached_json_path = os.path.join(tempfile.gettempdir(), "cached_pypi_info", "markupsafe-2.1.3.json")
    os.remove(cached_json_path)

def test_get_manylinux_python311(self):
z = Zappa(runtime="python3.11")
self.assertIsNotNone(z.get_cached_manylinux_wheel("psycopg2-binary", "2.9.7"))
Expand Down
58 changes: 37 additions & 21 deletions zappa/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from builtins import bytes, int
from distutils.dir_util import copy_tree
from io import open
from pathlib import Path
from typing import Optional

import boto3
Expand Down Expand Up @@ -320,13 +321,17 @@ def __init__(
self.manylinux_suffix_start = "cp311"

# AWS Lambda supports manylinux1/2010, manylinux2014, and manylinux_2_24
manylinux_suffixes = ("_2_24", "2014", "2010", "1")
# Currently python3.7 lambda runtime does not support manylinux_2_24
# See https://github.com/zappa/Zappa/issues/1249 for more details
if self.runtime == "python3.7":
sridhar562345 marked this conversation as resolved.
Show resolved Hide resolved
self.manylinux_suffixes = ("2014", "2010", "1")
else:
self.manylinux_suffixes = ("_2_24", "2014", "2010", "1")

self.manylinux_wheel_file_match = re.compile(
rf'^.*{self.manylinux_suffix_start}-(manylinux_\d+_\d+_x86_64[.])?manylinux({"|".join(manylinux_suffixes)})_x86_64[.]whl$' # noqa: E501
)
self.manylinux_wheel_abi3_file_match = re.compile(
rf'^.*cp3.-abi3-manylinux({"|".join(manylinux_suffixes)})_x86_64.whl$'
rf'^.*{self.manylinux_suffix_start}-(manylinux_\d+_\d+_x86_64[.])?manylinux({"|".join(self.manylinux_suffixes)})_x86_64[.]whl$' # noqa: E501
)
self.manylinux_wheel_abi3_file_match = re.compile(rf"^.*cp3.-abi3-manylinux.*_x86_64[.]whl$")

self.endpoint_urls = endpoint_urls
self.xray_tracing = xray_tracing
Expand Down Expand Up @@ -922,19 +927,22 @@ def get_cached_manylinux_wheel(self, package_name, package_version, disable_prog
wheel_path = os.path.join(cached_wheels_dir, wheel_file)

for pathname in glob.iglob(wheel_path):
if re.match(self.manylinux_wheel_file_match, pathname) or re.match(
self.manylinux_wheel_abi3_file_match, pathname
):
print(f" - {package_name}=={package_version}: Using locally cached manylinux wheel")
if re.match(self.manylinux_wheel_file_match, pathname):
logger.info(f" - {package_name}=={package_version}: Using locally cached manylinux wheel")
return pathname
elif re.match(self.manylinux_wheel_abi3_file_match, pathname):
for manylinux_suffix in self.manylinux_suffixes:
if f"manylinux{manylinux_suffix}_x86_64" in pathname:
logger.info(f" - {package_name}=={package_version}: Using locally cached manylinux wheel")
return pathname

# The file is not cached, download it.
wheel_url, filename = self.get_manylinux_wheel_url(package_name, package_version)
if not wheel_url:
return None

wheel_path = os.path.join(cached_wheels_dir, filename)
print(f" - {package_name}=={package_version}: Downloading")
logger.info(f" - {package_name}=={package_version}: Downloading")
with open(wheel_path, "wb") as f:
self.download_url_with_progress(wheel_url, f, disable_progress)

Expand All @@ -943,7 +951,7 @@ def get_cached_manylinux_wheel(self, package_name, package_version, disable_prog

return wheel_path

def get_manylinux_wheel_url(self, package_name, package_version):
def get_manylinux_wheel_url(self, package_name, package_version, ignore_cache: bool = False):
"""
For a given package name, returns a link to the download URL,
else returns None.
Expand All @@ -954,27 +962,31 @@ def get_manylinux_wheel_url(self, package_name, package_version):
also caches the JSON file so that we don't have to poll Pypi
every time.
"""
cached_pypi_info_dir = os.path.join(tempfile.gettempdir(), "cached_pypi_info")
if not os.path.isdir(cached_pypi_info_dir):
cached_pypi_info_dir = Path(tempfile.gettempdir()) / "cached_pypi_info"
if not cached_pypi_info_dir.is_dir():
os.makedirs(cached_pypi_info_dir)

# Even though the metadata is for the package, we save it in a
# filename that includes the package's version. This helps in
# invalidating the cached file if the user moves to a different
# version of the package.
# Related: https://github.com/Miserlou/Zappa/issues/899
json_file = "{0!s}-{1!s}.json".format(package_name, package_version)
json_file_path = os.path.join(cached_pypi_info_dir, json_file)
if os.path.exists(json_file_path):
with open(json_file_path, "rb") as metafile:
data = None
json_file_name = "{0!s}-{1!s}.json".format(package_name, package_version)
json_file_path = cached_pypi_info_dir / json_file_name
if json_file_path.exists():
with json_file_path.open("rb") as metafile:
data = json.load(metafile)
else:

if not data or ignore_cache:
url = "https://pypi.python.org/pypi/{}/json".format(package_name)
try:
res = requests.get(url, timeout=float(os.environ.get("PIP_TIMEOUT", 1.5)))
data = res.json()
except Exception: # pragma: no cover
return None, None
with open(json_file_path, "wb") as metafile:

with json_file_path.open("wb") as metafile:
jsondata = json.dumps(data)
metafile.write(bytes(jsondata, "utf-8"))

Expand All @@ -984,9 +996,13 @@ def get_manylinux_wheel_url(self, package_name, package_version):

for f in data["releases"][package_version]:
if re.match(self.manylinux_wheel_file_match, f["filename"]):
return f["url"], f["filename"]
# Package names are already lowercased in get_installed_packages, so the
# filename is lowercased as well; otherwise manylinux caching does not work
# for packages with capital letters in their names, such as MarkupSafe.
return f["url"], f["filename"].lower()
sridhar562345 marked this conversation as resolved.
Show resolved Hide resolved
elif re.match(self.manylinux_wheel_abi3_file_match, f["filename"]):
return f["url"], f["filename"]
for manylinux_suffix in self.manylinux_suffixes:
if f"manylinux{manylinux_suffix}_x86_64" in f["filename"]:
return f["url"], f["filename"].lower()
sridhar562345 marked this conversation as resolved.
Show resolved Hide resolved
return None, None

##
Expand Down
Loading