conda-incubator · scopatz · Jul 11, 2020 · Jul 11, 2020 · Jul 12, 2020 · Jul 12, 2020
diff --git a/conda_docker/cli.py b/conda_docker/cli.py
@@ -2,14 +2,15 @@
 import logging
 import argparse
 
-from conda_docker.conda import (
+from .conda_models import Context
+from .conda import (
     build_docker_environment,
     find_user_conda,
     conda_info,
     find_precs,
     fetch_precs,
 )
-from conda_docker.logging import init_logging
+from .logging import init_logging
 
 
 def cli(args):
@@ -89,6 +90,7 @@ def handle_conda_build(args):
     channels = info.get("channels", [])
     conda_default_channels = info.get("conda_default_channels", [])
     channels_remap = info.get("channels_remap", [])
+    context = Context()
     precs = find_precs(
         user_conda,
         download_dir,
@@ -99,6 +101,7 @@ def handle_conda_build(args):
         prefix=args.prefix,
         package_specs=args.package_specs,
         solver=args.solver,
+        context=context,
     )
     records = fetch_precs(download_dir, precs)
     # now build image

diff --git a/conda_docker/conda.py b/conda_docker/conda.py
@@ -1,5 +1,5 @@
 """Interface for finding, grabbing, and installing conda pakcages into docker image"""
-# Significant portions of this file were originally forked from conda constuctor
+# Significant portions of this file were originally forked from conda & conda constuctor
 # (c) 2016 Anaconda, Inc. / https://anaconda.com
 # constructor is distributed under the terms of the BSD 3-clause license.
 import os
@@ -11,32 +11,23 @@
 import tempfile
 import subprocess
 
-from conda.exports import download
+import requests
+from requests import ConnectionError, HTTPError
+from requests.exceptions import (
+    InvalidSchema,
+    SSLError,
+    ProxyError as RequestsProxyError,
+)
 
-try:
-    from conda import __version__ as CONDA_INTERFACE_VERSION
-
-    conda_interface_type = "conda"
-except ImportError:
-    raise RuntimeError(
-        "Conda must be installed for python interpreter\n"
-        f"with sys.prefix: {sys.prefix}"
-    )
-from conda.models.channel import all_channel_urls
-
-try:
-    from conda.models.records import PackageCacheRecord
-except ImportError:
-    from conda.models.package_cache_record import PackageCacheRecord
-from conda.models.dist import Dist
-
-from conda_docker.docker.base import Image
-from conda_docker.registry.client import pull_image
-from conda_docker.utils import timer, md5_files
+from .docker.base import Image
+from .registry.client import pull_image
+from .utils import timer, md5_files
+from .download import download, disable_ssl_verify_warning, join_url
+from .conda_models import Context, all_channel_urls, PackageCacheRecord, Dist
 
 
 LOGGER = logging.getLogger(__name__)
-CONDA_MAJOR_MINOR = tuple(int(x) for x in CONDA_INTERFACE_VERSION.split(".")[:2])
+REPODATA_FN = "repodata.json"
 
 
 def conda_file_filter(trim_static_libs=True, trim_js_maps=True):
@@ -67,42 +58,197 @@ def get_final_url(channels_remap, url):
     return url
 
 
-def get_repodata(url):
-    """Obtain the repodata from a channel URL"""
-    if CONDA_MAJOR_MINOR >= (4, 5):
-        from conda.core.subdir_data import fetch_repodata_remote_request
+def _ensure_text_type(value):
+    if hasattr(value, "decode"):
+        return value.decode("utf-8")
+    return value
 
-        raw_repodata_str = fetch_repodata_remote_request(url, None, None)
-    elif CONDA_MAJOR_MINOR >= (4, 4):
-        from conda.core.repodata import fetch_repodata_remote_request
 
-        raw_repodata_str = fetch_repodata_remote_request(url, None, None)
-    elif CONDA_MAJOR_MINOR >= (4, 3):
-        from conda.core.repodata import fetch_repodata_remote_request
+def _maybe_decompress(filename, resp_content):
+    if filename.endswith(".bz2"):
+        import bz2
 
-        repodata_obj = fetch_repodata_remote_request(None, url, None, None)
-        raw_repodata_str = json.dumps(repodata_obj)
-    else:
-        raise NotImplementedError(
-            f"unsupported version of conda: {CONDA_INTERFACE_VERSION}"
+        resp_content = bz2.decompress(resp_content)
+    return _ensure_text_type(resp_content).strip()
+
+
+def _add_http_value_to_dict(resp, http_key, d, dict_key):
+    value = resp.headers.get(http_key)
+    if value:
+        d[dict_key] = value
+
+
+def fetch_repodata_remote_request(
+    url,
+    etag=None,
+    mod_stamp=None,
+    repodata_fn=REPODATA_FN,
+    ssl_verify=True,
+    remote_connect_timeout_secs=9.15,
+    remote_read_timeout_secs=60.0,
+    proxies=None,
+    context=None,
+):
+    """Get raw repodata string"""
+    if not ssl_verify:
+        disable_ssl_verify_warning()
+
+    headers = {}
+    if etag:
+        headers["If-None-Match"] = etag
+    if mod_stamp:
+        headers["If-Modified-Since"] = mod_stamp
+
+    headers["Accept-Encoding"] = "gzip, deflate, compress, identity"
+    headers["Content-Type"] = "application/json"
+    filename = repodata_fn
+
+    try:
+        timeout = remote_connect_timeout_secs, remote_read_timeout_secs
+        resp = requests.get(
+            join_url(url, filename), headers=headers, proxies=proxies, timeout=timeout
         )
+        if LOGGER.isEnabledFor(logging.DEBUG):
+            LOGGER.debug(str(resp)[:256])
+        resp.raise_for_status()
+    except RequestsProxyError:
+        raise
+    except InvalidSchema as e:
+        if "SOCKS" in str(e):
+            message = (
+                "Requests has identified that your current working environment is configured "
+                "to use a SOCKS proxy, but pysocks is not installed. To proceed, remove your "
+                "proxy configuration, run `conda install pysocks`, and then you can re-enable "
+                "your proxy configuration."
+            )
+            raise RuntimeError(message)
+        else:
+            raise
+    except (ConnectionError, HTTPError, SSLError) as e:
+        # status_code might not exist on SSLError
+        status_code = getattr(e.response, "status_code", None)
+        if status_code in (403, 404):
+            if not url.endswith("/noarch"):
+                LOGGER.info(
+                    "Unable to retrieve repodata (response: %d) for %s",
+                    status_code,
+                    url + "/" + repodata_fn,
+                )
+                return None
+            else:
+                if context is None:
+                    context = Context()
+                if context.allow_non_channel_urls:
+                    LOGGER.warning(
+                        "Unable to retrieve repodata (response: %d) for %s",
+                        status_code,
+                        url + "/" + repodata_fn,
+                    )
+                    return None
+                else:
+                    raise
+        elif status_code == 401:
+            raise
+        elif status_code is not None and 500 <= status_code < 600:
+            help_message = (
+                "A remote server error occurred when trying to retrieve this URL. "
+                "A 500-type error (e.g. 500, 501, 502, 503, etc.) indicates the server failed to "
+                "fulfill a valid request. The problem may be spurious, and will resolve itself if you "
+                "try your request again. If the problem persists, consider notifying the maintainer "
+                "of the remote server."
+            )
+
+        else:
+            if url.startswith("https://repo.anaconda.com/"):
+                help_message = (
+                    "An HTTP error occurred when trying to retrieve this URL. "
+                    "HTTP errors are often intermittent, and a simple retry will get you on your way. "
+                    "If your current network has https://www.anaconda.com blocked, please file "
+                    "a support request with your network engineering team. "
+                    f"{url}"
+                )
+            else:
+                help_message = (
+                    "An HTTP error occurred when trying to retrieve this URL. "
+                    "HTTP errors are often intermittent, and a simple retry will get you on your way. "
+                    f"{url}"
+                )
+        raise HTTPError(
+            help_message,
+            join_url(url, filename),
+            status_code,
+            getattr(e.response, "reason", None),
+            getattr(e.response, "elapsed", None),
+            e.response,
+            caused_by=e,
+        )
+
+    if resp.status_code == 304:
+        raise RuntimeError("Response 304: Content Unchanged")
+
+    json_str = _maybe_decompress(filename, resp.content)
+
+    saved_fields = {"_url": url}
+    _add_http_value_to_dict(resp, "Etag", saved_fields, "_etag")
+    _add_http_value_to_dict(resp, "Last-Modified", saved_fields, "_mod")
+    _add_http_value_to_dict(resp, "Cache-Control", saved_fields, "_cache_control")
+
+    # add extra values to the raw repodata json
+    if json_str and json_str != "{}":
+        raw_repodata_str = "{0}, {1}".format(
+            json.dumps(saved_fields)[:-1],  # remove trailing '}'
+            json_str[1:],  # remove first '{'
+        )
+    else:
+        raw_repodata_str = _ensure_text_type(json.dumps(saved_fields))
+    return raw_repodata_str
+
+
+def get_repodata(
+    url,
+    ssl_verify=True,
+    remote_connect_timeout_secs=9.15,
+    remote_read_timeout_secs=60.0,
+    proxies=None,
+    context=None,
+):
+    """Obtain the repodata from a channel URL"""
+    if context is None:
+        context = Context()
+    raw_repodata_str = fetch_repodata_remote_request(
+        url,
+        ssl_verify=ssl_verify,
+        remote_connect_timeout_secs=remote_connect_timeout_secs,
+        remote_read_timeout_secs=remote_read_timeout_secs,
+        proxies=proxies,
+        context=context,
+    )
     full_repodata = json.loads(raw_repodata_str)
     return full_repodata
 
 
 def load_repodatas(
-    download_dir, channels=(), conda_default_channels=(), channels_remap=()
+    download_dir,
+    channels=(),
+    conda_default_channels=(),
+    channels_remap=(),
+    context=None,
 ):
     """Load all repodatas into a single dict"""
+    if context is None:
+        context = Context()
     cache_dir = os.path.join(download_dir, "cache")
     os.makedirs(cache_dir, exist_ok=True)
 
     remaps = {url["src"].rstrip("/"): url["dest"].rstrip("/") for url in channels_remap}
     urls = all_channel_urls(
-        url.rstrip("/")
-        for url in list(remaps) + list(channels) + list(conda_default_channels)
+        list(
+            url.rstrip("/")
+            for url in list(remaps) + list(channels) + list(conda_default_channels)
+        ),
+        context=context,
     )
-    repodatas = {url: get_repodata(url) for url in urls}
+    repodatas = {url: get_repodata(url, context=context) for url in urls}
     return repodatas
 
 
@@ -174,10 +320,13 @@ def precs_from_package_specs(
     channels=(),
     conda_default_channels=(),
     channels_remap=(),
+    context=None,
 ):
     """Get the package records from a list of package names/specs, as you
     might type them in on the command line. This has to perform a solve.
     """
+    if context is None:
+        context = Context()
     # perform solve
     solver_conda = find_solver_conda(solver, user_conda)
     LOGGER.info("solving conda environment")
@@ -197,7 +346,10 @@ def precs_from_package_specs(
     with timer(LOGGER, "loading repodata"):
         used_channels = {f"{x['base_url']}/{x['platform']}" for x in listing}
         repodatas = load_repodatas(
-            download_dir, channels=used_channels, channels_remap=channels_remap,
+            download_dir,
+            channels=used_channels,
+            channels_remap=channels_remap,
+            context=context,
         )
 
     # now, create PackageCacheRecords
@@ -235,12 +387,15 @@ def find_precs(
     channels=(),
     conda_default_channels=(),
     channels_remap=(),
+    context=None,
 ):
     if name is not None:
         precs = precs_from_environment_name(name, download_dir, user_conda)
     elif prefix is not None:
         precs = precs_from_environment_prefix(prefix, download_dir, user_conda)
     elif package_specs is not None:
+        if context is None:
+            context = Context()
         precs = precs_from_package_specs(
             package_specs,
             solver,
@@ -249,6 +404,7 @@ def find_precs(
             channels=channels,
             conda_default_channels=conda_default_channels,
             channels_remap=channels_remap,
+            context=context,
         )
     else:
         raise RuntimeError("could not determine package list")
@@ -297,15 +453,9 @@ def fetch_precs(download_dir, precs):
             LOGGER.info(f"fetching: {prec.fn}")
             download(prec.url, os.path.join(download_dir, prec.fn))
 
-        if not os.path.isdir(extracted_package_dir):
-            from conda.gateways.disk.create import extract_tarball
-
-            extract_tarball(package_tarball_full_path, extracted_package_dir)
-
-        repodata_record_path = os.path.join(
-            extracted_package_dir, "info", "repodata_record.json"
-        )
-
+        info_dir = os.path.join(extracted_package_dir, "info")
+        os.makedirs(info_dir, exist_ok=True)
+        repodata_record_path = os.path.join(info_dir, "repodata_record.json")
         with open(repodata_record_path, "w") as fh:
             json.dump(prec.dump(), fh, indent=2, sort_keys=True, separators=(",", ": "))