From 7cdc831d24a045f50c2c9f12d7c8791aee4e5fce Mon Sep 17 00:00:00 2001 From: Brian Kim Date: Tue, 8 Aug 2023 03:12:01 +0900 Subject: [PATCH] Fixed Azure being unable to download ip ranges (#904) Fixed by 1) passing in a user agent with the URL `Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36` 2) parsing the redirection url with regex and successfully downloading the azure ip range json file. --------- Co-authored-by: Brian Kim --- skyplane/api/client.py | 4 ---- skyplane/api/config.py | 2 +- skyplane/api/dataplane.py | 6 +++--- skyplane/api/pipeline.py | 16 ++++------------ skyplane/api/tracker.py | 6 ++---- skyplane/api/transfer_job.py | 8 ++++---- skyplane/cli/cli_transfer.py | 3 +-- skyplane/cli/impl/progress_bar.py | 2 +- skyplane/compute/azure/azure_auth.py | 1 - skyplane/compute/server.py | 3 --- skyplane/gateway/gateway_daemon.py | 2 +- skyplane/gateway/gateway_daemon_api.py | 3 +-- skyplane/gateway/gateway_onprem.py | 3 --- skyplane/gateway/gateway_program.py | 2 +- skyplane/obj_store/azure_blob_interface.py | 1 - skyplane/obj_store/file_system_interface.py | 1 - skyplane/obj_store/object_store_interface.py | 3 +-- skyplane/obj_store/r2_interface.py | 11 ++--------- skyplane/planner/planner.py | 4 ++-- skyplane/utils/networking_tools.py | 10 +++++++++- 20 files changed, 33 insertions(+), 58 deletions(-) diff --git a/skyplane/api/client.py b/skyplane/api/client.py index d931e6625..7f0bb1437 100644 --- a/skyplane/api/client.py +++ b/skyplane/api/client.py @@ -5,15 +5,11 @@ from typing import TYPE_CHECKING, Optional from skyplane.api.config import TransferConfig -from skyplane.api.dataplane import Dataplane from skyplane.api.provisioner import Provisioner from skyplane.api.obj_store import ObjectStore from skyplane.api.usage import get_clientid -from skyplane.obj_store.object_store_interface import ObjectStoreInterface -from skyplane.planner.planner import MulticastDirectPlanner from skyplane.utils import logger from skyplane.utils.definitions import tmp_log_dir -from skyplane.utils.path import parse_path from skyplane.api.pipeline import Pipeline diff --git a/skyplane/api/config.py b/skyplane/api/config.py index 4e4979815..b65e820ac 100644 --- a/skyplane/api/config.py +++ b/skyplane/api/config.py @@ -1,6 +1,6 @@ from dataclasses import dataclass -from typing import Optional, List +from typing import Optional from skyplane import compute diff --git a/skyplane/api/dataplane.py b/skyplane/api/dataplane.py index df6fab67b..c8253a922 100644 --- a/skyplane/api/dataplane.py +++ b/skyplane/api/dataplane.py @@ -1,7 +1,7 @@ import json import os import threading -from collections import defaultdict, Counter +from collections import defaultdict from datetime import datetime from functools import partial from datetime import datetime @@ -15,7 +15,7 @@ from skyplane import compute from skyplane.exceptions import GatewayContainerStartException from skyplane.api.tracker import TransferProgressTracker, TransferHook -from skyplane.api.transfer_job import CopyJob, SyncJob, TransferJob +from skyplane.api.transfer_job import TransferJob from skyplane.api.config import TransferConfig from skyplane.planner.topology import TopologyPlan, TopologyPlanGateway from skyplane.utils import logger @@ -223,7 +223,7 @@ def provision( logger.fs.debug(f"[Dataplane.provision] Starting gateways on {len(jobs)} servers") try: do_parallel(lambda fn: fn(), jobs, n=-1, spinner=spinner, spinner_persist=spinner, desc="Starting gateway container on VMs") - except Exception as e: + except Exception: self.copy_gateway_logs() raise GatewayContainerStartException(f"Error starting gateways. Please check gateway logs {self.transfer_dir}") diff --git a/skyplane/api/pipeline.py b/skyplane/api/pipeline.py index 19662f139..6fa3face4 100644 --- a/skyplane/api/pipeline.py +++ b/skyplane/api/pipeline.py @@ -1,27 +1,19 @@ -import json -import time -import os import threading -from collections import defaultdict, Counter from datetime import datetime -from functools import partial from datetime import datetime -import nacl.secret -import nacl.utils import urllib3 from typing import TYPE_CHECKING, Dict, List, Optional from skyplane import compute -from skyplane.api.tracker import TransferProgressTracker, TransferHook +from skyplane.api.tracker import TransferProgressTracker from skyplane.api.transfer_job import CopyJob, SyncJob, TransferJob from skyplane.api.config import TransferConfig from skyplane.planner.planner import MulticastDirectPlanner, DirectPlannerSourceOneSided, DirectPlannerDestOneSided from skyplane.planner.topology import TopologyPlanGateway from skyplane.utils import logger -from skyplane.utils.definitions import gateway_docker_image, tmp_log_dir -from skyplane.utils.fn import PathLike, do_parallel +from skyplane.utils.definitions import tmp_log_dir from skyplane.api.dataplane import Dataplane @@ -118,7 +110,7 @@ def start(self, debug=False, progress=False): # copy gateway logs if debug: dp.copy_gateway_logs() - except Exception as e: + except Exception: dp.copy_gateway_logs() dp.deprovision(spinner=True) return dp @@ -131,7 +123,7 @@ def start_async(self, debug=False): if debug: dp.copy_gateway_logs() return tracker - except Exception as e: + except Exception: dp.copy_gateway_logs() return diff --git a/skyplane/api/tracker.py b/skyplane/api/tracker.py index 1354b90b1..0439dc5a0 100644 --- a/skyplane/api/tracker.py +++ b/skyplane/api/tracker.py @@ -1,5 +1,3 @@ -import functools -from pprint import pprint import json import time from abc import ABC @@ -221,7 +219,7 @@ def monitor_single_dst_helper(dst_region): } self.hooks.on_transfer_end() - start_time = int(time.time()) + int(time.time()) try: for job in self.jobs.values(): logger.fs.debug(f"[TransferProgressTracker] Finalizing job {job.uuid}") @@ -236,7 +234,7 @@ def monitor_single_dst_helper(dst_region): session_start_timestamp_ms, ) raise e - end_time = int(time.time()) + int(time.time()) # verify transfer try: diff --git a/skyplane/api/transfer_job.py b/skyplane/api/transfer_job.py index ecfbaa086..0155b7c17 100644 --- a/skyplane/api/transfer_job.py +++ b/skyplane/api/transfer_job.py @@ -13,10 +13,10 @@ from dataclasses import dataclass, field from queue import Queue -from abc import ABC, abstractmethod +from abc import ABC from typing import TYPE_CHECKING, Callable, Generator, List, Optional, Tuple, TypeVar, Dict -from abc import ABC, abstractmethod +from abc import ABC import urllib3 from rich import print as rprint @@ -24,7 +24,7 @@ from skyplane import exceptions from skyplane.api.config import TransferConfig -from skyplane.chunk import Chunk, ChunkRequest +from skyplane.chunk import Chunk from skyplane.obj_store.storage_interface import StorageInterface from skyplane.obj_store.object_store_interface import ObjectStoreObject, ObjectStoreInterface from skyplane.utils import logger @@ -624,7 +624,7 @@ def dispatch( src_gateways = dataplane.source_gateways() queue_size = [0] * len(src_gateways) n_multiparts = 0 - start = time.time() + time.time() for batch in batches: # send upload_id mappings to sink gateways diff --git a/skyplane/cli/cli_transfer.py b/skyplane/cli/cli_transfer.py index 0f4b29f6e..c1d6620fd 100644 --- a/skyplane/cli/cli_transfer.py +++ b/skyplane/cli/cli_transfer.py @@ -21,8 +21,7 @@ from skyplane.api.usage import UsageClient from skyplane.config import SkyplaneConfig from skyplane.config_paths import cloud_config, config_path -from skyplane.obj_store.object_store_interface import ObjectStoreInterface, StorageInterface -from skyplane.obj_store.file_system_interface import FileSystemInterface +from skyplane.obj_store.object_store_interface import StorageInterface from skyplane.cli.impl.progress_bar import ProgressBarTransferHook from skyplane.utils import logger from skyplane.utils.definitions import GB, format_bytes diff --git a/skyplane/cli/impl/progress_bar.py b/skyplane/cli/impl/progress_bar.py index 41d41a5ac..36fb8f1d8 100644 --- a/skyplane/cli/impl/progress_bar.py +++ b/skyplane/cli/impl/progress_bar.py @@ -3,7 +3,7 @@ from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, DownloadColumn, TransferSpeedColumn, TimeRemainingColumn from skyplane import exceptions from skyplane.chunk import Chunk -from skyplane.cli.impl.common import console, print_stats_completed +from skyplane.cli.impl.common import console from skyplane.utils.definitions import format_bytes from skyplane.api.tracker import TransferHook diff --git a/skyplane/compute/azure/azure_auth.py b/skyplane/compute/azure/azure_auth.py index fdb7f023a..30e90f335 100644 --- a/skyplane/compute/azure/azure_auth.py +++ b/skyplane/compute/azure/azure_auth.py @@ -1,7 +1,6 @@ import json import os import subprocess -import re from typing import Dict, List, Optional diff --git a/skyplane/compute/server.py b/skyplane/compute/server.py index c4f9aacff..585773105 100644 --- a/skyplane/compute/server.py +++ b/skyplane/compute/server.py @@ -1,6 +1,5 @@ import json import logging -from pprint import pprint import os import socket from contextlib import closing @@ -14,12 +13,10 @@ from skyplane import compute from skyplane.compute.const_cmds import make_autoshutdown_script, make_dozzle_command, make_sysctl_tcp_tuning_command from skyplane.config_paths import config_path, cloud_config, __config_root__ -from skyplane.gateway.gateway_program import GatewayProgram from skyplane.utils import logger from skyplane.utils.fn import PathLike, wait_for from skyplane.utils.retry import retry_backoff from skyplane.utils.timer import Timer -from skyplane.planner.topology import TopologyPlanGateway tmp_log_dir = Path("/tmp/skyplane") diff --git a/skyplane/gateway/gateway_daemon.py b/skyplane/gateway/gateway_daemon.py index 06ef3e1ad..08e6562df 100644 --- a/skyplane/gateway/gateway_daemon.py +++ b/skyplane/gateway/gateway_daemon.py @@ -9,7 +9,7 @@ from multiprocessing import Event, Queue from os import PathLike from pathlib import Path -from typing import Dict, List, Optional +from typing import Dict, List from skyplane.utils import logger diff --git a/skyplane/gateway/gateway_daemon_api.py b/skyplane/gateway/gateway_daemon_api.py index d05de75c9..9a462fac8 100644 --- a/skyplane/gateway/gateway_daemon_api.py +++ b/skyplane/gateway/gateway_daemon_api.py @@ -7,8 +7,7 @@ from multiprocessing.managers import DictProxy from queue import Empty from traceback import TracebackException -from typing import Dict, List, Tuple, Optional -import json +from typing import Dict, List, Tuple from flask import Flask, jsonify, request from werkzeug.serving import make_server diff --git a/skyplane/gateway/gateway_onprem.py b/skyplane/gateway/gateway_onprem.py index 9bd0e1d01..af6ffd0a2 100644 --- a/skyplane/gateway/gateway_onprem.py +++ b/skyplane/gateway/gateway_onprem.py @@ -1,6 +1,3 @@ -import psutil -from multiprocessing import Process - # TODO: migrate to programmable gateways # from skyplane.gateway.gateway_sender import GatewaySender # diff --git a/skyplane/gateway/gateway_program.py b/skyplane/gateway/gateway_program.py index f275b82e8..c27427fd9 100644 --- a/skyplane/gateway/gateway_program.py +++ b/skyplane/gateway/gateway_program.py @@ -1,4 +1,4 @@ -from typing import Optional, List, Tuple +from typing import Optional, List import json from collections import defaultdict diff --git a/skyplane/obj_store/azure_blob_interface.py b/skyplane/obj_store/azure_blob_interface.py index a55a6fc8c..f7a63cd3d 100644 --- a/skyplane/obj_store/azure_blob_interface.py +++ b/skyplane/obj_store/azure_blob_interface.py @@ -10,7 +10,6 @@ from skyplane.obj_store.azure_storage_account_interface import AzureStorageAccountInterface from skyplane.obj_store.object_store_interface import ObjectStoreInterface, ObjectStoreObject from skyplane.utils import logger, imports -from azure.storage.blob import ContentSettings MAX_BLOCK_DIGITS = 5 diff --git a/skyplane/obj_store/file_system_interface.py b/skyplane/obj_store/file_system_interface.py index e04c8530c..563498b20 100644 --- a/skyplane/obj_store/file_system_interface.py +++ b/skyplane/obj_store/file_system_interface.py @@ -1,7 +1,6 @@ from dataclasses import dataclass from typing import Any, Iterator, List, Optional from skyplane.obj_store.storage_interface import StorageInterface -import os @dataclass diff --git a/skyplane/obj_store/object_store_interface.py b/skyplane/obj_store/object_store_interface.py index cf8d7260b..5f25cbd3d 100644 --- a/skyplane/obj_store/object_store_interface.py +++ b/skyplane/obj_store/object_store_interface.py @@ -1,9 +1,8 @@ from dataclasses import dataclass -from typing import Any, Iterator, List, Optional, Tuple +from typing import Any, List, Optional, Tuple from skyplane.obj_store.storage_interface import StorageInterface -from skyplane.utils import logger @dataclass diff --git a/skyplane/obj_store/r2_interface.py b/skyplane/obj_store/r2_interface.py index bdb1c679d..f17ede03d 100644 --- a/skyplane/obj_store/r2_interface.py +++ b/skyplane/obj_store/r2_interface.py @@ -1,17 +1,10 @@ -import base64 -import hashlib import os import boto3 -from functools import lru_cache -from typing import Iterator, List, Optional, Tuple +from typing import Iterator -from skyplane import exceptions, compute -from skyplane.exceptions import NoSuchObjectException from skyplane.obj_store.s3_interface import S3Object, S3Interface -from skyplane.config_paths import cloud_config from skyplane.utils import logger, imports -from skyplane.utils.generator import batch_generator from skyplane.config_paths import config_path from skyplane.config import SkyplaneConfig @@ -35,7 +28,7 @@ def __init__(self, account_id: str, bucket_name: str): aws_secret_access_key=self.config.cloudflare_secret_access_key, region_name="auto", # explicity set region, otherwise may be read from AWS boto3 env ) - except Exception as e: + except Exception: raise ValueError("Error with connecting to {self.endpoint_url}: {e}") self.requester_pays = False self.bucket_name = bucket_name diff --git a/skyplane/planner/planner.py b/skyplane/planner/planner.py index 6bfb5233a..2bb21e0ac 100644 --- a/skyplane/planner/planner.py +++ b/skyplane/planner/planner.py @@ -420,7 +420,7 @@ def plan(self, jobs: List[TransferJob]) -> TopologyPlan: dst_prefix = dst_prefixes[i] dst_region_tag = dst_iface.region_tag() dst_bucket = dst_iface.bucket() - dst_gateways = plan.get_region_gateways(dst_region_tag) + plan.get_region_gateways(dst_region_tag) # special case where destination is same region as source src_program.add_operator( @@ -476,7 +476,7 @@ def plan(self, jobs: List[TransferJob]) -> TopologyPlan: dst_prefix = dst_prefixes[i] dst_region_tag = dst_iface.region_tag() dst_bucket = dst_iface.bucket() - dst_gateways = plan.get_region_gateways(dst_region_tag) + plan.get_region_gateways(dst_region_tag) # source region gateway program obj_store_read = dst_program[dst_region_tag].add_operator( diff --git a/skyplane/utils/networking_tools.py b/skyplane/utils/networking_tools.py index 040c11550..647b4867f 100644 --- a/skyplane/utils/networking_tools.py +++ b/skyplane/utils/networking_tools.py @@ -23,7 +23,15 @@ def get_cloud_region(ip: str, provider: str = "aws") -> str: if re.match(prefix["ip_prefix"], ip): return prefix["region"] elif provider == "azure": - region = requests.get(f"https://www.microsoft.com/en-us/download/confirmation.aspx?id=56519").json() + user_agent = { + "User-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95" + } + body = requests.get(f"https://www.microsoft.com/en-us/download/confirmation.aspx?id=56519", headers=user_agent).content + matches = re.search(b'downloadretry" href="([^"]*)"', body) + if matches is None: + return default_region[provider] + region_url = matches.groups(0)[0] + region = requests.get(region_url).json() for prefix in region["values"]: if re.match(prefix["properties"]["addressPrefix"], ip): return prefix["properties"]["region"]