Skip to content

Commit

Permalink
Fixed Azure being unable to download ip ranges (#904)
Browse files Browse the repository at this point in the history
Fixed by 
1) passing in a user agent with the URL
`Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,
like Gecko) Chrome/113.0.0.0 Safari/537.36`
2) parsing the redirection url with regex and successfully downloading
the azure ip range json file.

---------

Co-authored-by: Brian Kim <[email protected]>
  • Loading branch information
s3kim2018 and Brian Kim authored Aug 7, 2023
1 parent 2aab284 commit 7cdc831
Show file tree
Hide file tree
Showing 20 changed files with 33 additions and 58 deletions.
4 changes: 0 additions & 4 deletions skyplane/api/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,11 @@
from typing import TYPE_CHECKING, Optional

from skyplane.api.config import TransferConfig
from skyplane.api.dataplane import Dataplane
from skyplane.api.provisioner import Provisioner
from skyplane.api.obj_store import ObjectStore
from skyplane.api.usage import get_clientid
from skyplane.obj_store.object_store_interface import ObjectStoreInterface
from skyplane.planner.planner import MulticastDirectPlanner
from skyplane.utils import logger
from skyplane.utils.definitions import tmp_log_dir
from skyplane.utils.path import parse_path

from skyplane.api.pipeline import Pipeline

Expand Down
2 changes: 1 addition & 1 deletion skyplane/api/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from dataclasses import dataclass

from typing import Optional, List
from typing import Optional

from skyplane import compute

Expand Down
6 changes: 3 additions & 3 deletions skyplane/api/dataplane.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
import os
import threading
from collections import defaultdict, Counter
from collections import defaultdict
from datetime import datetime
from functools import partial
from datetime import datetime
Expand All @@ -15,7 +15,7 @@
from skyplane import compute
from skyplane.exceptions import GatewayContainerStartException
from skyplane.api.tracker import TransferProgressTracker, TransferHook
from skyplane.api.transfer_job import CopyJob, SyncJob, TransferJob
from skyplane.api.transfer_job import TransferJob
from skyplane.api.config import TransferConfig
from skyplane.planner.topology import TopologyPlan, TopologyPlanGateway
from skyplane.utils import logger
Expand Down Expand Up @@ -223,7 +223,7 @@ def provision(
logger.fs.debug(f"[Dataplane.provision] Starting gateways on {len(jobs)} servers")
try:
do_parallel(lambda fn: fn(), jobs, n=-1, spinner=spinner, spinner_persist=spinner, desc="Starting gateway container on VMs")
except Exception as e:
except Exception:
self.copy_gateway_logs()
raise GatewayContainerStartException(f"Error starting gateways. Please check gateway logs {self.transfer_dir}")

Expand Down
16 changes: 4 additions & 12 deletions skyplane/api/pipeline.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,19 @@
import json
import time
import os
import threading
from collections import defaultdict, Counter
from datetime import datetime
from functools import partial
from datetime import datetime

import nacl.secret
import nacl.utils
import urllib3
from typing import TYPE_CHECKING, Dict, List, Optional

from skyplane import compute
from skyplane.api.tracker import TransferProgressTracker, TransferHook
from skyplane.api.tracker import TransferProgressTracker
from skyplane.api.transfer_job import CopyJob, SyncJob, TransferJob
from skyplane.api.config import TransferConfig

from skyplane.planner.planner import MulticastDirectPlanner, DirectPlannerSourceOneSided, DirectPlannerDestOneSided
from skyplane.planner.topology import TopologyPlanGateway
from skyplane.utils import logger
from skyplane.utils.definitions import gateway_docker_image, tmp_log_dir
from skyplane.utils.fn import PathLike, do_parallel
from skyplane.utils.definitions import tmp_log_dir

from skyplane.api.dataplane import Dataplane

Expand Down Expand Up @@ -118,7 +110,7 @@ def start(self, debug=False, progress=False):
# copy gateway logs
if debug:
dp.copy_gateway_logs()
except Exception as e:
except Exception:
dp.copy_gateway_logs()
dp.deprovision(spinner=True)
return dp
Expand All @@ -131,7 +123,7 @@ def start_async(self, debug=False):
if debug:
dp.copy_gateway_logs()
return tracker
except Exception as e:
except Exception:
dp.copy_gateway_logs()
return

Expand Down
6 changes: 2 additions & 4 deletions skyplane/api/tracker.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import functools
from pprint import pprint
import json
import time
from abc import ABC
Expand Down Expand Up @@ -221,7 +219,7 @@ def monitor_single_dst_helper(dst_region):
}
self.hooks.on_transfer_end()

start_time = int(time.time())
int(time.time())
try:
for job in self.jobs.values():
logger.fs.debug(f"[TransferProgressTracker] Finalizing job {job.uuid}")
Expand All @@ -236,7 +234,7 @@ def monitor_single_dst_helper(dst_region):
session_start_timestamp_ms,
)
raise e
end_time = int(time.time())
int(time.time())

# verify transfer
try:
Expand Down
8 changes: 4 additions & 4 deletions skyplane/api/transfer_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,18 @@
from dataclasses import dataclass, field
from queue import Queue

from abc import ABC, abstractmethod
from abc import ABC
from typing import TYPE_CHECKING, Callable, Generator, List, Optional, Tuple, TypeVar, Dict

from abc import ABC, abstractmethod
from abc import ABC

import urllib3
from rich import print as rprint
from functools import partial

from skyplane import exceptions
from skyplane.api.config import TransferConfig
from skyplane.chunk import Chunk, ChunkRequest
from skyplane.chunk import Chunk
from skyplane.obj_store.storage_interface import StorageInterface
from skyplane.obj_store.object_store_interface import ObjectStoreObject, ObjectStoreInterface
from skyplane.utils import logger
Expand Down Expand Up @@ -624,7 +624,7 @@ def dispatch(
src_gateways = dataplane.source_gateways()
queue_size = [0] * len(src_gateways)
n_multiparts = 0
start = time.time()
time.time()

for batch in batches:
# send upload_id mappings to sink gateways
Expand Down
3 changes: 1 addition & 2 deletions skyplane/cli/cli_transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@
from skyplane.api.usage import UsageClient
from skyplane.config import SkyplaneConfig
from skyplane.config_paths import cloud_config, config_path
from skyplane.obj_store.object_store_interface import ObjectStoreInterface, StorageInterface
from skyplane.obj_store.file_system_interface import FileSystemInterface
from skyplane.obj_store.object_store_interface import StorageInterface
from skyplane.cli.impl.progress_bar import ProgressBarTransferHook
from skyplane.utils import logger
from skyplane.utils.definitions import GB, format_bytes
Expand Down
2 changes: 1 addition & 1 deletion skyplane/cli/impl/progress_bar.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, DownloadColumn, TransferSpeedColumn, TimeRemainingColumn
from skyplane import exceptions
from skyplane.chunk import Chunk
from skyplane.cli.impl.common import console, print_stats_completed
from skyplane.cli.impl.common import console
from skyplane.utils.definitions import format_bytes
from skyplane.api.tracker import TransferHook

Expand Down
1 change: 0 additions & 1 deletion skyplane/compute/azure/azure_auth.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import json
import os
import subprocess
import re

from typing import Dict, List, Optional

Expand Down
3 changes: 0 additions & 3 deletions skyplane/compute/server.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import json
import logging
from pprint import pprint
import os
import socket
from contextlib import closing
Expand All @@ -14,12 +13,10 @@
from skyplane import compute
from skyplane.compute.const_cmds import make_autoshutdown_script, make_dozzle_command, make_sysctl_tcp_tuning_command
from skyplane.config_paths import config_path, cloud_config, __config_root__
from skyplane.gateway.gateway_program import GatewayProgram
from skyplane.utils import logger
from skyplane.utils.fn import PathLike, wait_for
from skyplane.utils.retry import retry_backoff
from skyplane.utils.timer import Timer
from skyplane.planner.topology import TopologyPlanGateway

tmp_log_dir = Path("/tmp/skyplane")

Expand Down
2 changes: 1 addition & 1 deletion skyplane/gateway/gateway_daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from multiprocessing import Event, Queue
from os import PathLike
from pathlib import Path
from typing import Dict, List, Optional
from typing import Dict, List

from skyplane.utils import logger

Expand Down
3 changes: 1 addition & 2 deletions skyplane/gateway/gateway_daemon_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
from multiprocessing.managers import DictProxy
from queue import Empty
from traceback import TracebackException
from typing import Dict, List, Tuple, Optional
import json
from typing import Dict, List, Tuple
from flask import Flask, jsonify, request
from werkzeug.serving import make_server

Expand Down
3 changes: 0 additions & 3 deletions skyplane/gateway/gateway_onprem.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
import psutil
from multiprocessing import Process

# TODO: migrate to programmable gateways
# from skyplane.gateway.gateway_sender import GatewaySender
#
Expand Down
2 changes: 1 addition & 1 deletion skyplane/gateway/gateway_program.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional, List, Tuple
from typing import Optional, List
import json
from collections import defaultdict

Expand Down
1 change: 0 additions & 1 deletion skyplane/obj_store/azure_blob_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from skyplane.obj_store.azure_storage_account_interface import AzureStorageAccountInterface
from skyplane.obj_store.object_store_interface import ObjectStoreInterface, ObjectStoreObject
from skyplane.utils import logger, imports
from azure.storage.blob import ContentSettings


MAX_BLOCK_DIGITS = 5
Expand Down
1 change: 0 additions & 1 deletion skyplane/obj_store/file_system_interface.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from dataclasses import dataclass
from typing import Any, Iterator, List, Optional
from skyplane.obj_store.storage_interface import StorageInterface
import os


@dataclass
Expand Down
3 changes: 1 addition & 2 deletions skyplane/obj_store/object_store_interface.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from dataclasses import dataclass

from typing import Any, Iterator, List, Optional, Tuple
from typing import Any, List, Optional, Tuple

from skyplane.obj_store.storage_interface import StorageInterface
from skyplane.utils import logger


@dataclass
Expand Down
11 changes: 2 additions & 9 deletions skyplane/obj_store/r2_interface.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,10 @@
import base64
import hashlib
import os
import boto3
from functools import lru_cache

from typing import Iterator, List, Optional, Tuple
from typing import Iterator

from skyplane import exceptions, compute
from skyplane.exceptions import NoSuchObjectException
from skyplane.obj_store.s3_interface import S3Object, S3Interface
from skyplane.config_paths import cloud_config
from skyplane.utils import logger, imports
from skyplane.utils.generator import batch_generator

from skyplane.config_paths import config_path
from skyplane.config import SkyplaneConfig
Expand All @@ -35,7 +28,7 @@ def __init__(self, account_id: str, bucket_name: str):
aws_secret_access_key=self.config.cloudflare_secret_access_key,
region_name="auto", # explicity set region, otherwise may be read from AWS boto3 env
)
except Exception as e:
except Exception:
raise ValueError("Error with connecting to {self.endpoint_url}: {e}")
self.requester_pays = False
self.bucket_name = bucket_name
Expand Down
4 changes: 2 additions & 2 deletions skyplane/planner/planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ def plan(self, jobs: List[TransferJob]) -> TopologyPlan:
dst_prefix = dst_prefixes[i]
dst_region_tag = dst_iface.region_tag()
dst_bucket = dst_iface.bucket()
dst_gateways = plan.get_region_gateways(dst_region_tag)
plan.get_region_gateways(dst_region_tag)

# special case where destination is same region as source
src_program.add_operator(
Expand Down Expand Up @@ -476,7 +476,7 @@ def plan(self, jobs: List[TransferJob]) -> TopologyPlan:
dst_prefix = dst_prefixes[i]
dst_region_tag = dst_iface.region_tag()
dst_bucket = dst_iface.bucket()
dst_gateways = plan.get_region_gateways(dst_region_tag)
plan.get_region_gateways(dst_region_tag)

# source region gateway program
obj_store_read = dst_program[dst_region_tag].add_operator(
Expand Down
10 changes: 9 additions & 1 deletion skyplane/utils/networking_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,15 @@ def get_cloud_region(ip: str, provider: str = "aws") -> str:
if re.match(prefix["ip_prefix"], ip):
return prefix["region"]
elif provider == "azure":
region = requests.get(f"https://www.microsoft.com/en-us/download/confirmation.aspx?id=56519").json()
user_agent = {
"User-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95"
}
body = requests.get(f"https://www.microsoft.com/en-us/download/confirmation.aspx?id=56519", headers=user_agent).content
matches = re.search(b'downloadretry" href="([^"]*)"', body)
if matches is None:
return default_region[provider]
region_url = matches.groups(0)[0]
region = requests.get(region_url).json()
for prefix in region["values"]:
if re.match(prefix["properties"]["addressPrefix"], ip):
return prefix["properties"]["region"]
Expand Down

0 comments on commit 7cdc831

Please sign in to comment.