Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added basic support for temporary urls #13

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 45 additions & 7 deletions swiftspec/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,21 +139,52 @@ async def set_session(self):
return self._session

def get_tokens_from_env(self):
auth = []
token = os.environ.get("OS_AUTH_TOKEN")
url = os.environ.get("OS_STORAGE_URL")
if token and url:
return [{"token": token, "url": url}]
else:
return []
auth.append({"token": token, "url": url})

sig = os.environ.get("TEMP_URL_SIG")
expire = os.environ.get("TEMP_URL_EXPIRES")
prf = os.environ.get("TEMP_URL_PREFIX")

if url and sig and expire:
if prf:
auth.append(
{
"url": url,
"temp_url_sig": sig,
"temp_url_expires": expire,
"temp_url_prefix": prf,
}
)
else:
auth.append(
{"url": url, "temp_url_sig": sig, "temp_url_expires": expire}
)
return auth

def headers_for_url(self, url):
headers = {}
for auth in self.auth:
if url.startswith(auth["url"]):
if url.startswith(auth["url"]) and "token" in auth:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change probably shouldn't be there in this form. The loop over all the auth methods should stop as soon as there is a match on the url. It might then be the case that, for the matched auth method, the headers should be empty (because it's params-based).

(it's the same issue for the loop in params_for_url)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think if you have a url AND a token, that should always be preferred, right? However, I do not understand why the token should not be checked.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@d70-t i do not understand how to decide what to take unless using the check for token

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My thinking was that auth would be a list of authentication methods and the rule would be to apply the first method which matches on the requested url. Along this line, checking for token would be a way of skipping the first entry matching the url (but we don't want to skip).

At some point, I wondered whether auth might be better implemented as a dict, but a dict doesn't support prefix-matching, so it became a list.

Copy link
Collaborator

@d70-t d70-t Jul 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@d70-t i do not understand how to decide what to take unless using the check for token

It would be up to the method creating the auth list, to ensure only the right option (token or temporary) is part of the list.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made a draft of this line of thought here. It passes the unit tests, but as (unfortunately) authentication isn't yet covered by tests, I don't know yet if it really works.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That all sounds good. I think and hope your draft does the same as my code, so I don't mind which implementation we use. If there are other params or header options that we want to include someday, it could be nice to already have separate functions for each. Also, if the authentication does not care whether it is given both ways (params and headers), we could just provide both.

headers["X-Auth-Token"] = auth["token"]
break
return headers

def params_for_url(self, url):
    """Return the temporary-URL query parameters that apply to ``url``.

    Walks ``self.auth`` in order and selects the first entry whose ``"url"``
    value is a prefix of ``url`` and which carries both ``"temp_url_sig"``
    and ``"temp_url_expires"``. Entries lacking either key (for example
    token-only entries) are skipped.

    Returns:
        A new dict holding every ``temp_url_*`` item of the selected entry,
        or an empty dict when no entry qualifies.
    """
    for auth in self.auth:
        if (
            url.startswith(auth["url"])
            and "temp_url_sig" in auth
            and "temp_url_expires" in auth
        ):
            # ``items`` has to be called: iterating the bound method itself
            # raises ``TypeError`` as soon as an entry matches.
            return {k: v for k, v in auth.items() if k.startswith("temp_url_")}
    return {}

@classmethod
def _strip_protocol(cls, path):
"""For SWIFT, we always want to keep the full URL"""
Expand All @@ -167,6 +198,8 @@ async def _ls(self, path, detail=True, **kwargs):
"format": "json",
}
url = f"https://{ref.host}/v1/{ref.account}"
params.update(self.params_for_url(url))

async with session.get(
url, params=params, headers=self.headers_for_url(url)
) as res:
Expand All @@ -193,6 +226,8 @@ async def _ls(self, path, detail=True, **kwargs):
"prefix": prefix,
}
url = f"https://{ref.host}/v1/{ref.account}/{ref.container}"
params.update(self.params_for_url(url))

async with session.get(
url, params=params, headers=self.headers_for_url(url)
) as res:
Expand Down Expand Up @@ -222,6 +257,7 @@ def _raise_not_found_for_status(self, response, ref):
async def _cat_file(self, path, start=None, end=None, **kwargs):
ref = SWIFTRef(path)
headers = self.headers_for_url(ref.http_url)
params = self.params_for_url(ref.http_url)
if start is not None:
assert start >= 0
if end is not None:
Expand All @@ -235,7 +271,7 @@ async def _cat_file(self, path, start=None, end=None, **kwargs):
headers["Range"] = f"bytes=0-{end}"

session = await self.set_session()
async with session.get(ref.http_url, headers=headers) as res:
async with session.get(ref.http_url, params=params, headers=headers) as res:
self._raise_not_found_for_status(res, ref)
return await res.read()

Expand All @@ -251,13 +287,14 @@ async def _pipe_file(self, path, data, chunksize=50 * 2**20, **kwargs):

url = ref.http_url
headers = self.headers_for_url(url)
params = self.params_for_url(url)
headers["Content-Length"] = str(size)
if self.verify_uploads:
# In Swift, the ETag is always the MD5 sum and will be used by the server to verify the upload
headers["ETag"] = md5(data).hexdigest()

session = await self.set_session()
async with session.put(url, data=data, headers=headers) as res:
async with session.put(url, data=data, params=params, headers=headers) as res:
res.raise_for_status()

async def _rm_file(self, path, missing_is_ok=False, **kwargs):
Expand Down Expand Up @@ -325,8 +362,9 @@ async def _info(self, path, **kwargs):
"size": None,
}
headers = self.headers_for_url(ref.http_url)
params = self.params_for_url(ref.http_url)
session = await self.set_session()
async with session.head(ref.http_url, headers=headers) as res:
async with session.head(ref.http_url, params=params, headers=headers) as res:
if res.status != 200:
raise FileNotFoundError(f"file '{ref.swift_url}' not found")
info = {
Expand Down