Skip to content

Commit

Permalink
Change IVO parser to use form described in DMTN-302
Browse files Browse the repository at this point in the history
  • Loading branch information
timj committed Dec 3, 2024
1 parent f42290d commit 8d88168
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 16 deletions.
31 changes: 21 additions & 10 deletions python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@

__all__ = ["Butler"]

import os
import urllib.parse
import uuid
from abc import abstractmethod
Expand Down Expand Up @@ -550,30 +549,42 @@ def parse_dataset_uri(cls, uri: str) -> tuple[str, DatasetId]:
Notes
-----
Supports dataset URIs of the forms
``ivo://rubin.lsst/datasets?butler_label/UUID``
and ``butler://butler_label/UUID``. In ``ivo`` URIs the butler label
can include ``/`` and the trailing ``/`` before the UUID is always
stripped.
``ivo://org.rubinobs/usdac/dr1?repo=butler_label&id=UUID`` (see
DMTN-302) and ``butler://butler_label/UUID``. The ``butler`` URI is
deprecated and cannot include ``/`` in the label string. ``ivo`` URIs
can include anything supported by the `Butler` constructor, including
paths to repositories and alias labels.
ivo://rubin.lsst/datasets?/repo/main/UUID
ivo://org.rubinobs/dr1?repo=/repo/main&id=UUID
will return a label of ``/repo/main``.
This method does not attempt to check that the dataset exists in the
labeled butler.
Since the IVOID can be issued by any publisher to represent a Butler
dataset there is no validation of the path or netloc component of the
URI. The only requirement is that there are ``id`` and ``repo`` keys
in the ``ivo`` URI query component.
"""
parsed = urllib.parse.urlparse(uri)
if parsed.scheme == "ivo":
# TODO: Validate netloc component.
if parsed.path != "/datasets":
raise ValueError(f"Unrecognized path in IVOID {uri}. Expected 'datasets' got {parsed.path!r}")
label, id_ = os.path.split(parsed.query)
# Do not validate the netloc or the path values.
qs = urllib.parse.parse_qs(parsed.query)
if "repo" not in qs or "id" not in qs:
raise ValueError(f"Missing 'repo' and/or 'id' query parameters in IVOID {uri}.")
if len(qs["repo"]) != 1 or len(qs["id"]) != 1:
raise ValueError(f"Butler IVOID only supports a single value of repo and id, got {uri}")
label = qs["repo"][0]
id_ = qs["id"][0]
elif parsed.scheme == "butler":
label = parsed.netloc
# Need to strip the leading /.
id_ = parsed.path[1:]
else:
raise ValueError(f"Unrecognized URI scheme: {uri!r}")
# Strip trailing/leading whitespace from label.
label = label.strip()
if not label:
raise ValueError(f"No butler repository label found in uri {uri!r}")
try:
Expand Down
15 changes: 9 additions & 6 deletions tests/test_simpleButler.py
Original file line number Diff line number Diff line change
Expand Up @@ -912,10 +912,10 @@ def test_dataset_uris(self):
factory = butler_factory.bind(access_token=None)

for dataset_uri in (
f"ivo://rubin.lsst/datasets?{config_dir}/{ref.id}",
f"ivo://rubin.lsst/datasets?{config_dir}/butler.yaml/{ref.id}",
f"ivo://org.rubinobs/usdac/test?repo={config_dir}&id={ref.id}",
f"ivo://org.rubinobs/ukdac/lsst-dr1?repo={config_dir}/butler.yaml&id={ref.id}",
f"butler://{label}/{ref.id}",
f"ivo://rubin.lsst/datasets?{label}/{ref.id}",
f"ivo://org.rubinobs/usdac/lsst-dp1?repo={label}&id={ref.id}",
):
new_butler, ref2 = Butler.get_dataset_from_uri(dataset_uri)
self.assertEqual(ref, ref2)
Expand All @@ -939,11 +939,14 @@ def test_dataset_uris(self):
# Test some failure modes.
for dataset_uri in (
"butler://label/1234", # Bad UUID.
"butler://1234", # No label.
"butler://1234", # No UUID.
"butler:///1234", # No label.
"ivo://rubin/1234", # No query part and bad UUID and no label.
"ivo://rubin/datasets/dr1/82d79caa-0823-4300-9874-67b737367ee0", # No query part.
"ivo://rubin/datasets?dr1/1234", # Bad UUID.
"ivo://rubin.lsst/butler?dr1/82d79caa-0823-4300-9874-67b737367ee0", # Not datasets.
"ivo://org.rubinobs/datasets?repo=dr1&id=1234", # Bad UUID.
"ivo://org.rubinobs/butler?release=dr1&id=82d79caa-0823-4300-9874-67b737367ee0", # No repo key.
"ivo://org.rubinobs/butler?repo=dr1&repo=dr2&id=82d79caa-0823-4300-9874-67b737367ee0", # 2 vals.
"ivo://org.rubinobs/something?repo=%20&id=82d79caa-0823-4300-9874-67b737367ee0", # no repo.
"https://something.edu/1234", # Wrong scheme.
):
with self.assertRaises(ValueError):
Expand Down

0 comments on commit 8d88168

Please sign in to comment.