diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py index 27d55ce318..74bb22ed16 100644 --- a/python/lsst/daf/butler/_butler.py +++ b/python/lsst/daf/butler/_butler.py @@ -29,7 +29,6 @@ __all__ = ["Butler"] -import os import urllib.parse import uuid from abc import abstractmethod @@ -550,30 +549,42 @@ def parse_dataset_uri(cls, uri: str) -> tuple[str, DatasetId]: Notes ----- Supports dataset URIs of the forms - ``ivo://rubin.lsst/datasets?butler_label/UUID`` - and ``butler://butler_label/UUID``. In ``ivo`` URIs the butler label - can include ``/`` and the trailing ``/`` before the UUID is always - stripped. + ``ivo://org.rubinobs/usdac/dr1?repo=butler_label&id=UUID`` (see + DMTN-302) and ``butler://butler_label/UUID``. The ``butler`` URI is + deprecated and cannot include ``/`` in the label string. ``ivo`` URIs + can include anything supported by the `Butler` constructor, including + paths to repositories and alias labels. - ivo://rubin.lsst/datasets?/repo/main/UUID + ivo://org.rubinobs/dr1?repo=/repo/main&id=UUID will return a label of ``/repo/main``. This method does not attempt to check that the dataset exists in the labeled butler. + + Since the IVOID can be issued by any publisher to represent a Butler + dataset, there is no validation of the path or netloc component of the + URI. The only requirement is that there are ``id`` and ``repo`` keys + in the ``ivo`` URI query component. """ parsed = urllib.parse.urlparse(uri) if parsed.scheme == "ivo": - # TODO: Validate netloc component. - if parsed.path != "/datasets": - raise ValueError(f"Unrecognized path in IVOID {uri}. Expected 'datasets' got {parsed.path!r}") - label, id_ = os.path.split(parsed.query) + # Do not validate the netloc or the path values. 
+ qs = urllib.parse.parse_qs(parsed.query) + if "repo" not in qs or "id" not in qs: + raise ValueError(f"Missing 'repo' and/or 'id' query parameters in IVOID {uri}.") + if len(qs["repo"]) != 1 or len(qs["id"]) != 1: + raise ValueError(f"Butler IVOID only supports a single value of repo and id, got {uri}") + label = qs["repo"][0] + id_ = qs["id"][0] elif parsed.scheme == "butler": label = parsed.netloc # Need to strip the leading /. id_ = parsed.path[1:] else: raise ValueError(f"Unrecognized URI scheme: {uri!r}") + # Strip trailing/leading whitespace from label. + label = label.strip() if not label: raise ValueError(f"No butler repository label found in uri {uri!r}") try: diff --git a/tests/test_simpleButler.py b/tests/test_simpleButler.py index 012ff0c39a..5ad73ceab8 100644 --- a/tests/test_simpleButler.py +++ b/tests/test_simpleButler.py @@ -912,10 +912,10 @@ def test_dataset_uris(self): factory = butler_factory.bind(access_token=None) for dataset_uri in ( - f"ivo://rubin.lsst/datasets?{config_dir}/{ref.id}", - f"ivo://rubin.lsst/datasets?{config_dir}/butler.yaml/{ref.id}", + f"ivo://org.rubinobs/usdac/test?repo={config_dir}&id={ref.id}", + f"ivo://org.rubinobs/ukdac/lsst-dr1?repo={config_dir}/butler.yaml&id={ref.id}", f"butler://{label}/{ref.id}", - f"ivo://rubin.lsst/datasets?{label}/{ref.id}", + f"ivo://org.rubinobs/usdac/lsst-dp1?repo={label}&id={ref.id}", ): new_butler, ref2 = Butler.get_dataset_from_uri(dataset_uri) self.assertEqual(ref, ref2) @@ -939,11 +939,14 @@ def test_dataset_uris(self): # Test some failure modes. for dataset_uri in ( "butler://label/1234", # Bad UUID. - "butler://1234", # No label. + "butler://1234", # No UUID. + "butler:///1234", # No label. "ivo://rubin/1234", # No query part and bad UUID and no label. "ivo://rubin/datasets/dr1/82d79caa-0823-4300-9874-67b737367ee0", # No query part. - "ivo://rubin/datasets?dr1/1234", # Bad UUID. - "ivo://rubin.lsst/butler?dr1/82d79caa-0823-4300-9874-67b737367ee0", # Not datasets. 
+ "ivo://org.rubinobs/datasets?repo=dr1&id=1234", # Bad UUID. + "ivo://org.rubinobs/butler?release=dr1&id=82d79caa-0823-4300-9874-67b737367ee0", # No repo key. + "ivo://org.rubinobs/butler?repo=dr1&repo=dr2&id=82d79caa-0823-4300-9874-67b737367ee0", # 2 vals. + "ivo://org.rubinobs/something?repo=%20&id=82d79caa-0823-4300-9874-67b737367ee0", # no repo. "https://something.edu/1234", # Wrong scheme. ): with self.assertRaises(ValueError):