From ecbefa8732da5e865422820d30f4bcde282e09f8 Mon Sep 17 00:00:00 2001
From: Johann Bahl <jb@flyingcircus.io>
Date: Fri, 8 Dec 2023 20:43:23 +0100
Subject: [PATCH] unify and extend revision spec syntax

all subcommands (except restore) accept multiple revisions
---
 .github/workflows/python-app.yml              |   2 -
 .../20231208_201510_jb_reintroduce_find.rst   |   3 +
 doc/man-backy.rst                             |  46 +++-
 src/backy/backup.py                           | 212 +++++++++++++-----
 src/backy/main.py                             |  71 +++---
 src/backy/tests/test_archive.py               |  97 +++++++-
 src/backy/tests/test_backup.py                |  10 +-
 src/backy/tests/test_backy.py                 |  20 +-
 src/backy/tests/test_main.py                  |  12 +-
 src/backy/utils.py                            |  42 +++-
 10 files changed, 371 insertions(+), 144 deletions(-)
 create mode 100644 changelog.d/20231208_201510_jb_reintroduce_find.rst

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 0038de65..7c577e0d 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -38,8 +38,6 @@ jobs:
       - name: Install Nix
         uses: DeterminateSystems/nix-installer-action@main
       - uses: DeterminateSystems/magic-nix-cache-action@main
-      - name: Check Nixpkgs inputs
-        uses: DeterminateSystems/flake-checker-action@main
         with:
           fail-mode: true
 
diff --git a/changelog.d/20231208_201510_jb_reintroduce_find.rst b/changelog.d/20231208_201510_jb_reintroduce_find.rst
new file mode 100644
index 00000000..2ea6a2ff
--- /dev/null
+++ b/changelog.d/20231208_201510_jb_reintroduce_find.rst
@@ -0,0 +1,3 @@
+.. A new scriv changelog fragment.
+
+- Unify and extend revision spec syntax
diff --git a/doc/man-backy.rst b/doc/man-backy.rst
index 8e158ea7..e58a45a6 100644
--- a/doc/man-backy.rst
+++ b/doc/man-backy.rst
@@ -141,16 +141,48 @@ Subcommand-specific options
     Valid for **scheduler** and **check** subcommands.
 
 **-r** *REVISION*
-    Selects a revision other than the last revision.
+    Selects one or more revisions other than the default.
 
-    Revisions can be specified in the following ways:
+    A single revision can be specified in the following ways:
 
-    * A full revision ID as printed with **backy status**. ID prefixes are OK as
-      long as they are unique.
+    * A full revision ID as printed with **backy status**.
     * A relative revision count: 0 is the last revision, 1 the one before, ...
-    * The key word **last** or **latest** as alias for the last revision.
-    * A revision tag. If several revisions with the given tag exist, the newest
-      one will be given.
+    * The key word **last** or **latest** is an alias for the last revision.
+    * The key word **first** is an alias for the first revision.
+    * The function **first** followed by a revision specifier in parentheses.
+      This returns the first value in the list, not the earliest by date.
+    * The function **last** followed by a revision specifier in parentheses.
+      This returns the last value in the list, not the latest by date.
+
+    Multiple revisions can be specified in the following ways:
+
+    * A multi revision specifier enclosed in parentheses.
+    * The function **not** followed by a revision specifier in parentheses.
+      This returns every revision which is not in the list.
+      Ordered by date, oldest first.
+    * The function **reverse** followed by a revision specifier in parentheses.
+      This returns the list in reversed order.
+    * The key word **all** is an alias for all revisions.
+      Ordered by date, oldest first.
+    * The key word **clean** is an alias for all clean/completed revisions.
+      Ordered by date, oldest first.
+    * A Trust state with the **trust:** prefix: Selects all revisions with this
+      Trust state. Ordered by date, oldest first.
+    * A tag with the **tag:** prefix. Selects all revisions with this tag.
+      Ordered by date, oldest first.
+    * An inclusive range using two single revision specifiers separated with two
+      dots. The single revision specifiers may be omitted, in which case the
+      **first** and/or **last** revision is assumed.
+      In addition to the single revision specifiers, ISO dates are also
+      supported (YYYY-MM-DD[THH:MM:SS[.ffffff]][+HH:MM[:SS[.ffffff]]]). The time
+      defaults to 00:00 and the timezone to the local timezone. The result is
+      ordered by date, oldest first, regardless of the provided argument order.
+    * An intersection using an ampersand separated list of all the above
+      specifiers. The order will be preserved.
+    * A comma separated list of all the above specifiers. The order will be
+      preserved and duplicates removed.
+
+    All subcommands except restore accept multiple revisions.
 
     Valid for **find** and **restore** subcommands.
 
diff --git a/src/backy/backup.py b/src/backy/backup.py
index 4874c150..e0c12352 100644
--- a/src/backy/backup.py
+++ b/src/backy/backup.py
@@ -3,16 +3,26 @@
 import glob
 import os
 import os.path as p
+import re
 import subprocess
 import time
 from enum import Enum
-from typing import IO, Optional, Type
+from math import ceil, floor
+from typing import IO, List, Optional, Type
 
+import tzlocal
 import yaml
 from structlog.stdlib import BoundLogger
 
 import backy.backends.chunked
-from backy.utils import min_date
+from backy.utils import (
+    duplicates,
+    list_get,
+    list_rindex,
+    list_split,
+    min_date,
+    unique,
+)
 
 from .backends import BackendException, BackyBackend
 from .backends.chunked import ChunkedFileBackend
@@ -56,7 +66,9 @@ def locked(target=None, mode=None):
         raise ValueError("Unknown lock mode '{}'".format(mode))
 
     def wrap(f):
-        def locked_function(self, *args, **kw):
+        def locked_function(self, *args, skip_lock=False, **kw):
+            if skip_lock:
+                return f(self, *args, **kw)
             if target in self._lock_fds:
                 raise RuntimeError("Bug: Locking is not re-entrant.")
             target_path = p.join(self.path, target)
@@ -201,13 +213,13 @@ def _clean(self):
                 revision.remove()
 
     @locked(target=".backup", mode="exclusive")
-    def forget_revision(self, revision):
-        r = self.find(revision)
-        r.remove()
+    def forget(self, revision: str):
+        for r in self.find_revisions(revision):
+            r.remove()
 
     @locked(target=".backup", mode="exclusive")
     @locked(target=".purge", mode="shared")
-    def backup(self, tags, force=False):
+    def backup(self, tags: set[str], force=False):
         if not force:
             missing_tags = (
                 filter_schedule_tags(tags) - self.schedule.schedule.keys()
@@ -250,7 +262,7 @@ def backup(self, tags, force=False):
             except BackendException:
                 self.log.exception("backend-error-distrust-all")
                 verified = False
-                self.distrust_range()
+                self.distrust("all", skip_lock=True)
             if not verified:
                 self.log.error(
                     "verification-failed",
@@ -282,44 +294,16 @@ def backup(self, tags, force=False):
                 break
 
     @locked(target=".backup", mode="exclusive")
-    def distrust(
-        self,
-        revision=None,
-        from_: Optional[datetime.date] = None,
-        until: Optional[datetime.date] = None,
-    ):
-        if revision:
-            r = self.find(revision)
-            r.distrust()
-            r.write_info()
-        else:
-            self.distrust_range(from_, until)
-
-    def distrust_range(
-        self,
-        from_: Optional[datetime.date] = None,
-        until: Optional[datetime.date] = None,
-    ):
-        for r in self.clean_history:
-            if from_ and r.timestamp.date() < from_:
-                continue
-            if until and r.timestamp.date() > until:
-                continue
+    def distrust(self, revision: str):
+        for r in self.find_revisions(revision):
             r.distrust()
             r.write_info()
 
     @locked(target=".purge", mode="shared")
-    def verify(self, revision=None):
-        if revision:
-            r = self.find(revision)
+    def verify(self, revision: str):
+        for r in self.find_revisions(revision):
             backend = self.backend_factory(r, self.log)
             backend.verify()
-        else:
-            for r in list(self.clean_history):
-                if r.trust != Trust.DISTRUSTED:
-                    continue
-                backend = self.backend_factory(r, self.log)
-                backend.verify()
 
     @locked(target=".purge", mode="exclusive")
     def purge(self):
@@ -498,34 +482,131 @@ def upgrade(self):
     ######################
     # Looking up revisions
 
-    def last_by_tag(self):
+    def last_by_tag(self) -> dict[str, datetime.datetime]:
         """Return a dictionary showing the last time each tag was
         backed up.
 
         Tags that have never been backed up won't show up here.
 
         """
-        last_times = {}
+        last_times: dict[str, datetime.datetime] = {}
         for revision in self.clean_history:
             for tag in revision.tags:
                 last_times.setdefault(tag, min_date())
                 last_times[tag] = max([last_times[tag], revision.timestamp])
         return last_times
 
-    def find_revisions(self, spec):
+    def find_revisions(
+        self, spec: str | List[str | Revision | List[Revision]]
+    ) -> List[Revision]:
         """Get a sorted list of revisions, oldest first, that match the given
         specification.
         """
-        if isinstance(spec, str) and spec.startswith("tag:"):
-            tag = spec.replace("tag:", "")
-            result = [r for r in self.history if tag in r.tags]
-        elif spec == "all":
-            result = self.history[:]
+
+        tokens: List[str | Revision | List[Revision]]
+        if isinstance(spec, str):
+            tokens = [
+                t.strip()
+                for t in re.split(r"(\(|\)|,|&|\.\.)", spec)
+                if t.strip()
+            ]
         else:
-            result = [self.find(spec)]
-        return result
+            tokens = spec
+        if "(" in tokens and ")" in tokens:
+            i = list_rindex(tokens, "(")
+            j = tokens.index(")", i)
+            prev, middle, next = tokens[:i], tokens[i + 1 : j], tokens[j + 1 :]
+
+            functions = {
+                "first": lambda x: x[0],
+                "last": lambda x: x[-1],
+                "not": lambda x: [r for r in self.history if r not in x],
+                "reverse": lambda x: list(reversed(x)),
+            }
+            if prev and isinstance(prev[-1], str) and prev[-1] in functions:
+                return self.find_revisions(
+                    prev[:-1]
+                    + [functions[prev[-1]](self.find_revisions(middle))]
+                    + next
+                )
+            return self.find_revisions(
+                prev + [self.find_revisions(middle)] + next
+            )
+        elif "," in tokens:
+            i = tokens.index(",")
+            return unique(
+                self.find_revisions(tokens[:i])
+                + self.find_revisions(tokens[i + 1 :])
+            )
+        elif "&" in tokens:
+            i = tokens.index("&")
+            return duplicates(
+                self.find_revisions(tokens[:i]),
+                self.find_revisions(tokens[i + 1 :]),
+            )
+        elif ".." in tokens:
+            _a, _b = list_split(tokens, "..")
+            assert len(_a) <= 1 and len(_b) <= 1
+            a = self.index_by_token(list_get(_a, 0, "first"))
+            b = self.index_by_token(list_get(_b, 0, "last"))
+            return self.history[ceil(min(a, b)) : floor(max(a, b)) + 1]
+        assert len(tokens) == 1
+        token = tokens[0]
+        if isinstance(token, Revision):
+            return [token]
+        elif isinstance(token, list):
+            return token
+        if token.startswith("tag:"):
+            tag = token.removeprefix("tag:")
+            return [r for r in self.history if tag in r.tags]
+        elif token.startswith("trust:"):
+            trust = Trust(token.removeprefix("trust:").lower())
+            return [r for r in self.history if trust == r.trust]
+        elif token == "all":
+            return self.history[:]
+        elif token == "clean":
+            return self.clean_history[:]
+        else:
+            return [self.find(token)]
+
+    def index_by_token(self, spec: str | Revision | List[Revision]):
+        assert not isinstance(
+            spec, list
+        ), "can only index a single revision specifier"
+        if isinstance(spec, str):
+            return self.index_by_date(spec) or self.history.index(
+                self.find(spec)
+            )
+        else:
+            return self.history.index(spec)
 
-    def find_by_number(self, spec):
+    def index_by_date(self, spec: str) -> Optional[float]:
+        """Return index of revision matched by datetime.
+        Index may be fractional if there is no exact datetime match.
+        Index range: [-0.5, len+0.5]
+        """
+        try:
+            date = datetime.datetime.fromisoformat(spec)
+            date = date.replace(tzinfo=date.tzinfo or tzlocal.get_localzone())
+            l = list_get(
+                [i for i, r in enumerate(self.history) if r.timestamp <= date],
+                -1,
+                -1,
+            )
+            r = list_get(
+                [i for i, r in enumerate(self.history) if r.timestamp >= date],
+                0,
+                len(self.history),
+            )
+            print(spec, l, r)
+            assert (
+                0 <= r - l <= 1
+            ), "can not index with date if multiple revision have the same timestamp"
+            return (l + r) / 2.0
+        except ValueError:
+            return None
+
+    def find_by_number(self, _spec: str) -> Revision:
         """Returns revision by relative number.
 
         0 is the newest,
@@ -535,22 +616,23 @@ def find_by_number(self, spec):
 
         Raises IndexError or ValueError if no revision is found.
         """
-        spec = int(spec)
+        spec = int(_spec)
         if spec < 0:
             raise KeyError("Integer revisions must be positive")
         return self.history[-spec - 1]
 
-    def find_by_tag(self, spec):
+    def find_by_tag(self, spec: str) -> Revision:
         """Returns the latest revision matching a given tag.
 
         Raises IndexError or ValueError if no revision is found.
         """
         if spec in ["last", "latest"]:
             return self.history[-1]
-        matching = [r for r in self.history if spec in r.tags]
-        return max((r.timestamp, r) for r in matching)[1]
+        if spec == "first":
+            return self.history[0]
+        raise ValueError()
 
-    def find_by_uuid(self, spec):
+    def find_by_uuid(self, spec: str) -> Revision:
         """Returns revision matched by UUID.
 
         Raises IndexError if no revision is found.
@@ -560,16 +642,28 @@ def find_by_uuid(self, spec):
         except KeyError:
             raise IndexError()
 
-    def find(self, spec) -> Revision:
+    def find_by_function(self, spec: str):
+        m = re.fullmatch(r"(\w+)\(.+\)", spec)
+        if m and m.group(1) in ["first", "last"]:
+            return self.find_revisions(m.group(0))[0]
+        raise ValueError()
+
+    def find(self, spec: str) -> Revision:
         """Flexible revision search.
 
         Locates a revision by relative number, by tag, or by uuid.
 
         """
-        if spec is None or spec == "" or not self.history:
+        spec = spec.strip()
+        if spec == "" or not self.history:
             raise KeyError(spec)
 
-        for find in (self.find_by_number, self.find_by_uuid, self.find_by_tag):
+        for find in (
+            self.find_by_number,
+            self.find_by_uuid,
+            self.find_by_tag,
+            self.find_by_function,
+        ):
             try:
                 return find(spec)
             except (ValueError, IndexError):
diff --git a/src/backy/main.py b/src/backy/main.py
index 15cc0ae5..5f409a00 100644
--- a/src/backy/main.py
+++ b/src/backy/main.py
@@ -25,16 +25,6 @@
 from .client import APIClient, CLIClient
 
 
-def valid_date(s):
-    if s is None:
-        return None
-    try:
-        return datetime.datetime.strptime(s, "%Y-%m-%d").date()
-    except ValueError:
-        msg = "Not a valid date: '{0}'.".format(s)
-        raise argparse.ArgumentTypeError(msg)
-
-
 class Command(object):
     """Proxy between CLI calls and actual backup code."""
 
@@ -45,10 +35,10 @@ def __init__(self, path, log):
         self.path = path
         self.log = log
 
-    def status(self, yaml_: bool):
-        b = backy.backup.Backup(self.path, self.log)
+    def status(self, yaml_: bool, revision):
+        revs = backy.backup.Backup(self.path, self.log).find_revisions(revision)
         if yaml_:
-            print(yaml.safe_dump([r.to_dict() for r in b.clean_history]))
+            print(yaml.safe_dump([r.to_dict() for r in revs]))
             return
         total_bytes = 0
 
@@ -62,7 +52,7 @@ def status(self, yaml_: bool):
             "Trust",
         )
 
-        for r in b.history:
+        for r in revs:
             total_bytes += r.stats.get("bytes_written", 0)
             duration = r.stats.get("duration")
             if duration:
@@ -85,7 +75,7 @@ def status(self, yaml_: bool):
 
         print(
             "{} revisions containing {} data (estimated)".format(
-                len(b.history), humanize.naturalsize(total_bytes, binary=True)
+                len(revs), humanize.naturalsize(total_bytes, binary=True)
             )
         )
 
@@ -108,14 +98,15 @@ def restore(self, revision, target, restore_backend):
 
     def find(self, revision, uuid):
         b = backy.backup.Backup(self.path, self.log)
-        if uuid:
-            print(b.find(revision).uuid)
-        else:
-            print(b.find(revision).filename)
+        for r in b.find_revisions(revision):
+            if uuid:
+                print(r.uuid)
+            else:
+                print(r.filename)
 
     def forget(self, revision):
         b = backy.backup.Backup(self.path, self.log)
-        b.forget_revision(revision)
+        b.forget(revision)
 
     def scheduler(self, config):
         backy.daemon.main(config, self.log)
@@ -128,9 +119,9 @@ def upgrade(self):
         b = backy.backup.Backup(self.path, self.log)
         b.upgrade()
 
-    def distrust(self, revision, from_, until):
+    def distrust(self, revision):
         b = backy.backup.Backup(self.path, self.log)
-        b.distrust(revision, from_, until)
+        b.distrust(revision)
 
     def verify(self, revision):
         b = backy.backup.Backup(self.path, self.log)
@@ -323,7 +314,7 @@ def setup_argparser():
     # FIND
     p = subparsers.add_parser(
         "find",
-        help="Print full path to a given revision's image file",
+        help="Print full path or uuid of specified revisions",
     )
     p.add_argument(
         "--uuid",
@@ -347,6 +338,13 @@ def setup_argparser():
 """,
     )
     p.add_argument("--yaml", dest="yaml_", action="store_true")
+    p.add_argument(
+        "-r",
+        "--revision",
+        metavar="SPEC",
+        default="all",
+        help="use revision SPEC as filter",
+    )
     p.set_defaults(func="status")
 
     # upgrade
@@ -372,46 +370,31 @@ def setup_argparser():
     p = subparsers.add_parser(
         "distrust",
         help="""\
-Distrust one or all revisions.
+Distrust specified revisions.
 """,
     )
     p.add_argument(
         "-r",
         "--revision",
         metavar="SPEC",
-        default="",
+        default="all",
         help="use revision SPEC to distrust, distrusting all if not given",
     )
-    p.add_argument(
-        "-f",
-        "--from",
-        metavar="DATE",
-        type=valid_date,
-        help="Mark revisions on or after this date as distrusted",
-        dest="from_",
-    )
-    p.add_argument(
-        "-u",
-        "--until",
-        metavar="DATE",
-        type=valid_date,
-        help="Mark revisions on or before this date as distrusted",
-    )
     p.set_defaults(func="distrust")
 
     # VERIFY
     p = subparsers.add_parser(
         "verify",
         help="""\
-Verify one or all revisions.
+Verify specified revisions.
 """,
     )
     p.add_argument(
         "-r",
         "--revision",
         metavar="SPEC",
-        default="",
-        help="use revision SPEC to verify, verifying all if not given",
+        default="trust:distrusted",
+        help="use revision SPEC to verify, verifying all distrusted if not given",
     )
     p.set_defaults(func="verify")
 
@@ -419,7 +402,7 @@ def setup_argparser():
     p = subparsers.add_parser(
         "forget",
         help="""\
-Forget revision.
+Forget specified revisions.
 """,
     )
     p.add_argument(
diff --git a/src/backy/tests/test_archive.py b/src/backy/tests/test_archive.py
index f68d5e7a..48c12100 100644
--- a/src/backy/tests/test_archive.py
+++ b/src/backy/tests/test_archive.py
@@ -11,6 +11,7 @@ def backup_with_revisions(backup, tmpdir):
 uuid: 123-0
 timestamp: 2015-08-29 00:00:00+00:00
 parent:
+trust: verified
 stats: {bytes_written: 14868480, duration: 31.1}
 tags: [daily, weekly, monthly]
 """
@@ -45,7 +46,7 @@ def test_empty_revisions(backup):
 
 def test_find_revision_empty(backup):
     with pytest.raises(KeyError):
-        backup.find(-1)
+        backup.find("-1")
     with pytest.raises(KeyError):
         backup.find("last")
     with pytest.raises(KeyError):
@@ -59,26 +60,104 @@ def test_load_revisions(backup_with_revisions):
     assert a.history[1].get_parent().uuid == "123-0"
     assert a.history[2].get_parent().uuid == "123-1"
     assert a.history[0].get_parent() is None
+
+
+def test_find_revisions(backup_with_revisions):
+    a = backup_with_revisions
     assert a.find_revisions("all") == a.history
-    assert a.find_revisions(1) == [a.find(1)]
+    assert a.find_revisions("1") == [a.find("1")]
+    assert a.find_revisions("tag:dail") == []
+    assert a.find_revisions("trust:verified") == [a.find("123-0")]
+    assert a.find_revisions("2..1") == [a.find("2"), a.find("1")]
+    assert a.find_revisions("1..2") == [a.find("2"), a.find("1")]
+    assert a.find_revisions("123-0..123-1") == [
+        a.find("123-0"),
+        a.find("123-1"),
+    ]
+    assert a.find_revisions("last(tag:daily)..123-1") == [
+        a.find("123-1"),
+        a.find("123-2"),
+    ]
+    assert a.find_revisions("123-1..") == [a.find("123-1"), a.find("123-2")]
+    assert a.find_revisions("..") == a.history
+    assert a.find_revisions("first..last") == a.history
+    assert a.find_revisions("tag:weekly") == [a.find("123-0"), a.find("123-1")]
+    assert a.find_revisions("1, tag:weekly") == [
+        a.find("123-1"),
+        a.find("123-0"),
+    ]
+    assert a.find_revisions("0,2..1") == [
+        a.find("123-2"),
+        a.find("123-0"),
+        a.find("123-1"),
+    ]
+    assert a.find_revisions("2,1, 2,0,1") == [
+        a.find("123-0"),
+        a.find("123-1"),
+        a.find("123-2"),
+    ]
+    assert a.find_revisions("2015-09-01..2015-08-30") == [
+        a.find("123-1"),
+        a.find("123-2"),
+    ]
+    assert a.find_revisions("2015-08-30..last(last(tag:daily&clean))") == [
+        a.find("123-1"),
+    ]
+    assert a.find_revisions("2015-08-30..,trust:verified") == [
+        a.find("123-1"),
+        a.find("123-2"),
+        a.find("123-0"),
+    ]
+    assert a.find_revisions(
+        "first(trust:verified)..last(reverse(2015-08-30..))"
+    ) == [
+        a.find("123-0"),
+        a.find("123-1"),
+    ]
+    assert a.find_revisions("reverse(not(clean))") == [
+        a.find("123-2"),
+    ]
+    assert a.find_revisions("last(reverse(first(123-1, 123-0)))") == [
+        a.find("123-1"),
+    ]
+    assert a.find_revisions("( (first( (123-0, 123-1)) ))") == [
+        a.find("123-0"),
+    ]
+
+
+def test_find_revisions_should_raise_invalid_spec(backup_with_revisions):
+    a = backup_with_revisions
+    with pytest.raises(KeyError):
+        a.find_revisions("aaaa..125")
+    with pytest.raises(AssertionError):
+        a.find_revisions("last)..5")
+    with pytest.raises(KeyError):
+        a.find_revisions("clean-..,1")
+    with pytest.raises(KeyError):
+        a.find_revisions("123-")
+    with pytest.raises(IndexError):
+        a.find_revisions("first(not(all))")
+    with pytest.raises(KeyError):
+        a.find_revisions("2015-09..2015-08-30")
 
 
 def test_find_revision(backup_with_revisions):
     a = backup_with_revisions
     assert a.find("last").uuid == "123-2"
     with pytest.raises(KeyError):
-        a.find(-1)
-    assert a.find(0).uuid == "123-2"
-    assert a.find(1).uuid == "123-1"
-    assert a.find(2).uuid == "123-0"
+        a.find("-1")
+    assert a.find("0").uuid == "123-2"
+    assert a.find("1").uuid == "123-1"
+    assert a.find("2").uuid == "123-0"
 
     assert a.find("123-1").uuid == "123-1"
     with pytest.raises(KeyError):
         a.find("125-125")
 
-    assert a.find("daily").uuid == "123-2"
-    assert a.find("weekly").uuid == "123-1"
-    assert a.find("monthly").uuid == "123-0"
+    assert a.find("last(tag:daily)").uuid == "123-2"
+    assert a.find("last(tag:weekly)").uuid == "123-1"
+    assert a.find("last(tag:monthly)").uuid == "123-0"
+    assert a.find(" first( tag:monthly  ) ").uuid == "123-0"
 
 
 def test_clean_history_should_exclude_incomplete_revs(backup_with_revisions):
diff --git a/src/backy/tests/test_backup.py b/src/backy/tests/test_backup.py
index 22b2f384..d3455d42 100644
--- a/src/backy/tests/test_backup.py
+++ b/src/backy/tests/test_backup.py
@@ -26,7 +26,7 @@ def test_find(simple_file_config, tmpdir, log):
     rev.timestamp = backy.utils.now()
     rev.materialize()
     backup.scan()
-    assert str(tmpdir / "123-456") == backup.find(0).filename
+    assert str(tmpdir / "123-456") == backup.find("0").filename
 
 
 def test_find_should_raise_if_not_found(simple_file_config, log):
@@ -46,7 +46,7 @@ def test_restore_target(simple_file_config):
     with open(source, "wb") as f:
         f.write(b"volume contents\n")
     backup.backup({"daily"})
-    backup.restore(0, target)
+    backup.restore("0", target)
     with open(source, "rb") as s, open(target, "rb") as t:
         assert s.read() == t.read()
 
@@ -57,7 +57,7 @@ def test_restore_stdout(simple_file_config, capfd):
     with open(source, "wb") as f:
         f.write(b"volume contents\n")
     backup.backup({"daily"})
-    backup.restore(0, "-")
+    backup.restore("0", "-")
     assert not os.path.exists("-")
     out, err = capfd.readouterr()
     assert "volume contents\n" == out
@@ -72,10 +72,10 @@ def test_restore_backy_extract(simple_file_config, monkeypatch):
     with open(source, "wb") as f:
         f.write(b"a" * CHUNK_SIZE)
     backup.backup({"daily"})
-    backup.restore(0, "restore.img")
+    backup.restore("0", "restore.img")
     check_output.assert_called()
     backup.restore_backy_extract.assert_called_once_with(
-        backup.find(0), "restore.img"
+        backup.find("0"), "restore.img"
     )
 
 
diff --git a/src/backy/tests/test_backy.py b/src/backy/tests/test_backy.py
index 1d5df426..0d2ae46a 100644
--- a/src/backy/tests/test_backy.py
+++ b/src/backy/tests/test_backy.py
@@ -44,7 +44,7 @@ def test_smoketest_internal(tmpdir, log):
 
     # Restore first state form newest revision at position 0
     restore_target = str(tmpdir / "image1.restore")
-    backup.restore(0, restore_target)
+    backup.restore("0", restore_target)
     with pytest.raises(IOError):
         open(backup.history[-1].filename, "wb")
     with pytest.raises(IOError):
@@ -57,13 +57,13 @@ def test_smoketest_internal(tmpdir, log):
     assert len(backup.history) == 2
 
     # Restore second state from second backup which is the newest at position 0
-    backup.restore(0, restore_target)
+    backup.restore("0", restore_target)
     d1 = open(source2, "rb").read()
     d2 = open(restore_target, "rb").read()
     assert d1 == d2
 
     # Our original backup is now at position 1. Lets restore that again.
-    backup.restore(1, restore_target)
+    backup.restore("1", restore_target)
     assert open(source1, "rb").read() == open(restore_target, "rb").read()
 
     # Backup second state again
@@ -72,15 +72,15 @@ def test_smoketest_internal(tmpdir, log):
     assert len(backup.history) == 3
 
     # Restore image2 from its most recent at position 0
-    backup.restore(0, restore_target)
+    backup.restore("0", restore_target)
     assert open(source2, "rb").read() == open(restore_target, "rb").read()
 
     # Restore image2 from its previous backup, now at position 1
-    backup.restore(1, restore_target)
+    backup.restore("1", restore_target)
     assert open(source2, "rb").read() == open(restore_target, "rb").read()
 
     # Our original backup is now at position 2. Lets restore that again.
-    backup.restore(2, restore_target)
+    backup.restore("2", restore_target)
     assert open(source1, "rb").read() == open(restore_target, "rb").read()
 
     # Backup third state
@@ -89,18 +89,18 @@ def test_smoketest_internal(tmpdir, log):
     assert len(backup.history) == 4
 
     # Restore image3 from the most curent state
-    backup.restore(0, restore_target)
+    backup.restore("0", restore_target)
     assert open(source3, "rb").read() == open(restore_target, "rb").read()
 
     # Restore image2 from position 1 and 2
-    backup.restore(1, restore_target)
+    backup.restore("1", restore_target)
     assert open(source2, "rb").read() == open(restore_target, "rb").read()
 
-    backup.restore(2, restore_target)
+    backup.restore("2", restore_target)
     assert open(source2, "rb").read() == open(restore_target, "rb").read()
 
     # Restore image1 from position 3
-    backup.restore(3, restore_target)
+    backup.restore("3", restore_target)
     assert open(source1, "rb").read() == open(restore_target, "rb").read()
 
 
diff --git a/src/backy/tests/test_main.py b/src/backy/tests/test_main.py
index 6f0413b5..c8dffed3 100644
--- a/src/backy/tests/test_main.py
+++ b/src/backy/tests/test_main.py
@@ -130,7 +130,7 @@ def test_call_status(capsys, backup, argv, monkeypatch):
         Ellipsis(
             """\
 (<backy.main.Command object at 0x...>,)
-{'yaml_': False}
+{'revision': 'all', 'yaml_': False}
 """
         )
         == out
@@ -139,7 +139,7 @@ def test_call_status(capsys, backup, argv, monkeypatch):
         Ellipsis(
             """\
 ... D command/invoked                args='... -v -b ... status'
-... D command/parsed                 func='status' func_args={'yaml_': False}
+... D command/parsed                 func='status' func_args={'yaml_': False, 'revision': 'all'}
 ... D command/successful             \n\
 """
         )
@@ -343,7 +343,7 @@ def do_raise(*args, **kw):
         Ellipsis(
             """\
 ... D command/invoked                args='... -l ... -b ... status'
-... D command/parsed                 func='status' func_args={'yaml_': False}
+... D command/parsed                 func='status' func_args={'yaml_': False, 'revision': 'all'}
 ... E command/failed                 exception_class='builtins.RuntimeError' exception_msg='test'
 exception>\tTraceback (most recent call last):
 exception>\t  File ".../src/backy/main.py", line ..., in main
@@ -364,7 +364,7 @@ def test_commands_wrapper_status(backup, tmpdir, capsys, clock, tz_berlin, log):
     revision.timestamp = backy.utils.now()
     revision.materialize()
 
-    commands.status(yaml_=False)
+    commands.status(yaml_=False, revision="all")
     out, err = capsys.readouterr()
 
     assert err == ""
@@ -390,10 +390,8 @@ def test_commands_wrapper_status_yaml(
     revision.stats["duration"] = 3.5
     revision.stats["bytes_written"] = 42
     revision.materialize()
-    revision2 = Revision(backup, log, "2")  # ignored
-    revision2.materialize()
 
-    commands.status(yaml_=True)
+    commands.status(yaml_=True, revision="all")
     out, err = capsys.readouterr()
 
     assert err == ""
diff --git a/src/backy/utils.py b/src/backy/utils.py
index 78d84014..ea21bef1 100644
--- a/src/backy/utils.py
+++ b/src/backy/utils.py
@@ -10,7 +10,8 @@
 import sys
 import tempfile
 import time
-from typing import IO, Callable
+import typing
+from typing import IO, Callable, Iterable, List, TypeVar
 from zoneinfo import ZoneInfo
 
 import humanize
@@ -20,6 +21,9 @@
 from .ext_deps import CP
 from .fallocate import punch_hole
 
+_T = TypeVar("_T")
+_U = TypeVar("_U")
+
 log = structlog.stdlib.get_logger(subsystem="utils")
 
 log_data: str  # for pytest
@@ -467,3 +471,39 @@ def format_datetime_local(dt):
         dt.astimezone(tz).replace(tzinfo=None).strftime("%Y-%m-%d %H:%M:%S"),
         tz,
     )
+
+
+def unique(iterable: Iterable[_T]) -> List[_T]:
+    return list(dict.fromkeys(iterable))
+
+
+def duplicates(a: List[_T], b: List[_T]) -> List[_T]:
+    return unique(i for i in a if i in b)
+
+
+def list_rindex(l: List[_T], v: _T) -> int:
+    return len(l) - l[-1::-1].index(v) - 1
+
+
+@typing.overload
+def list_get(l: List[_T], i: int) -> _T | None:
+    ...
+
+
+@typing.overload
+def list_get(l: List[_T], i: int, default: _U) -> _T | _U:
+    ...
+
+
+def list_get(l, i, default=None):
+    return l[i] if -len(l) <= i < len(l) else default
+
+
+def list_split(l: List[_T], v: _T) -> List[List[_T]]:
+    res: List[List[_T]] = [[]]
+    for i in l:
+        if i == v:
+            res.append([])
+        else:
+            res[-1].append(i)
+    return res