From 1971fcfe0875eeb200dbcb66f385e504cfad6609 Mon Sep 17 00:00:00 2001
From: Chinmay Bhat <12948588+chinmay-bhat@users.noreply.github.com>
Date: Tue, 10 Sep 2024 22:15:43 +0530
Subject: [PATCH] Cache Manifest files (#787)

* cache manifests
* update API
* small fix
* move cache to module level
* update signature and check
---
 pyiceberg/table/snapshots.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py
index 1ccb079922..980399a2ab 100644
--- a/pyiceberg/table/snapshots.py
+++ b/pyiceberg/table/snapshots.py
@@ -19,6 +19,7 @@
 import time
 from collections import defaultdict
 from enum import Enum
+from functools import lru_cache
 from typing import TYPE_CHECKING, Any, DefaultDict, Dict, Iterable, List, Mapping, Optional
 
 from pydantic import Field, PrivateAttr, model_serializer
@@ -230,6 +231,13 @@ def __eq__(self, other: Any) -> bool:
         )
 
 
+@lru_cache
+def _manifests(io: FileIO, manifest_list: str) -> List[ManifestFile]:
+    """Return the manifests from the manifest list (cached per (io, manifest_list); treat the result as read-only)."""
+    file = io.new_input(manifest_list)
+    return list(read_manifest_list(file))
+
+
 class Snapshot(IcebergBaseModel):
     snapshot_id: int = Field(alias="snapshot-id")
     parent_snapshot_id: Optional[int] = Field(alias="parent-snapshot-id", default=None)
@@ -250,9 +258,9 @@ def __str__(self) -> str:
         return result_str
 
     def manifests(self, io: FileIO) -> List[ManifestFile]:
-        if self.manifest_list is not None:
-            file = io.new_input(self.manifest_list)
-            return list(read_manifest_list(file))
+        """Return the manifests for the given snapshot, or an empty list when it has no manifest list."""
+        if self.manifest_list:
+            return list(_manifests(io, self.manifest_list))  # copy so callers cannot mutate the cached list
         return []
 
 