Skip to content

Commit

Permalink
Thanks Ryan!
Browse files (browse the repository at this point in the history)
  • Loading branch information
Fokko committed Dec 7, 2023
1 parent 2ff5c03 commit 6bca2aa
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 31 deletions.
48 changes: 24 additions & 24 deletions pyiceberg/table/snapshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,17 @@
ADDED_EQUALITY_DELETES = 'added-equality-deletes'
ADDED_FILE_SIZE = 'added-files-size'
ADDED_POSITION_DELETES = 'added-position-deletes'
ADDED_POSITION_DELETE_FILES = f'{ADDED_POSITION_DELETES}-files'
ADDED_POSITION_DELETE_FILES = 'added-position-delete-files'
ADDED_RECORDS = 'added-records'
DELETED_DATA_FILES = 'deleted-data-files'
DELETED_RECORDS = 'deleted-records'
EQUALITY_DELETE_FILES = 'added-equality-delete-files'
ADDED_EQUALITY_DELETE_FILES = 'added-equality-delete-files'
REMOVED_DELETE_FILES = 'removed-delete-files'
REMOVED_EQUALITY_DELETES = 'removed-equality-deletes'
REMOVED_EQUALITY_DELETE_FILES = f'{REMOVED_EQUALITY_DELETES}-files'
REMOVED_EQUALITY_DELETE_FILES = 'removed-equality-delete-files'
REMOVED_FILE_SIZE = 'removed-files-size'
REMOVED_POSITION_DELETES = 'removed-position-deletes'
REMOVED_POSITION_DELETE_FILES = f'{REMOVED_POSITION_DELETES}-files'
REMOVED_POSITION_DELETE_FILES = 'removed-position-delete-files'
TOTAL_EQUALITY_DELETES = 'total-equality-deletes'
TOTAL_POSITION_DELETES = 'total-position-deletes'
TOTAL_DATA_FILES = 'total-data-files'
Expand Down Expand Up @@ -92,14 +92,14 @@ def __init__(self, operation: Operation, **data: Any) -> None:

def __getitem__(self, __key: str) -> Optional[Any]: # type: ignore
"""Return a key as it is a map."""
if __key == 'operation':
if __key.lower() == 'operation':
return self.operation
else:
return self._additional_properties.get(__key)

def __setitem__(self, key: str, value: Any) -> None:
"""Set a key as it is a map."""
if key == 'operation':
if key.lower() == 'operation':
self.operation = value
else:
self._additional_properties[key] = value
Expand Down Expand Up @@ -171,10 +171,10 @@ class SnapshotLogEntry(IcebergBaseModel):


class SnapshotSummaryCollector:
added_size: int
removed_size: int
added_files: int
removed_files: int
added_file_size: int
removed_file_size: int
added_data_files: int
removed_data_files: int
added_eq_delete_files: int
removed_eq_delete_files: int
added_pos_delete_files: int
Expand All @@ -189,10 +189,10 @@ class SnapshotSummaryCollector:
removed_eq_deletes: int

def __init__(self) -> None:
self.added_size = 0
self.removed_size = 0
self.added_files = 0
self.removed_files = 0
self.added_file_size = 0
self.removed_file_size = 0
self.added_data_files = 0
self.removed_data_files = 0
self.added_eq_delete_files = 0
self.removed_eq_delete_files = 0
self.added_pos_delete_files = 0
Expand All @@ -207,10 +207,10 @@ def __init__(self) -> None:
self.removed_eq_deletes = 0

def add_file(self, data_file: DataFile) -> None:
self.added_size += data_file.file_size_in_bytes
self.added_file_size += data_file.file_size_in_bytes

if data_file.content == DataFileContent.DATA:
self.added_files += 1
self.added_data_files += 1
self.added_records += data_file.record_count
elif data_file.content == DataFileContent.POSITION_DELETES:
self.added_delete_files += 1
Expand All @@ -224,10 +224,10 @@ def add_file(self, data_file: DataFile) -> None:
raise ValueError(f"Unknown data file content: {data_file.content}")

def remove_file(self, data_file: DataFile) -> None:
self.removed_size += data_file.file_size_in_bytes
self.removed_file_size += data_file.file_size_in_bytes

if data_file.content == DataFileContent.DATA:
self.removed_files += 1
self.removed_data_files += 1
self.deleted_records += data_file.record_count
elif data_file.content == DataFileContent.POSITION_DELETES:
self.removed_delete_files += 1
Expand All @@ -246,11 +246,11 @@ def set_when_positive(properties: Dict[str, str], num: int, property_name: str)
properties[property_name] = str(num)

properties: Dict[str, str] = {}
set_when_positive(properties, self.added_size, ADDED_FILE_SIZE)
set_when_positive(properties, self.removed_size, REMOVED_FILE_SIZE)
set_when_positive(properties, self.added_files, ADDED_DATA_FILES)
set_when_positive(properties, self.removed_files, DELETED_DATA_FILES)
set_when_positive(properties, self.added_eq_delete_files, EQUALITY_DELETE_FILES)
set_when_positive(properties, self.added_file_size, ADDED_FILE_SIZE)
set_when_positive(properties, self.removed_file_size, REMOVED_FILE_SIZE)
set_when_positive(properties, self.added_data_files, ADDED_DATA_FILES)
set_when_positive(properties, self.removed_data_files, DELETED_DATA_FILES)
set_when_positive(properties, self.added_eq_delete_files, ADDED_EQUALITY_DELETE_FILES)
set_when_positive(properties, self.removed_eq_delete_files, REMOVED_EQUALITY_DELETE_FILES)
set_when_positive(properties, self.added_pos_delete_files, ADDED_POSITION_DELETE_FILES)
set_when_positive(properties, self.removed_pos_delete_files, REMOVED_POSITION_DELETE_FILES)
Expand Down Expand Up @@ -293,7 +293,7 @@ def _truncate_table_summary(summary: Summary, previous_summary: Mapping[str, str
return summary


def _merge_snapshot_summaries(
def _update_snapshot_summaries(
summary: Summary, previous_summary: Optional[Mapping[str, str]] = None, truncate_full_table: bool = False
) -> Summary:
if summary.operation not in {Operation.APPEND, Operation.OVERWRITE}:
Expand Down
14 changes: 7 additions & 7 deletions tests/table/test_snapshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import pytest

from pyiceberg.manifest import DataFile, DataFileContent, ManifestContent, ManifestFile
from pyiceberg.table.snapshots import Operation, Snapshot, SnapshotSummaryCollector, Summary, _merge_snapshot_summaries
from pyiceberg.table.snapshots import Operation, Snapshot, SnapshotSummaryCollector, Summary, _update_snapshot_summaries


@pytest.fixture
Expand Down Expand Up @@ -161,7 +161,7 @@ def test_snapshot_summary_collector(data_file: DataFile) -> None:


def test_merge_snapshot_summaries_empty() -> None:
assert _merge_snapshot_summaries(Summary(Operation.APPEND)) == Summary(
assert _update_snapshot_summaries(Summary(Operation.APPEND)) == Summary(
operation=Operation.APPEND,
**{
'total-data-files': '0',
Expand All @@ -175,7 +175,7 @@ def test_merge_snapshot_summaries_empty() -> None:


def test_merge_snapshot_summaries_new_summary() -> None:
actual = _merge_snapshot_summaries(
actual = _update_snapshot_summaries(
summary=Summary(
operation=Operation.APPEND,
**{
Expand Down Expand Up @@ -211,7 +211,7 @@ def test_merge_snapshot_summaries_new_summary() -> None:


def test_merge_snapshot_summaries_overwrite_summary() -> None:
actual = _merge_snapshot_summaries(
actual = _update_snapshot_summaries(
summary=Summary(
operation=Operation.OVERWRITE,
**{
Expand Down Expand Up @@ -260,17 +260,17 @@ def test_merge_snapshot_summaries_overwrite_summary() -> None:

def test_invalid_operation() -> None:
with pytest.raises(ValueError) as e:
_merge_snapshot_summaries(summary=Summary(Operation.REPLACE))
_update_snapshot_summaries(summary=Summary(Operation.REPLACE))
assert "Operation not implemented: Operation.REPLACE" in str(e.value)

with pytest.raises(ValueError) as e:
_merge_snapshot_summaries(summary=Summary(Operation.DELETE))
_update_snapshot_summaries(summary=Summary(Operation.DELETE))
assert "Operation not implemented: Operation.DELETE" in str(e.value)


def test_invalid_type() -> None:
with pytest.raises(ValueError) as e:
_merge_snapshot_summaries(
_update_snapshot_summaries(
summary=Summary(
operation=Operation.OVERWRITE,
**{
Expand Down

0 comments on commit 6bca2aa

Please sign in to comment.