Skip to content

Commit

Permalink
Merge branch 'main' of github.com:apache/iceberg-python into fd-hive
Browse files Browse the repository at this point in the history
  • Loading branch information
Fokko committed Jan 11, 2024
2 parents 2e77cb7 + 5085d28 commit a25c1b7
Show file tree
Hide file tree
Showing 51 changed files with 2,206 additions and 981 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ jobs:
if: startsWith(matrix.os, 'ubuntu')
run: ls -lah dist/* && cp dist/* wheelhouse/

- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
with:
name: "release-${{ github.event.inputs.version }}"
path: ./wheelhouse/*
11 changes: 4 additions & 7 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,12 @@ repos:
- id: check-ast
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version (Used for linting)
rev: v0.1.0
rev: v0.1.8
hooks:
- id: ruff
args: [ --fix, --exit-non-zero-on-fix ]
- repo: https://github.com/ambv/black
rev: 23.10.0
hooks:
- id: black
args: [--skip-string-normalization]
args: [ --fix, --exit-non-zero-on-fix, --preview ]
- id: ruff-format
args: [ --preview ]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.6.1
hooks:
Expand Down
27 changes: 27 additions & 0 deletions dev/provision.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,3 +297,30 @@
('123')
"""
)

# Seed the sanitized-character table (created earlier in this script) with a
# single row. NOTE(review): assumes `spark` is the SparkSession configured
# above — confirm against the top of dev/provision.py.
spark.sql(
"""
INSERT INTO default.test_table_sanitized_character
VALUES
('123')
"""
)

# Table for the add-column integration test: starts with one string column `a`.
spark.sql(
"""
CREATE TABLE default.test_table_add_column (
a string
)
USING iceberg
"""
)

# Row written BEFORE the schema change — only column `a` has a value.
spark.sql("INSERT INTO default.test_table_add_column VALUES ('1')")

# Evolve the schema: append a second string column `b`.
spark.sql(
"""
ALTER TABLE default.test_table_add_column ADD COLUMN b string
"""
)

# Row written AFTER the schema change — both `a` and `b` are populated, so
# readers must handle rows from both schema versions.
spark.sql("INSERT INTO default.test_table_add_column VALUES ('2', '2')")
1 change: 1 addition & 0 deletions mkdocs/docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ For the FileIO there are several configuration options available:
| s3.signer | bearer | Configure the signature version of the FileIO. |
| s3.region | us-west-2 | Sets the region of the bucket |
| s3.proxy-uri | http://my.proxy.com:8080 | Configure the proxy server to be used by the FileIO. |
| s3.connect-timeout | 60.0 | Configure socket connection timeout, in seconds. |

### HDFS

Expand Down
4 changes: 2 additions & 2 deletions mkdocs/docs/feature-support.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ The goal is that the python library will provide a functional, performant subset
| Create Table | X | X |
| Rename Table | X | X |
| Drop Table | X | X |
| Alter Table | X | |
| Set Table Properties | X | |
| Alter Table | X | X |
| Set Table Properties | X | X |
| Create Namespace | X | X |
| Drop Namespace | X | X |
| Set Namespace Properties | X | X |
Expand Down
6 changes: 3 additions & 3 deletions mkdocs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@

mkdocs==1.5.3
griffe==0.38.1
jinja2==3.1.2
jinja2==3.1.3
mkdocstrings==0.24.0
mkdocstrings-python==1.7.5
mkdocstrings-python==1.8.0
mkdocs-literate-nav==0.6.1
mkdocs-autorefs==0.5.0
mkdocs-gen-files==0.5.0
mkdocs-material==9.5.2
mkdocs-material==9.5.3
mkdocs-material-extensions==1.3.1
mkdocs-section-index==0.3.8
1,538 changes: 1,069 additions & 469 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyiceberg/avro/codecs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
so don't confuse it with the Python's "codecs", which is a package mainly for
converting character sets (https://docs.python.org/3/library/codecs.html).
"""

from __future__ import annotations

from typing import Dict, Optional, Type
Expand Down
6 changes: 2 additions & 4 deletions pyiceberg/avro/codecs/codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,8 @@ class Codec(ABC):

@staticmethod
@abstractmethod
def compress(data: bytes) -> tuple[bytes, int]:
...
def compress(data: bytes) -> tuple[bytes, int]: ...

@staticmethod
@abstractmethod
def decompress(data: bytes) -> bytes:
...
def decompress(data: bytes) -> bytes: ...
1 change: 1 addition & 0 deletions pyiceberg/avro/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# under the License.
# pylint: disable=W0621
"""Avro reader for reading Avro files."""

from __future__ import annotations

import io
Expand Down
7 changes: 3 additions & 4 deletions pyiceberg/avro/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
The reader tree can be changed in such a way that the
read schema is different, while respecting the read schema.
"""

from __future__ import annotations

from abc import abstractmethod
Expand Down Expand Up @@ -85,12 +86,10 @@ def _skip_map_array(decoder: BinaryDecoder, skip_entry: Callable[[], None]) -> N

class Reader(Singleton):
@abstractmethod
def read(self, decoder: BinaryDecoder) -> Any:
...
def read(self, decoder: BinaryDecoder) -> Any: ...

@abstractmethod
def skip(self, decoder: BinaryDecoder) -> None:
...
def skip(self, decoder: BinaryDecoder) -> None: ...

def __repr__(self) -> str:
"""Return the string representation of the Reader class."""
Expand Down
4 changes: 1 addition & 3 deletions pyiceberg/avro/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,9 +232,7 @@ def resolve_reader(
Raises:
NotImplementedError: If attempting to resolve an unrecognized object type.
"""
return visit_with_partner(
file_schema, read_schema, ReadSchemaResolver(read_types, read_enums), SchemaPartnerAccessor()
) # type: ignore
return visit_with_partner(file_schema, read_schema, ReadSchemaResolver(read_types, read_enums), SchemaPartnerAccessor()) # type: ignore


class EnumReader(Reader):
Expand Down
4 changes: 2 additions & 2 deletions pyiceberg/avro/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
Constructing a writer tree from the schema makes it easy
to decouple the writing implementation from the schema.
"""

from __future__ import annotations

from abc import abstractmethod
Expand All @@ -43,8 +44,7 @@
@dataclass(frozen=True)
class Writer(Singleton):
@abstractmethod
def write(self, encoder: BinaryEncoder, val: Any) -> Any:
...
def write(self, encoder: BinaryEncoder, val: Any) -> Any: ...

def __repr__(self) -> str:
"""Return string representation of this object."""
Expand Down
46 changes: 44 additions & 2 deletions pyiceberg/catalog/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from __future__ import annotations

import logging
import re
import uuid
from abc import ABC, abstractmethod
from dataclasses import dataclass
Expand Down Expand Up @@ -74,6 +75,17 @@
LOCATION = "location"
EXTERNAL_TABLE = "EXTERNAL_TABLE"

# Matches Iceberg table-metadata file names such as
# "00001-6c97e413-d51b-4538-ac70-12fe2a85cb83.metadata.json" or the
# codec-suffixed variant "00001-<uuid>.gzip.metadata.json".
TABLE_METADATA_FILE_NAME_REGEX = re.compile(
    r"(\d+)"  # version number
    r"-"  # separator
    r"([\w-]{36})"  # UUID (36 characters, including hyphens)
    r"(?:\.\w+)?"  # optional codec name
    r"\.metadata\.json"  # file extension
)


class CatalogType(Enum):
REST = "rest"
Expand Down Expand Up @@ -587,8 +599,38 @@ def _write_metadata(metadata: TableMetadata, io: FileIO, metadata_path: str) ->
ToOutputFile.table_metadata(metadata, io.new_output(metadata_path))

@staticmethod
def _get_metadata_location(location: str) -> str:
return f"{location}/metadata/00000-{uuid.uuid4()}.metadata.json"
def _get_metadata_location(location: str, new_version: int = 0) -> str:
if new_version < 0:
raise ValueError(f"Table metadata version: `{new_version}` must be a non-negative integer")
version_str = f"{new_version:05d}"
return f"{location}/metadata/{version_str}-{uuid.uuid4()}.metadata.json"

@staticmethod
def _parse_metadata_version(metadata_location: str) -> int:
    """Extract the version number from a metadata file location.

    The file name is expected to look like
    ``00001-6c97e413-d51b-4538-ac70-12fe2a85cb83.metadata.json``, where the
    leading integer is the version. Locations that do not follow this
    pattern — or whose UUID part is malformed — yield -1, which ensures the
    next metadata file written is treated as version 0.

    Args:
        metadata_location (str): The location of the metadata file.

    Returns:
        int: The version of the metadata file, or -1 when the file name does
            not contain a valid version string.
    """
    file_name = metadata_location.rsplit("/", 1)[-1]
    match = TABLE_METADATA_FILE_NAME_REGEX.fullmatch(file_name)
    if match is None:
        return -1
    try:
        # The regex only guarantees 36 word/hyphen characters; this rejects
        # right-length strings that are not actually UUIDs.
        uuid.UUID(match.group(2))
    except ValueError:
        return -1
    return int(match.group(1))

def _get_updated_props_and_update_summary(
self, current_properties: Properties, removals: Optional[Set[str]], updates: Properties
Expand Down
Loading

0 comments on commit a25c1b7

Please sign in to comment.