Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions pyiceberg/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from __future__ import annotations

import itertools
import os
import uuid
import warnings
from abc import ABC, abstractmethod
Expand Down Expand Up @@ -1788,19 +1787,20 @@ def refresh(self) -> Table:

@classmethod
def _metadata_location_from_version_hint(cls, metadata_location: str, properties: Properties = EMPTY_DICT) -> str:
    """Resolve the metadata file location named by ``metadata/version-hint.text``.

    The hint file is read from ``<metadata_location>/metadata/version-hint.text``
    and its content is mapped to a file in the same ``metadata`` directory:

    * content ending in ``.metadata.json`` is used as the file name as-is;
    * numeric content ``N`` becomes ``vN.metadata.json``;
    * anything else becomes ``<content>.metadata.json``.

    Args:
        metadata_location: Root location under which the ``metadata`` directory lives.
            A trailing ``/`` is tolerated.
        properties: Properties forwarded to ``load_file_io`` to pick the FileIO.

    Returns:
        The forward-slash separated location of the resolved metadata file.
    """
    # Locations are URIs, so they are joined with "/" explicitly: os.path.join would
    # use the host OS separator and emit backslashes on Windows.
    metadata_dir = f"{metadata_location.rstrip('/')}/metadata"
    version_hint_location = f"{metadata_dir}/version-hint.text"
    io = load_file_io(properties=properties, location=version_hint_location)
    file = io.new_input(version_hint_location)

    with file.open() as stream:
        # Strip surrounding whitespace so a trailing newline in the hint file does not
        # defeat the isnumeric() check or leak into the resolved file name.
        content = stream.read().decode("utf-8").strip()

    if content.endswith(".metadata.json"):
        return f"{metadata_dir}/{content}"
    if content.isnumeric():
        return f"{metadata_dir}/v{content}.metadata.json"
    return f"{metadata_dir}/{content}.metadata.json"

@classmethod
def from_metadata(cls, metadata_location: str, properties: Properties = EMPTY_DICT) -> StaticTable:
Expand Down
49 changes: 49 additions & 0 deletions tests/table/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
# under the License.
# pylint:disable=redefined-outer-name
import json
import ntpath
import os
import uuid
from copy import copy
from typing import Any
Expand Down Expand Up @@ -589,6 +591,53 @@ def test_static_table_version_hint_same_as_table(
assert static_table.metadata == table_v2.metadata


@pytest.mark.parametrize(
    "version_hint_content, expected_metadata_file",
    [
        ("v3.metadata.json", "v3.metadata.json"),
        ("3", "v3.metadata.json"),
        ("some-uuid", "some-uuid.metadata.json"),
    ],
)
def test_static_table_version_hint_location_uses_forward_slashes(
    version_hint_content: str, expected_metadata_file: str, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Check that the version-hint lookup builds forward-slash locations on any host OS."""
    # Iceberg locations are URIs, so they must be "/"-separated no matter which platform
    # runs the code. Replacing os.path.join with the Windows implementation makes any
    # OS-separator based join produce backslashes, which the assertions below would catch.
    monkeypatch.setattr(os.path, "join", ntpath.join)

    table_root = "s3://warehouse/wh/nyc.db/taxis"
    hint_location = f"{table_root}/metadata/version-hint.text"
    expected_location = f"{table_root}/metadata/{expected_metadata_file}"
    requested_locations: list[str] = []

    class _HintStream:
        """Context-managed stream whose read() yields the parametrized hint content."""

        def __enter__(self) -> "_HintStream":
            return self

        def __exit__(self, *args: Any) -> None:
            return None

        def read(self, size: int = 0) -> bytes:
            return version_hint_content.encode("utf-8")

    class _HintInputFile:
        """Input file stand-in that always opens a fresh hint stream."""

        def open(self, seekable: bool = True) -> "_HintStream":
            return _HintStream()

    class _RecordingFileIO:
        """FileIO stand-in that records every location it is asked to open."""

        def new_input(self, location: str) -> "_HintInputFile":
            requested_locations.append(location)
            return _HintInputFile()

    def _load_recording_io(*args: Any, **kwargs: Any) -> _RecordingFileIO:
        return _RecordingFileIO()

    monkeypatch.setattr("pyiceberg.table.load_file_io", _load_recording_io)

    resolved_location = StaticTable._metadata_location_from_version_hint(table_root)

    assert requested_locations == [hint_location]
    assert resolved_location == expected_location
    assert "\\" not in resolved_location


def test_static_table_io_does_not_exist(metadata_location: str) -> None:
    """Loading a static table with an unresolvable FileIO implementation raises ValueError."""
    missing_io_properties = {PY_IO_IMPL: "pyiceberg.does.not.exist.FileIO"}

    with pytest.raises(ValueError):
        StaticTable.from_metadata(metadata_location, missing_io_properties)
Expand Down