From a3638e1196b0240bad75bf61e917a29cb6bd17aa Mon Sep 17 00:00:00 2001
From: June Kim
Date: Mon, 11 May 2026 21:12:55 -0700
Subject: [PATCH] [pytest] add read_table() and log_files() to DeltaTestLocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DeltaTestLocation.row_count() was sufficient for basic validation, but
tests that need to inspect table contents or verify file-level behavior
require richer access. Add two methods:

* read_table(missing_ok=False) → pyarrow.Table | None
  Returns the current snapshot as a pyarrow Table. Useful for row-level
  assertions.

* log_files(missing_ok=False) → list[str]
  Returns the list of parquet file URIs in the current snapshot. Useful
  for verifying compaction, file count, and storage layout.

Both follow the existing pattern: deferred deltalake import to avoid
aarch64 import crashes, and missing_ok flag to probe caches.

Fixes #6135

Signed-off-by: June Kim
Signed-off-by: June Kim
---
 python/tests/unit/test_delta_test_location.py | 104 ++++++++++++++++++
 python/tests/utils.py                         |  53 +++++++++
 2 files changed, 157 insertions(+)
 create mode 100644 python/tests/unit/test_delta_test_location.py

diff --git a/python/tests/unit/test_delta_test_location.py b/python/tests/unit/test_delta_test_location.py
new file mode 100644
index 00000000000..dedfbd3d32a
--- /dev/null
+++ b/python/tests/unit/test_delta_test_location.py
@@ -0,0 +1,104 @@
+"""Unit tests for DeltaTestLocation helper methods."""
+
+import pytest
+from tests import skip_on_arm64
+from tests.utils import DeltaTestLocation
+
+
+@skip_on_arm64
+def test_read_table_missing_ok():
+    """read_table with missing_ok=True returns None for non-existent table."""
+    loc = DeltaTestLocation.create("test_read_table_missing", mode="snapshot")
+    try:
+        result = loc.read_table(missing_ok=True)
+        assert result is None
+    finally:
+        loc.cleanup()
+
+
+@skip_on_arm64
+def test_read_table_raises_on_missing():
+    """read_table raises TableNotFoundError when table doesn't exist."""
+    from deltalake.exceptions import TableNotFoundError
+
+    loc = DeltaTestLocation.create("test_read_table_raises", mode="snapshot")
+    try:
+        with pytest.raises(TableNotFoundError):
+            loc.read_table(missing_ok=False)
+    finally:
+        loc.cleanup()
+
+
+@skip_on_arm64
+def test_log_files_missing_ok():
+    """log_files with missing_ok=True returns empty list for non-existent table."""
+    loc = DeltaTestLocation.create("test_log_files_missing", mode="snapshot")
+    try:
+        result = loc.log_files(missing_ok=True)
+        assert result == []
+    finally:
+        loc.cleanup()
+
+
+@skip_on_arm64
+def test_log_files_raises_on_missing():
+    """log_files raises TableNotFoundError when table doesn't exist."""
+    from deltalake.exceptions import TableNotFoundError
+
+    loc = DeltaTestLocation.create("test_log_files_raises", mode="snapshot")
+    try:
+        with pytest.raises(TableNotFoundError):
+            loc.log_files(missing_ok=False)
+    finally:
+        loc.cleanup()
+
+
+@skip_on_arm64
+def test_read_table_returns_pyarrow():
+    """read_table returns a pyarrow Table when table exists."""
+    from deltalake import write_deltalake
+    import pyarrow as pa
+
+    loc = DeltaTestLocation.create("test_read_table_pyarrow", mode="snapshot")
+    try:
+        # Create a simple delta table
+        data = pa.table({"id": [1, 2, 3], "value": ["a", "b", "c"]})
+        write_deltalake(
+            loc.uri, data, mode="overwrite", storage_options=loc.delta_storage_options()
+        )
+
+        # Read it back
+        result = loc.read_table()
+        assert result is not None
+        assert isinstance(result, pa.Table)
+        assert result.num_rows == 3
+        assert "id" in result.column_names
+        assert "value" in result.column_names
+    finally:
+        loc.cleanup()
+
+
+@skip_on_arm64
+def test_log_files_returns_file_list():
+    """log_files returns a list of file URIs when table exists."""
+    from deltalake import write_deltalake
+    import pyarrow as pa
+
+    loc = DeltaTestLocation.create("test_log_files_list", mode="snapshot")
+    try:
+        # Create a simple delta table
+        data = pa.table({"id": [1, 2, 3]})
+        write_deltalake(
+            loc.uri, data, mode="overwrite", storage_options=loc.delta_storage_options()
+        )
+
+        # Get file list
+        result = loc.log_files()
+        assert isinstance(result, list)
+        assert len(result) > 0
+        # Each entry should be a file URI
+        for file_uri in result:
+            assert isinstance(file_uri, str)
+            assert ".parquet" in file_uri.lower()
+    finally:
+        loc.cleanup()
diff --git a/python/tests/utils.py b/python/tests/utils.py
index 300d5dc9221..5adbd4c17fd 100644
--- a/python/tests/utils.py
+++ b/python/tests/utils.py
@@ -146,6 +146,59 @@ def row_count(self, missing_ok: bool = False) -> int:
             raise
         return dt.count()
 
+    def read_table(self, missing_ok: bool = False):
+        """Return the Delta table as a pyarrow Table.
+
+        The `deltalake` import is deferred to here so that module-level
+        test collection does not crash on aarch64 hosts where the wheel
+        aborts on import; tests that need this method gate themselves
+        with `@skip_on_arm64`.
+
+        :param missing_ok: When True, return ``None`` if the table does not
+            exist instead of raising.
+        :returns: The current snapshot as a ``pyarrow.Table``, or ``None``
+            when the table is missing and ``missing_ok`` is True.
+        :raises TableNotFoundError: If the table does not exist and
+            ``missing_ok`` is False.
+        """
+        from deltalake import DeltaTable
+        from deltalake.exceptions import TableNotFoundError
+
+        try:
+            dt = DeltaTable(self.uri, storage_options=self.delta_storage_options())
+        except TableNotFoundError:
+            if missing_ok:
+                return None
+            raise
+        return dt.to_pyarrow_table()
+
+    def log_files(self, missing_ok: bool = False) -> list[str]:
+        """Return the list of data files in the current Delta snapshot.
+
+        The `deltalake` import is deferred to here so that module-level
+        test collection does not crash on aarch64 hosts where the wheel
+        aborts on import; tests that need this method gate themselves
+        with `@skip_on_arm64`.
+
+        :param missing_ok: When True, return an empty list if the table does
+            not exist instead of raising.
+        :returns: URIs of the data files in the current snapshot (from
+            ``DeltaTable.file_uris()``); these are data files, not
+            ``_delta_log`` entries, despite the method name.
+        :raises TableNotFoundError: If the table does not exist and
+            ``missing_ok`` is False.
+        """
+        from deltalake import DeltaTable
+        from deltalake.exceptions import TableNotFoundError
+
+        try:
+            dt = DeltaTable(self.uri, storage_options=self.delta_storage_options())
+        except TableNotFoundError:
+            if missing_ok:
+                return []
+            raise
+        return dt.file_uris()
+
     def cleanup(self) -> None:
         """Remove the local temp directory, if any.