From ca6963958507c8e99d5382b4c5076606eb47d51d Mon Sep 17 00:00:00 2001
From: Richard Guo
Date: Tue, 1 Apr 2025 11:25:08 -0400
Subject: [PATCH] s3fs configurations

---
 pyproject.toml       |  2 +-
 quadfeather/tiler.py |  6 +++---
 tests/test_s3.py     | 10 ++++++----
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 900891d..d1f0a72 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "quadfeather"
-version = "2.2.0"
+version = "2.3.0"
 description = "Quadtree tiling from CSV/Apache Arrow for use with deepscatter in the browser."
 readme = "README.md"
 requires-python = ">=3.9"
diff --git a/quadfeather/tiler.py b/quadfeather/tiler.py
index 8dd87f5..0dc7e9c 100644
--- a/quadfeather/tiler.py
+++ b/quadfeather/tiler.py
@@ -463,7 +463,7 @@ def __init__(
 
     # Easier file system handling methods
     def write_feather(self, write_path: Path, table: pa.Table, compression: Literal["zstd", "uncompressed"] = "zstd"):
-        with self.filesystem.open_output_stream(write_path.as_posix()) as f:
+        with self.filesystem.open_output_stream(write_path.as_posix(), metadata=None) as f:
             feather.write_feather(table, f, compression=compression)
 
     def read_feather(self, read_path: Path, columns: Optional[List[str]] = None) -> pa.Table:
@@ -713,7 +713,7 @@ def __init__(
 
     def write_batch_to_filehandle(self, path: Path, batch: pa.Table):
         if not path in self._open_filehandles:
-            open_filehandle = self.quadtree.filesystem.open_output_stream(path.as_posix())
+            open_filehandle = self.quadtree.filesystem.open_output_stream(path.as_posix(), metadata=None)
             if path.suffix == ".feather":
                 self._open_filehandles[path] = ipc.new_file(open_filehandle, schema=batch.schema)
             elif path.suffix == ".parquet":
@@ -1399,7 +1399,7 @@ def overflow_buffers(self):
             p = self.overflow_loc
             if k != "":
                 p = p.with_suffix(f".{k}.arrow")
-            self._sinks[k] = self.quadtree.filesystem.open_output_stream(p.as_posix())
+            self._sinks[k] = self.quadtree.filesystem.open_output_stream(p.as_posix(), metadata=None)
             self._overflow_writers[k] = pa.ipc.new_file(
                 self._sinks[k], self.quadtree.schemas[k]
             )
diff --git a/tests/test_s3.py b/tests/test_s3.py
index b1131ae..1d590dc 100644
--- a/tests/test_s3.py
+++ b/tests/test_s3.py
@@ -1,16 +1,18 @@
 from pyarrow import fs
 from quadfeather.tiler import *
 from pathlib import Path
+import s3fs
 import pytest
 
+
 def get_s3_filesystem(region: str = "us-east-2"):
-    return fs.S3FileSystem(
-        region=region,
-    )
+    filesystem = s3fs.S3FileSystem(s3_additional_kwargs={'ServerSideEncryption': 'AES256'}, client_kwargs={'region_name': region})
+    filesystem = fs.FSSpecHandler(filesystem)
+    return fs.PyFileSystem(filesystem)
 
 
 @pytest.mark.skip(reason="This is a slow test that requires a real S3 bucket.")
-def test_s3_filesystem(bucket_name: str,NUM_POINTS=100_000, TILE_SIZE=10_000):
+def test_s3_filesystem(bucket_name: str, NUM_POINTS=100_000, TILE_SIZE=10_000):
     fs = get_s3_filesystem()
     basedir = Path(bucket_name) / "tiles"
     print("Creating test data...")