diff --git a/packages/evo-colormaps/tests/data/category_colormap_response.json b/packages/evo-colormaps/tests/data/category_colormap_response.json index 88625a87..5c4a7c6d 100644 --- a/packages/evo-colormaps/tests/data/category_colormap_response.json +++ b/packages/evo-colormaps/tests/data/category_colormap_response.json @@ -24,5 +24,5 @@ "modified_by": "00000000-0000-0000-0000-000000000010", "name": "category colormap 1", "schema": "category", - "self_link": "http://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/colormaps/00000000-0000-0000-0000-000000000008" + "self_link": "https://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/colormaps/00000000-0000-0000-0000-000000000008" } diff --git a/packages/evo-colormaps/tests/data/colormap_association_collection_response.json b/packages/evo-colormaps/tests/data/colormap_association_collection_response.json index 62e5521b..2092aa79 100644 --- a/packages/evo-colormaps/tests/data/colormap_association_collection_response.json +++ b/packages/evo-colormaps/tests/data/colormap_association_collection_response.json @@ -3,37 +3,37 @@ { "attribute_id": "a fairly unique ID", "colormap_id": "00000000-0000-0000-0000-000000000006", - "colormap_uri": "http://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/colormaps/00000000-0000-0000-0000-000000000006", + "colormap_uri": "https://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/colormaps/00000000-0000-0000-0000-000000000006", "created_at": "2024-09-16 01:30:00", "created_by": "00000000-0000-0000-0000-000000000010", "id": "00000000-0000-0000-0000-00000000001e", "modified_at": "2024-09-16 01:30:00", "modified_by": "00000000-0000-0000-0000-000000000010", - "self_link": "http://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/00000000-0000-0000-0000-000000000014/associations/00000000-0000-0000-0000-00000000001e", + "self_link": "https://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/00000000-0000-0000-0000-000000000014/associations/00000000-0000-0000-0000-00000000001e", "workspace_id": "00000000-0000-0000-0000-00000000162e" }, { "attribute_id": "00000000-0000-0000-0000-000000000002", "colormap_id": "00000000-0000-0000-0000-000000000007", - "colormap_uri": "http://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/colormaps/00000000-0000-0000-0000-000000000007", + "colormap_uri": "https://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/colormaps/00000000-0000-0000-0000-000000000007", "created_at": "2024-09-16 01:30:00", "created_by": "00000000-0000-0000-0000-000000000010", "id": "00000000-0000-0000-0000-00000000001f", "modified_at": "2024-09-16 01:30:00", "modified_by": "00000000-0000-0000-0000-000000000010", - "self_link": "http://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/00000000-0000-0000-0000-000000000014/associations/00000000-0000-0000-0000-00000000001f", + "self_link": 
"https://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/00000000-0000-0000-0000-000000000014/associations/00000000-0000-0000-0000-00000000001f", "workspace_id": "00000000-0000-0000-0000-00000000162e" }, { "attribute_id": "another fairly unique ID", "colormap_id": "00000000-0000-0000-0000-000000000008", - "colormap_uri": "http://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/colormaps/00000000-0000-0000-0000-000000000008", + "colormap_uri": "https://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/colormaps/00000000-0000-0000-0000-000000000008", "created_at": "2024-09-16 01:30:00", "created_by": "00000000-0000-0000-0000-000000000010", "id": "00000000-0000-0000-0000-000000000020", "modified_at": "2024-09-16 01:30:00", "modified_by": "00000000-0000-0000-0000-000000000010", - "self_link": "http://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/00000000-0000-0000-0000-000000000014/associations/00000000-0000-0000-0000-000000000020", + "self_link": "https://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/00000000-0000-0000-0000-000000000014/associations/00000000-0000-0000-0000-000000000020", "workspace_id": "00000000-0000-0000-0000-00000000162e" } ] diff --git a/packages/evo-colormaps/tests/data/colormap_association_response.json b/packages/evo-colormaps/tests/data/colormap_association_response.json index cd6b1a3d..639ab41e 100644 --- a/packages/evo-colormaps/tests/data/colormap_association_response.json +++ b/packages/evo-colormaps/tests/data/colormap_association_response.json @@ -1,12 +1,12 @@ { "attribute_id": "a very unique ID", "colormap_id": "00000000-0000-0000-0000-000000000006", - "colormap_uri": "http://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/colormaps/00000000-0000-0000-0000-000000000006", + "colormap_uri": "https://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/colormaps/00000000-0000-0000-0000-000000000006", "created_at": "2024-09-16 01:30:00", "created_by": "00000000-0000-0000-0000-000000000010", "id": "00000000-0000-0000-0000-00000000001e", "modified_at": "2024-09-16 01:30:00", "modified_by": "00000000-0000-0000-0000-000000000010", - "self_link": "http://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/00000000-0000-0000-0000-000000000014/associations/00000000-0000-0000-0000-00000000001e", + "self_link": "https://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/00000000-0000-0000-0000-000000000014/associations/00000000-0000-0000-0000-00000000001e", "workspace_id": "00000000-0000-0000-0000-00000000162e" } \ No newline at end of file diff --git a/packages/evo-colormaps/tests/data/colormap_collection_response.json b/packages/evo-colormaps/tests/data/colormap_collection_response.json index 10858592..2740fe4a 100644 --- a/packages/evo-colormaps/tests/data/colormap_collection_response.json +++ b/packages/evo-colormaps/tests/data/colormap_collection_response.json @@ -35,7 +35,7 @@ "modified_by": 
"00000000-0000-0000-0000-000000000010", "name": "continuous colormap 1", "schema": "continuous", - "self_link": "http://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/colormaps/00000000-0000-0000-0000-000000000006" + "self_link": "https://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/colormaps/00000000-0000-0000-0000-000000000006" }, { "colors": [ @@ -70,7 +70,7 @@ "modified_by": "00000000-0000-0000-0000-000000000010", "name": "discrete colormap 1", "schema": "discrete", - "self_link": "http://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/colormaps/00000000-0000-0000-0000-000000000007" + "self_link": "https://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/colormaps/00000000-0000-0000-0000-000000000007" }, { "colors": [ @@ -102,7 +102,7 @@ "modified_by": "00000000-0000-0000-0000-000000000010", "name": "category colormap 1", "schema": "category", - "self_link": "http://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/colormaps/00000000-0000-0000-0000-000000000008" + "self_link": "https://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/colormaps/00000000-0000-0000-0000-000000000008" } ] } diff --git a/packages/evo-colormaps/tests/data/continuous_colormap_response.json b/packages/evo-colormaps/tests/data/continuous_colormap_response.json index 9ce4f1e0..8133d47e 100644 --- a/packages/evo-colormaps/tests/data/continuous_colormap_response.json +++ b/packages/evo-colormaps/tests/data/continuous_colormap_response.json @@ -33,5 +33,5 @@ "modified_by": "00000000-0000-0000-0000-000000000010", "name": "continuous colormap 1", "schema": "continuous", - "self_link": "http://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/colormaps/00000000-0000-0000-0000-000000000006" + "self_link": "https://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/colormaps/00000000-0000-0000-0000-000000000006" } diff --git a/packages/evo-colormaps/tests/data/discrete_colormap_response.json b/packages/evo-colormaps/tests/data/discrete_colormap_response.json index 2b021962..f99ed11c 100644 --- a/packages/evo-colormaps/tests/data/discrete_colormap_response.json +++ b/packages/evo-colormaps/tests/data/discrete_colormap_response.json @@ -28,5 +28,5 @@ "modified_by": "00000000-0000-0000-0000-000000000010", "name": "discrete colormap 1", "schema": "discrete", - "self_link": "http://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/colormaps/00000000-0000-0000-0000-000000000007" + "self_link": "https://unittest.localhost/colormap/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/colormaps/00000000-0000-0000-0000-000000000007" } diff --git a/packages/evo-files/tests/data/get_file.json b/packages/evo-files/tests/data/get_file.json index f9f84a31..c16002dd 100644 --- a/packages/evo-files/tests/data/get_file.json +++ b/packages/evo-files/tests/data/get_file.json @@ -11,12 +11,12 @@ "name": "x y", "email": "test@example.com" }, - "download": 
"http://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points_0.csv", + "download": "https://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points_0.csv", "etag": "", "file_id": "00000000-0000-0000-0000-000000000006", "name": "points.csv", "path": "/", - "self": "http://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points_0.csv", + "self": "https://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points_0.csv", "size": 10, "version_id": "1", "versions": [ @@ -28,7 +28,7 @@ "email": "test@example.com" }, "file_id": "00000000-0000-0000-0000-000000000006", - "link": "http://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points_0.csv", + "link": "https://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points_0.csv", "name": "points.csv", "path": "/", "size": 0, diff --git a/packages/evo-files/tests/data/get_file_long_name.json b/packages/evo-files/tests/data/get_file_long_name.json index cfdcef93..611f87a0 100644 --- a/packages/evo-files/tests/data/get_file_long_name.json +++ b/packages/evo-files/tests/data/get_file_long_name.json @@ -11,12 +11,12 @@ "name": "x y", "email": "test@example.com" }, - "download": "http://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points_0.csv", + "download": "https://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points_0.csv", "etag": "", "file_id": "00000000-0000-0000-0000-000000000006", "name": "pointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspointspoints.csv", "path": "/", - "self": "http://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points_0.csv", + "self": "https://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points_0.csv", "size": 10, "version_id": "1", "versions": [ @@ -28,7 +28,7 @@ "email": "test@example.com" }, "file_id": "00000000-0000-0000-0000-000000000006", - "link": "http://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points_0.csv", + "link": "https://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points_0.csv", "name": "points.csv", "path": "/", "size": 0, diff --git a/packages/evo-files/tests/data/list_files_0.json b/packages/evo-files/tests/data/list_files_0.json index 2cfaf500..b5a4c537 100644 --- a/packages/evo-files/tests/data/list_files_0.json +++ b/packages/evo-files/tests/data/list_files_0.json @@ -20,7 +20,7 @@ }, "etag": "", "links": { - "self": 
"http://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/A/m.json" + "self": "https://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/A/m.json" }, "size": 11 }, @@ -43,14 +43,14 @@ }, "etag": "", "links": { - "self": "http://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/A/n.json" + "self": "https://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/A/n.json" }, "size": 12 } ], "limit": 5000, "links": { - "next": "http://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files?path=pointset/&limit=2&offset=2", + "next": "https://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files?path=pointset/&limit=2&offset=2", "prev": null, "self": "" }, diff --git a/packages/evo-files/tests/data/list_files_1.json b/packages/evo-files/tests/data/list_files_1.json index 044a4bc8..2f45c0ed 100644 --- a/packages/evo-files/tests/data/list_files_1.json +++ b/packages/evo-files/tests/data/list_files_1.json @@ -20,7 +20,7 @@ }, "etag": "", "links": { - "self": "http://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/B/o.json" + "self": "https://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/B/o.json" }, "size": 13 } @@ -28,7 +28,7 @@ "limit": 5000, "links": { "next": null, - "prev": "http://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files?limit=2&offset=0", + "prev": "https://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files?limit=2&offset=0", "self": "" }, "offset": 0, diff --git a/packages/evo-files/tests/data/list_versions.json b/packages/evo-files/tests/data/list_versions.json index b78b3ab5..27d5b084 100644 --- a/packages/evo-files/tests/data/list_versions.json +++ b/packages/evo-files/tests/data/list_versions.json @@ -11,12 +11,12 @@ "name": "x y", "email": "test@example.com" }, - "download": "http://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points.csv", + "download": "https://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points.csv", "etag": "", "file_id": "00000000-0000-0000-0000-000000000006", "name": "points.csv", "path": "/", - "self": "http://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points.csv", + "self": "https://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points.csv", "size": 14, "version_id": "1", "versions": [ @@ -28,7 +28,7 @@ "email": "test@example.com" }, "file_id": "00000000-0000-0000-0000-000000000006", - "link": 
"http://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points.csv", + "link": "https://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points.csv", "name": "points.csv", "path": "/", "size": 14, @@ -42,7 +42,7 @@ "email": "test@example.com" }, "file_id": "00000000-0000-0000-0000-000000000006", - "link": "http://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points.csv", + "link": "https://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points.csv", "name": "points.csv", "path": "/", "size": 15, @@ -56,7 +56,7 @@ "email": "test@example.com" }, "file_id": "00000000-0000-0000-0000-000000000006", - "link": "http://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points.csv", + "link": "https://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points.csv", "name": "points.csv", "path": "/", "size": 16, diff --git a/packages/evo-files/tests/data/update_file.json b/packages/evo-files/tests/data/update_file.json index 364a53ca..72ad018d 100644 --- a/packages/evo-files/tests/data/update_file.json +++ b/packages/evo-files/tests/data/update_file.json @@ -1,5 +1,5 @@ { "file_id": "00000000-0000-0000-0000-000000000005", - "upload": "http://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/00000000-0000-0000-0000-000000000005", + "upload": "https://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/00000000-0000-0000-0000-000000000005", "version_id": "1" } \ No newline at end of file diff --git a/packages/evo-files/tests/data/upsert_file.json b/packages/evo-files/tests/data/upsert_file.json index 183db42e..bb45d26d 100644 --- a/packages/evo-files/tests/data/upsert_file.json +++ b/packages/evo-files/tests/data/upsert_file.json @@ -1,5 +1,5 @@ { "file_id": "00000000-0000-0000-0000-000000000005", - "upload": "http://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points_0.csv", + "upload": "https://unittest.localhost/path/file/v2/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/files/path/points_0.csv", "version_id": "1" } \ No newline at end of file diff --git a/packages/evo-objects/docs/examples/quickstart.ipynb b/packages/evo-objects/docs/examples/quickstart.ipynb index 70b63392..fb1ef3e0 100644 --- a/packages/evo-objects/docs/examples/quickstart.ipynb +++ b/packages/evo-objects/docs/examples/quickstart.ipynb @@ -44,7 +44,7 @@ "environment = manager.get_environment()\n", "connector = manager.get_connector()\n", "\n", - "object_client = ObjectAPIClient(environment, connector)\n", + "object_client = ObjectAPIClient(environment, connector, manager.cache) # Cache is optional\n", "service_health = await object_client.get_service_health()\n", "\n", "print(f\"Object API is {service_health.status.name.lower()}\")\n", @@ -267,16 +267,6 @@ "\n", "downloaded_object = await 
object_client.download_object_by_path(\"sdk/v2/sample-pointset.json\")\n", "metadata = downloaded_object.metadata\n", - "downloaded_dict = downloaded_object.as_dict()\n", - "print(downloaded_dict)\n", - "\n", - "# Use the data client to download the parquet data.\n", - "downloaded_data = await data_client.download_table(\n", - " object_id=metadata.id,\n", - " version_id=metadata.version_id,\n", - " table_info=downloaded_dict[\"locations\"][\"coordinates\"],\n", - " fb=FeedbackWidget(\"Downloading data\"),\n", - ")\n", "\n", "if metadata.created_by is not None and metadata.created_by.name is not None:\n", " accreditation = f\"{metadata.created_by.name}\"\n", @@ -284,10 +274,92 @@ " accreditation = \"an unknown user\"\n", "created_at_str = metadata.created_at.astimezone().strftime(\"on %Y-%m-%d at %H:%M:%S\")\n", "print(f\"{metadata.path} :: uploaded by {accreditation} {created_at_str}\")\n", - "print(downloaded_dict)\n", + "\n", + "# Downloaded object supports JMESPath queries for more expressive access to JSON data.\n", + "print(\n", + " downloaded_object.search( # Project only a few fields for display\n", + " \"\"\"\n", + " {\n", + " name: name,\n", + " uuid: uuid,\n", + " schema: schema,\n", + " coordinate_reference_system: coordinate_reference_system,\n", + " bounding_box: {\n", + " min: [bounding_box.min_x, bounding_box.min_y, bounding_box.min_z],\n", + " max: [bounding_box.max_x, bounding_box.max_y, bounding_box.max_z]\n", + " }\n", + " }\n", + " \"\"\"\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from evo.objects import DownloadedObject, ObjectReference\n", + "\n", + "# If you already know the object you want, you don't even need the object client\n", + "ref = ObjectReference.new(\n", + " environment=manager.get_environment(),\n", + " object_path=\"sdk/v2/sample-pointset.json\",\n", + " version_id=metadata.version_id, # The version ID is optional\n", + ")\n", + "print(\"ObjectReference URL:\", ref) # An object reference can also be a string in the format printed here.\n", + "downloaded_object = await DownloadedObject.from_reference(\n", + " connector=manager.get_connector(),\n", + " reference=ref,\n", + " cache=manager.cache,\n", + ")\n", + "downloaded_object.search(\"@\") # Pretty-print the entire object via a JMESPath proxy object" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Download parquet data from a pointset object" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "table_info = downloaded_object.search(\"locations.coordinates\") # Use a JMESPath expression to find the table info\n", + "\n", + "# Download parquet data by table info reference\n", + "downloaded_data = await downloaded_object.download_table(table_info, fb=FeedbackWidget(\"Downloading pyarrow.Table\"))\n", + "\n", + "# OR you can just use the JMESPath expression in the download_table call directly\n", + "downloaded_data = await downloaded_object.download_table(\"locations.coordinates\")\n", "print(downloaded_data)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# A similar interface can be used to download as a pandas DataFrame\n", + "await downloaded_object.download_dataframe(\"locations.coordinates\", fb=FeedbackWidget(\"Downloading pandas.DataFrame\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ 
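+    "# Note: download_array needs the optional numpy dependency; install it with `pip install evo-objects[utils]`.\n",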
+ "# And as a NumPy array\n", + "await downloaded_object.download_array(\"locations.coordinates\", fb=FeedbackWidget(\"Downloading numpy.ndarray\"))" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -324,7 +396,7 @@ "stages = await object_client.list_stages()\n", "\n", "for stage in stages:\n", - " print(f\"{stage.name} ({stage.stage_id})\")" + " print(f\"{stage.name} ({stage.id})\")" ] }, { @@ -344,7 +416,7 @@ "metadata = downloaded_object.metadata\n", "print(f\"Current stage: {metadata}\")\n", "\n", - "await object_client.set_stage(metadata.id, version_id=metadata.version_id, stage_id=stages[1].stage_id)\n", + "await object_client.set_stage(metadata.id, version_id=metadata.version_id, stage_id=stages[1].id)\n", "\n", "updated_metadata = await object_client.download_object_by_id(metadata.id)\n", "print(f\"Updated stage: {updated_metadata.metadata.stage}\")" @@ -362,7 +434,7 @@ ], "metadata": { "kernelspec": { - "display_name": "venv", + "display_name": "evo-sdk", "language": "python", "name": "python3" }, diff --git a/packages/evo-objects/pyproject.toml b/packages/evo-objects/pyproject.toml index 335223de..722ff02d 100644 --- a/packages/evo-objects/pyproject.toml +++ b/packages/evo-objects/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "evo-objects" description = "Python SDK for using the Seequent Evo Geoscience Object API" -version = "0.2.3" +version = "0.3.0" requires-python = ">=3.10" license-files = ["LICENSE.md"] dynamic = ["readme"] @@ -10,7 +10,7 @@ authors = [ ] dependencies = [ - "evo-sdk-common>=0.1.0", + "evo-sdk-common[jmespath]>=0.5.4", "pydantic>=2,<3", ] @@ -21,9 +21,9 @@ Homepage = "https://www.seequent.com/" Documentation = "https://developer.seequent.com/" [project.optional-dependencies] -aiohttp = ["evo-sdk-common[aiohttp]>=0.1.0"] -notebooks = ["evo-sdk-common[notebooks]>=0.1.0"] -utils = ["pyarrow", "pandas"] +aiohttp = ["evo-sdk-common[aiohttp]"] +notebooks = ["evo-sdk-common[notebooks]"] +utils = ["pyarrow", "pyarrow-stubs", "pandas", "numpy"] [dependency-groups] # Dev dependencies. The version is left unspecified so the latest is installed. diff --git a/packages/evo-objects/src/evo/objects/__init__.py b/packages/evo-objects/src/evo/objects/__init__.py index e81312be..dba64df8 100644 --- a/packages/evo-objects/src/evo/objects/__init__.py +++ b/packages/evo-objects/src/evo/objects/__init__.py @@ -10,7 +10,7 @@ # limitations under the License. from .client import DownloadedObject, ObjectAPIClient -from .data import ObjectMetadata, ObjectSchema, ObjectVersion, SchemaVersion, Stage +from .data import ObjectMetadata, ObjectReference, ObjectSchema, ObjectVersion, SchemaVersion, Stage from .io import ObjectDataDownload, ObjectDataUpload __all__ = [ @@ -19,6 +19,7 @@ "ObjectDataDownload", "ObjectDataUpload", "ObjectMetadata", + "ObjectReference", "ObjectSchema", "ObjectVersion", "SchemaVersion", diff --git a/packages/evo-objects/src/evo/objects/client/__init__.py b/packages/evo-objects/src/evo/objects/client/__init__.py new file mode 100644 index 00000000..65670d4c --- /dev/null +++ b/packages/evo-objects/src/evo/objects/client/__init__.py @@ -0,0 +1,15 @@ +# Copyright © 2025 Bentley Systems, Incorporated +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .api_client import ObjectAPIClient +from .object_client import DownloadedObject + +__all__ = ["DownloadedObject", "ObjectAPIClient"] diff --git a/packages/evo-objects/src/evo/objects/client.py b/packages/evo-objects/src/evo/objects/client/api_client.py similarity index 69% rename from packages/evo-objects/src/evo/objects/client.py rename to packages/evo-objects/src/evo/objects/client/api_client.py index 93152122..a71bd452 100644 --- a/packages/evo-objects/src/evo/objects/client.py +++ b/packages/evo-objects/src/evo/objects/client/api_client.py @@ -11,110 +11,46 @@ from __future__ import annotations -from collections.abc import AsyncIterator, Iterator, Sequence -from pathlib import PurePosixPath +from collections.abc import AsyncIterator, Sequence from uuid import UUID from evo import logging from evo.common import APIConnector, BaseAPIClient, HealthCheckType, ICache, Page, ServiceHealth from evo.common.data import EmptyResponse, Environment, OrderByOperatorEnum -from evo.common.io.exceptions import DataNotFoundError from evo.common.utils import get_service_health, parse_order_by -from evo.workspaces import ServiceUser - -from .data import ObjectMetadata, ObjectOrderByEnum, ObjectSchema, ObjectVersion, OrgObjectMetadata, Stage -from .endpoints import MetadataApi, ObjectsApi, StagesApi -from .endpoints.models import ( - GeoscienceObject, - GeoscienceObjectVersion, - GetObjectResponse, - ListedObject, - MetadataUpdateBody, - OrgListedObject, - PostObjectResponse, - UpdateGeoscienceObject, -) -from .exceptions import ObjectUUIDError -from .io import ObjectDataDownload, ObjectDataUpload -from .utils import ObjectDataClient -logger = logging.getLogger("object.client") - -__all__ = [ - "DownloadedObject", - "ObjectAPIClient", -] - - -def _version_from_listed_version(model: GeoscienceObjectVersion) -> ObjectVersion: - """Create an ObjectVersion instance from a generated ListedObject model. - - :param model: The model to create the ObjectVersion instance from. - - :return: An ObjectVersion instance. 
- """ - created_by = None if model.created_by is None else ServiceUser.from_model(model.created_by) # type: ignore - stage = None if model.stage is None else Stage.from_model(model.stage) - return ObjectVersion( - version_id=model.version_id, - created_at=model.created_at, - created_by=created_by, - stage=stage, - ) - - -class DownloadedObject: - """A downloaded geoscience object.""" - - def __init__( - self, object_: GeoscienceObject, metadata: ObjectMetadata, urls_by_name: dict[str, str], connector: APIConnector - ) -> None: - self._object = object_ - self._metadata = metadata - self._urls_by_name = urls_by_name - self._connector = connector - - @property - def schema(self) -> ObjectSchema: - """The schema of the object.""" - return self._metadata.schema_id - - @property - def metadata(self) -> ObjectMetadata: - """The metadata of the object.""" - return self._metadata - - def as_dict(self) -> dict: - """Get this object as a dictionary.""" - return self._object.model_dump(mode="python", by_alias=True) - - def prepare_data_download(self, data_identifiers: Sequence[str | UUID]) -> Iterator[ObjectDataDownload]: - """Prepare to download multiple data files from the geoscience object service, for this object. - - Any data IDs that are not associated with the requested object will raise a DataNotFoundError. - - :param data_identifiers: A list of sha256 digests or UUIDs for the data to be downloaded. +from ..data import ObjectMetadata, ObjectOrderByEnum, ObjectReference, ObjectVersion, OrgObjectMetadata, Stage +from ..endpoints import MetadataApi, ObjectsApi, StagesApi +from ..endpoints.models import GeoscienceObject, MetadataUpdateBody, UpdateGeoscienceObject +from ..exceptions import ObjectUUIDError +from ..io import ObjectDataDownload, ObjectDataUpload +from . import parse +from .object_client import DownloadedObject + +try: + from ..utils import ObjectDataClient +except ImportError: + _DATA_CLIENT_AVAILABLE = False +else: + _DATA_CLIENT_AVAILABLE = True - :return: An iterator of data download contexts that can be used to download the data. +logger = logging.getLogger("object.client") - :raises DataNotFoundError: If any requested data ID is not associated with this object. - """ - try: - filtered_urls_by_name = {str(name): self._urls_by_name[str(name)] for name in data_identifiers} - except KeyError as exc: - raise DataNotFoundError(f"Unable to find the requested data: {exc.args[0]}") from exc - for ctx in ObjectDataDownload._create_multiple( - connector=self._connector, metadata=self._metadata, urls_by_name=filtered_urls_by_name - ): - yield ctx +__all__ = ["ObjectAPIClient"] class ObjectAPIClient(BaseAPIClient): - def __init__(self, environment: Environment, connector: APIConnector) -> None: + def __init__(self, environment: Environment, connector: APIConnector, cache: ICache | None = None) -> None: + """ + :param environment: The target Evo environment, providing org and workspace IDs. + :param connector: The API connector to use for making API calls. + :param cache: An optional cache to use for data downloads. + """ super().__init__(environment, connector) self._stages_api = StagesApi(connector=connector) self._objects_api = ObjectsApi(connector=connector) self._metadata_api = MetadataApi(connector=connector) + self._cache = cache async def get_service_health(self, check_type: HealthCheckType = HealthCheckType.FULL) -> ServiceHealth: """Get the health of the geoscience object service. 
@@ -128,79 +64,6 @@ async def get_service_health(self, check_type: HealthCheckType = HealthCheckType """ return await get_service_health(self._connector, "geoscience-object", check_type=check_type) - def _metadata_from_listed_object(self, model: ListedObject) -> ObjectMetadata: - """Create an ObjectMetadata instance from a generated ListedObject model. - - :param model: The model to create the ObjectMetadata instance from. - - :return: An ObjectMetadata instance. - """ - created_by = None if model.created_by is None else ServiceUser.from_model(model.created_by) - modified_by = None if model.modified_by is None else ServiceUser.from_model(model.modified_by) - stage = None if model.stage is None else Stage.from_model(model.stage) - return ObjectMetadata( - environment=self._environment, - id=model.object_id, - name=model.name, - created_at=model.created_at, - created_by=created_by, - modified_at=model.modified_at, - modified_by=modified_by, - parent=model.path.rstrip("/"), - schema_id=ObjectSchema.from_id(model.schema_), - version_id=model.version_id, - stage=stage, - ) - - def _metadata_from_org_listed_object(self, model: OrgListedObject) -> OrgObjectMetadata: - """Create an OrgObjectMetadata instance from a generated OrgListedObject model. - - :param model: The model to create the OrgObjectMetadata instance from. - - :return: An OrgObjectMetadata instance. - """ - created_by = None if model.created_by is None else ServiceUser.from_model(model.created_by) - modified_by = None if model.modified_by is None else ServiceUser.from_model(model.modified_by) - stage = None if model.stage is None else Stage.from_model(model.stage) - return OrgObjectMetadata( - environment=self._environment, - workspace_id=model.workspace_id, - workspace_name=model.workspace_name, - id=model.object_id, - name=model.name, - created_at=model.created_at, - created_by=created_by, - modified_at=model.modified_at, - modified_by=modified_by, - schema_id=ObjectSchema.from_id(model.schema_), - stage=stage, - ) - - def _metadata_from_endpoint_model(self, model: GetObjectResponse | PostObjectResponse) -> ObjectMetadata: - """Create an ObjectMetadata instance from a generated GetObjectResponse or PostObjectResponse model. - - :param model: The model to create the ObjectMetadata instance from. - - :return: An ObjectMetadata instance. 
- """ - object_path = PurePosixPath(model.object_path) - created_by = None if model.created_by is None else ServiceUser.from_model(model.created_by) - modified_by = None if model.modified_by is None else ServiceUser.from_model(model.modified_by) - stage = None if model.stage is None else Stage.from_model(model.stage) - return ObjectMetadata( - environment=self._environment, - id=model.object_id, - name=object_path.name, - created_at=model.created_at, - created_by=created_by, - modified_at=model.modified_at, - modified_by=modified_by, - parent=str(object_path.parent), - schema_id=ObjectSchema.from_id(model.object.schema_), - version_id=model.version_id, - stage=stage, - ) - async def list_objects( self, offset: int = 0, @@ -238,12 +101,7 @@ async def list_objects( request_timeout=request_timeout, deleted=deleted, ) - return Page( - offset=offset, - limit=limit, - total=response.total, - items=[self._metadata_from_listed_object(model) for model in response.objects], - ) + return parse.page_of_metadata(response, self._environment) async def list_all_objects( self, @@ -320,17 +178,7 @@ async def list_objects_for_instance( permitted_workspaces_only=True, deleted=deleted, ) - return Page( - offset=offset, - limit=limit, - total=response.total, - items=[self._metadata_from_org_listed_object(model) for model in response.objects], - ) - - @staticmethod - def _get_object_versions(response: GetObjectResponse) -> list[ObjectVersion]: - object_versions = [_version_from_listed_version(model) for model in response.versions] - return sorted(object_versions, key=lambda v: v.created_at, reverse=True) + return parse.page_of_metadata(response, self._environment) async def list_versions_by_path( self, path: str, request_timeout: int | float | tuple[int | float, int | float] | None = None @@ -350,7 +198,7 @@ async def list_versions_by_path( include_versions=True, request_timeout=request_timeout, ) - return self._get_object_versions(response) + return parse.versions(response) async def list_versions_by_id( self, object_id: UUID, request_timeout: int | float | tuple[int | float, int | float] | None = None @@ -370,7 +218,7 @@ async def list_versions_by_id( include_versions=True, request_timeout=request_timeout, ) - return self._get_object_versions(response) + return parse.versions(response) async def prepare_data_upload(self, data_identifiers: Sequence[str | UUID]) -> AsyncIterator[ObjectDataUpload]: """Prepare to upload multiple data files to the geoscience object service. @@ -416,20 +264,23 @@ async def prepare_data_download( for ctx in downloaded_object.prepare_data_download(data_identifiers): yield ctx - def get_data_client(self, cache: ICache) -> ObjectDataClient: - """Get a data client for the geoscience object service. + if _DATA_CLIENT_AVAILABLE: + # Optional data client functionality, enabled if the data client dependencies are installed. - The data client provides a high-level interface for uploading and downloading data that is referenced in - geoscience objects, and caching the data locally. It depends on the optional dependency `pyarrow`, which is - not installed by default. This dependency can be installed with `pip install evo-objects[utils]`. + def get_data_client(self, cache: ICache) -> ObjectDataClient: + """Get a data client for the geoscience object service. - :param cache: The cache to use for data downloads. + The data client provides a high-level interface for uploading and downloading data that is referenced in + geoscience objects, and caching the data locally. 
It depends on the optional dependency `pyarrow`, which is
+            not installed by default. This dependency can be installed with `pip install evo-objects[utils]`.
 
-        :return: An ObjectDataClient instance.
+            :param cache: The cache to use for data downloads.
 
-        :raises RuntimeError: If the `pyarrow` package is not installed.
-        """
-        return ObjectDataClient(environment=self._environment, connector=self._connector, cache=cache)
+            :return: An ObjectDataClient instance.
+
+            :raises RuntimeError: If the `pyarrow` package is not installed.
+            """
+            return ObjectDataClient(environment=self._environment, connector=self._connector, cache=cache)
 
     async def create_geoscience_object(
         self, path: str, object_dict: dict, request_timeout: int | float | tuple[int | float, int | float] | None = None
@@ -461,7 +312,7 @@
             request_timeout=request_timeout,
         )
         object_dict["uuid"] = result.object_id
-        return self._metadata_from_endpoint_model(result)
+        return parse.object_metadata(result, self._environment)
 
     async def move_geoscience_object(
         self, path: str, object_dict: dict, request_timeout: int | float | tuple[int | float, int | float] | None = None
@@ -488,7 +339,7 @@
             geoscience_object=object_for_upload,
             request_timeout=request_timeout,
         )
-        return self._metadata_from_endpoint_model(result)
+        return parse.object_metadata(result, self._environment)
 
     async def update_geoscience_object(
         self, object_dict: dict, request_timeout: int | float | tuple[int | float, int | float] | None = None
@@ -514,18 +365,7 @@
             update_geoscience_object=object_for_upload,
             request_timeout=request_timeout,
         )
-        return self._metadata_from_endpoint_model(result)
-
-    def _downloaded_object_from_response(self, response: GetObjectResponse) -> DownloadedObject:
-        """Parse object metadata and a geoscience object model instance from a get object response
-
-        :param response: The response from one of the get object endpoints.
-
-        :return: A tuple containing the object metadata and a data model of the requested geoscience object.
-        """
-        metadata = self._metadata_from_endpoint_model(response)
-        urls_by_name = {getattr(link, "name", link.id): link.download_url for link in response.links.data}
-        return DownloadedObject(response.object, metadata, urls_by_name, self._connector)
+        return parse.object_metadata(result, self._environment)
 
     async def download_object_by_path(
         self,
@@ -543,15 +383,13 @@
         :return: A DownloadedObject containing the object metadata and a data model of the requested geoscience object.
         """
-        response = await self._objects_api.get_object(
-            org_id=str(self._environment.org_id),
-            workspace_id=str(self._environment.workspace_id),
-            objects_path=path,
-            version=version,
-            additional_headers={"Accept-Encoding": "gzip"},
+        reference = ObjectReference.new(environment=self._environment, object_path=path, version_id=version)
+        return await DownloadedObject.from_reference(
+            connector=self._connector,
+            reference=reference,
+            cache=self._cache,
             request_timeout=request_timeout,
         )
-        return self._downloaded_object_from_response(response)
 
     async def download_object_by_id(
         self,
@@ -569,15 +407,13 @@
         :return: A DownloadedObject containing the object metadata and a data model of the requested geoscience object.
""" - response = await self._objects_api.get_object_by_id( - org_id=str(self._environment.org_id), - workspace_id=str(self._environment.workspace_id), - object_id=str(object_id), - version=version, - additional_headers={"Accept-Encoding": "gzip"}, + reference = ObjectReference.new(environment=self._environment, object_id=object_id, version_id=version) + return await DownloadedObject.from_reference( + connector=self._connector, + reference=reference, + cache=self._cache, request_timeout=request_timeout, ) - return self._downloaded_object_from_response(response) async def get_latest_object_versions( self, @@ -668,14 +504,14 @@ async def restore_geoscience_object( # If the restore happened with a rename, the response will be the metadata of the restored object if isinstance(result, EmptyResponse): return None - return self._metadata_from_endpoint_model(result) + return parse.object_metadata(result, self._environment) async def list_stages(self) -> list[Stage]: """List all available stages in the organisation. :return: A list of all available stages.""" response = await self._stages_api.list_stages(org_id=str(self._environment.org_id)) - return [Stage.from_model(model) for model in response.stages] + return [parse.stage(model) for model in response.stages] async def set_stage(self, object_id: UUID, version_id: int, stage_id: UUID) -> None: """Set the stage of a specific version of a geoscience object. diff --git a/packages/evo-objects/src/evo/objects/client/object_client.py b/packages/evo-objects/src/evo/objects/client/object_client.py new file mode 100644 index 00000000..bc72bcda --- /dev/null +++ b/packages/evo-objects/src/evo/objects/client/object_client.py @@ -0,0 +1,247 @@ +# Copyright © 2025 Bentley Systems, Incorporated +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import contextlib +from collections.abc import AsyncGenerator, Iterator, Sequence +from typing import Any +from uuid import UUID + +from pydantic import ConfigDict, TypeAdapter + +from evo import jmespath, logging +from evo.common import APIConnector, ICache, IFeedback +from evo.common.io.exceptions import DataNotFoundError +from evo.common.utils import NoFeedback + +from ..data import ObjectMetadata, ObjectReference, ObjectSchema +from ..endpoints import ObjectsApi, models +from ..io import ObjectDataDownload +from . 
import parse + +try: + import pyarrow as pa + + from ..parquet import ParquetDownloader, ParquetLoader, TableInfo +except ImportError: + _LOADER_AVAILABLE = False +else: + _LOADER_AVAILABLE = True + + _TABLE_INFO_VALIDATOR: TypeAdapter[TableInfo] = TypeAdapter(TableInfo, config=ConfigDict(extra="ignore")) + +try: + import pandas as pd +except ImportError: + _PD_AVAILABLE = False +else: + _PD_AVAILABLE = True + +try: + import numpy as np +except ImportError: + _NP_AVAILABLE = False +else: + _NP_AVAILABLE = True + +__all__ = ["DownloadedObject"] + +logger = logging.getLogger("object.client") + + +class DownloadedObject: + """A downloaded geoscience object.""" + + def __init__( + self, + object_: models.GeoscienceObject, + metadata: ObjectMetadata, + urls_by_name: dict[str, str], + connector: APIConnector, + cache: ICache | None = None, + ) -> None: + """ + :param object_: The raw geoscience object model. + :param metadata: The parsed metadata for the object. + :param urls_by_name: A mapping of data names to their initial download URLs. + :param connector: The API connector to use for downloading data. + :param cache: An optional cache to use for data downloads. + """ + self._object = object_ + self._metadata = metadata + self._urls_by_name = urls_by_name + self._connector = connector + self._cache = cache + + @staticmethod + async def from_reference( + connector: APIConnector, + reference: ObjectReference | str, + cache: ICache | None = None, + request_timeout: int | float | tuple[int | float, int | float] | None = None, + ) -> DownloadedObject: + """Download a geoscience object from the service, given an object reference. + + :param connector: The API connector to use for downloading data. + :param reference: The reference to the object to download, or a URL as a string that can be parsed into + a reference. + :param cache: An optional cache to use for data downloads. + :param request_timeout: An optional timeout to use for API requests. See evo.common.APIConnector for details. + + :raises ValueError: If the reference is invalid, or if the connector base URL does not match the reference hub URL. 
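+
+        :return: A DownloadedObject wrapping the retrieved object and its parsed metadata.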
+        """
+        ref = ObjectReference(reference)  # Parse the reference if it's a string
+
+        if connector.base_url != ref.hub_url:
+            raise ValueError(
+                f"The connector base URL '{connector.base_url}' does not match the reference hub URL '{ref.hub_url}'"
+            )
+
+        api = ObjectsApi(connector)
+
+        request_kwargs = dict(
+            org_id=str(ref.org_id),
+            workspace_id=str(ref.workspace_id),
+            version=ref.version_id,
+            additional_headers={"Accept-Encoding": "gzip"},
+            request_timeout=request_timeout,
+        )
+
+        if ref.object_id is not None and ref.object_path is not None:
+            raise ValueError("Only one of object_id or object_path should be provided")
+
+        if ref.object_id is not None:
+            response = await api.get_object_by_id(object_id=ref.object_id, **request_kwargs)
+        elif ref.object_path is not None:
+            response = await api.get_object(objects_path=ref.object_path, **request_kwargs)
+        else:
+            raise ValueError("Either object_id or object_path must be provided")
+
+        metadata = parse.object_metadata(response, ref.environment)
+        urls_by_name = {getattr(link, "name", link.id): link.download_url for link in response.links.data}
+        return DownloadedObject(
+            object_=response.object,
+            metadata=metadata,
+            urls_by_name=urls_by_name,
+            connector=connector,
+            cache=cache,
+        )
+
+    @property
+    def schema(self) -> ObjectSchema:
+        """The schema of the object."""
+        return self._metadata.schema_id
+
+    @property
+    def metadata(self) -> ObjectMetadata:
+        """The metadata of the object."""
+        return self._metadata
+
+    def as_dict(self) -> dict:
+        """Get this object as a dictionary."""
+        return self._object.model_dump(mode="python", by_alias=True)
+
+    def search(self, expression: str) -> Any:
+        """Search this object's JSON representation using a JMESPath expression.
+
+        :param expression: The JMESPath expression to use for the search.
+
+        :return: The result of the search.
+        """
+        return jmespath.search(expression, self.as_dict())
+
+    def prepare_data_download(self, data_identifiers: Sequence[str | UUID]) -> Iterator[ObjectDataDownload]:
+        """Prepare to download multiple data files from the geoscience object service, for this object.
+
+        Any data IDs that are not associated with the requested object will raise a DataNotFoundError.
+
+        :param data_identifiers: A list of sha256 digests or UUIDs for the data to be downloaded.
+
+        :return: An iterator of data download contexts that can be used to download the data.
+
+        :raises DataNotFoundError: If any requested data ID is not associated with this object.
+        """
+        try:
+            filtered_urls_by_name = {str(name): self._urls_by_name[str(name)] for name in data_identifiers}
+        except KeyError as exc:
+            raise DataNotFoundError(f"Unable to find the requested data: {exc.args[0]}") from exc
+        for ctx in ObjectDataDownload._create_multiple(
+            connector=self._connector, metadata=self._metadata, urls_by_name=filtered_urls_by_name
+        ):
+            yield ctx
+
+    if _LOADER_AVAILABLE:
+        # Optional support for loading Parquet data using PyArrow.
+
+        @contextlib.asynccontextmanager
+        async def _with_parquet_loader(
+            self, table_info: TableInfo | str, fb: IFeedback
+        ) -> AsyncGenerator[ParquetLoader, None]:
+            """Download parquet data and get a ParquetLoader for the data referenced by the given
+            table info or data reference string.
+
+            :param table_info: The table info dict, JMESPath to table info, or data reference string.
+            :param fb: An optional feedback instance to report download progress to.
+
+            :returns: A ParquetLoader that can be used to read the referenced data.
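+
+            :raises ValueError: If a JMESPath expression does not resolve to table info for this object.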
+ """ + if isinstance(table_info, str): + if isinstance(resolved := self.search(table_info), jmespath.JMESPathObjectProxy): + table_info = _TABLE_INFO_VALIDATOR.validate_python(resolved.raw) + else: + raise ValueError(f"Expected table info, got {type(resolved)}") + else: + table_info = _TABLE_INFO_VALIDATOR.validate_python(table_info) + + (download,) = self.prepare_data_download([table_info["data"]]) + async with ParquetDownloader(download, self._connector.transport, self._cache).with_feedback(fb) as loader: + loader.validate_with_table_info(table_info) + yield loader + + async def download_table(self, table_info: TableInfo | str, fb: IFeedback = NoFeedback) -> pa.Table: + """Download the data referenced by the given table info or data reference string as a PyArrow Table. + + :param table_info: The table info dict, JMESPath to table info, or data reference string. + :param fb: An optional feedback instance to report download progress to. + + :returns: A PyArrow Table containing the downloaded data. + """ + async with self._with_parquet_loader(table_info, fb) as loader: + return loader.load_as_table() + + if _PD_AVAILABLE: + # Optional support for loading data as Pandas DataFrames. Requires parquet support via PyArrow as well. + + async def download_dataframe(self, table_info: TableInfo | str, fb: IFeedback = NoFeedback) -> pd.DataFrame: + """Download the data referenced by the given table info or data reference string as a Pandas DataFrame. + + :param table_info: The table info dict, JMESPath to table info, or data reference string. + :param fb: An optional feedback instance to report download progress to. + + :returns: A Pandas DataFrame containing the downloaded data. + """ + async with self._with_parquet_loader(table_info, fb) as loader: + return loader.load_as_dataframe() + + if _NP_AVAILABLE: + # Optional support for loading data as NumPy arrays. Requires parquet support via PyArrow as well. + + async def download_array(self, table_info: TableInfo | str, fb: IFeedback = NoFeedback) -> np.ndarray: + """Download the data referenced by the given table info or data reference string as a NumPy array. + + :param table_info: The table info dict, JMESPath to table info, or data reference string. + :param fb: An optional feedback instance to report download progress to. + + :returns: A NumPy array containing the downloaded data. + """ + async with self._with_parquet_loader(table_info, fb) as loader: + return loader.load_as_array() diff --git a/packages/evo-objects/src/evo/objects/client/parse.py b/packages/evo-objects/src/evo/objects/client/parse.py new file mode 100644 index 00000000..5cab5884 --- /dev/null +++ b/packages/evo-objects/src/evo/objects/client/parse.py @@ -0,0 +1,199 @@ +# Copyright © 2025 Bentley Systems, Incorporated +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
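+
+"""Helpers for parsing generated endpoint models into the public evo.objects data types."""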
+
+import dataclasses
+from pathlib import PurePosixPath
+from typing import overload
+
+from evo.common import Environment, Page, ServiceUser
+
+from ..data import ObjectMetadata, ObjectSchema, ObjectVersion, OrgObjectMetadata, Stage
+from ..endpoints import models
+
+__all__ = [
+    "object_metadata",
+    "org_object_metadata",
+    "page_of_metadata",
+    "schema",
+    "stage",
+    "stage_or_none",
+    "user_or_none",
+    "version",
+    "versions",
+]
+
+
+def user_or_none(model: models.User | None) -> ServiceUser | None:
+    """Parse a ServiceUser or None value from the generated model.
+
+    :param model: The model returned by the generated code, or None.
+
+    :return: The parsed ServiceUser, or None if the input model is None.
+    """
+    return None if model is None else ServiceUser.from_model(model)
+
+
+def stage(model: models.StageResponse) -> Stage:
+    """Parse a Stage from the generated model.
+
+    :param model: The model returned by the generated code.
+
+    :return: A Stage instance.
+    """
+    return Stage.from_model(model)
+
+
+def stage_or_none(model: models.StageResponse | None) -> Stage | None:
+    """Parse a Stage or None value from the generated model.
+
+    :param model: The model returned by the generated code, or None.
+
+    :return: The parsed Stage, or None if the input model is None.
+    """
+    return None if model is None else stage(model)
+
+
+def version(model: models.GeoscienceObjectVersion) -> ObjectVersion:
+    """Parse an ObjectVersion from the generated model.
+
+    :param model: The model returned by the generated code.
+
+    :return: An ObjectVersion instance.
+    """
+    return ObjectVersion(
+        version_id=model.version_id,
+        created_at=model.created_at,
+        created_by=user_or_none(model.created_by),
+        stage=stage_or_none(model.stage),
+    )
+
+
+def versions(model: models.GetObjectResponse) -> list[ObjectVersion]:
+    """Parse a list of ObjectVersion from the generated model.
+
+    :param model: The model returned by the generated code.
+
+    :return: A list of ObjectVersion instances, sorted by created_at in descending order.
+    """
+    object_versions = [version(v) for v in model.versions]
+    return sorted(object_versions, key=lambda v: v.created_at, reverse=True)
+
+
+def schema(schema_id: str) -> ObjectSchema:
+    """Parse an ObjectSchema from the schema ID.
+
+    :param schema_id: The schema ID string.
+
+    :return: An ObjectSchema instance.
+    """
+    return ObjectSchema.from_id(schema_id)
+
+
+def object_metadata(
+    model: models.ListedObject | models.GetObjectResponse | models.PostObjectResponse, environment: Environment
+) -> ObjectMetadata:
+    """Parse an ObjectMetadata from the generated model.
+
+    :param model: The model returned by the generated code.
+    :param environment: The Evo environment associated with the object.
+
+    :return: An ObjectMetadata instance.
+    """
+    # There appears to be a schema defect where object_id may be None, even though it shouldn't be.
+    assert model.object_id is not None
+
+    # Parse name, parent, and schema_id from the appropriate fields depending on the model type.
+    if isinstance(model, models.ListedObject):
+        name = model.name
+        parent = model.path.rstrip("/")
+        schema_id = model.schema_
+    elif model.object_path is not None:
+        path = PurePosixPath(model.object_path)
+        name = path.name
+        parent = str(path.parent)
+        schema_id = model.object.schema_
+    else:
+        # There appears to be _another_ schema defect where object_path may be None in
+        # GetObjectResponse or PostObjectResponse, even though this never happens in practice.
+ raise ValueError("Model must be a ListedObject or have an object_path") + + return ObjectMetadata( + environment=environment, + id=model.object_id, + name=name, + created_at=model.created_at, + created_by=user_or_none(model.created_by), + modified_at=model.modified_at, + modified_by=user_or_none(model.modified_by), + parent=parent, + schema_id=schema(schema_id), + version_id=model.version_id, + stage=stage_or_none(model.stage), + ) + + +def org_object_metadata(model: models.OrgListedObject, environment: Environment) -> OrgObjectMetadata: + """Parse an OrgObjectMetadata from the generated model. + + :param model: The model returned by the generated code. + :param environment: The Evo environment associated with the object. + + :return: An ObjectMetadata instance. + """ + return OrgObjectMetadata( + environment=dataclasses.replace(environment, workspace_id=model.workspace_id), + workspace_id=model.workspace_id, + workspace_name=model.workspace_name, + id=model.object_id, + name=model.name, + created_at=model.created_at, + created_by=user_or_none(model.created_by), + modified_at=model.modified_at, + modified_by=user_or_none(model.modified_by), + schema_id=schema(model.schema_), + stage=stage_or_none(model.stage), + ) + + +@overload +def page_of_metadata(model: models.ListObjectsResponse, environment: Environment) -> Page[ObjectMetadata]: ... + + +@overload +def page_of_metadata(model: models.ListOrgObjectsResponse, environment: Environment) -> Page[OrgObjectMetadata]: ... + + +def page_of_metadata( + model: models.ListObjectsResponse | models.ListOrgObjectsResponse, environment: Environment +) -> Page[ObjectMetadata] | Page[OrgObjectMetadata]: + """Parse a Page of ObjectMetadata or OrgObjectMetadata from the generated model. + + :param model: The model returned by the generated code. + :param environment: The Evo environment associated with the objects. + + :return: A Page of ObjectMetadata or OrgObjectMetadata instances. + + :raises TypeError: If the model type is unsupported. + """ + match model: + case models.ListObjectsResponse(): + parse_metadata = object_metadata + case models.ListOrgObjectsResponse(): + parse_metadata = org_object_metadata + case _: + raise TypeError(f"Unsupported model type: {type(model)}") + + return Page( + offset=model.offset, + limit=model.limit, + total=model.total, + items=[parse_metadata(item, environment) for item in model.objects], + ) diff --git a/packages/evo-objects/src/evo/objects/data.py b/packages/evo-objects/src/evo/objects/data.py index 2dde2f4e..3ab73a5b 100644 --- a/packages/evo-objects/src/evo/objects/data.py +++ b/packages/evo-objects/src/evo/objects/data.py @@ -16,9 +16,10 @@ from dataclasses import dataclass from datetime import datetime from typing import Protocol +from urllib.parse import parse_qs, urlparse from uuid import UUID -from evo.common import ResourceMetadata +from evo.common import Environment, ResourceMetadata from evo.workspaces import ServiceUser from .exceptions import SchemaIDFormatError @@ -26,6 +27,7 @@ __all__ = [ "ObjectMetadata", "ObjectOrderByEnum", + "ObjectReference", "ObjectSchema", "ObjectVersion", "SchemaVersion", @@ -43,6 +45,116 @@ class ObjectOrderByEnum(str, enum.Enum): object_name = "object_name" +class ObjectReference(str): + """A structured URL reference to a geoscience object, optionally including a version ID. + + Geoscience Object URL references are the fully qualified HTTPS URLs used to access objects in the + Geoscience Object API. 
diff --git a/packages/evo-objects/src/evo/objects/data.py b/packages/evo-objects/src/evo/objects/data.py
index 2dde2f4e..3ab73a5b 100644
--- a/packages/evo-objects/src/evo/objects/data.py
+++ b/packages/evo-objects/src/evo/objects/data.py
@@ -16,9 +16,10 @@
 from dataclasses import dataclass
 from datetime import datetime
 from typing import Protocol
+from urllib.parse import parse_qs, urlparse
 from uuid import UUID
 
-from evo.common import ResourceMetadata
+from evo.common import Environment, ResourceMetadata
 from evo.workspaces import ServiceUser
 
 from .exceptions import SchemaIDFormatError
@@ -26,6 +27,7 @@
 __all__ = [
     "ObjectMetadata",
     "ObjectOrderByEnum",
+    "ObjectReference",
     "ObjectSchema",
    "ObjectVersion",
     "SchemaVersion",
@@ -43,6 +45,116 @@ class ObjectOrderByEnum(str, enum.Enum):
     object_name = "object_name"
 
 
+class ObjectReference(str):
+    """A structured URL reference to a geoscience object, optionally including a version ID.
+
+    Geoscience Object URL references are the fully qualified HTTPS URLs used to access objects in the
+    Geoscience Object API. The URL may follow the path or UUID format, and may optionally include a version ID.
+
+    In most cases, UUID-based references are preferred, as they are immutable and unambiguous. However,
+    path-based references can be useful in scenarios where the object ID is not known, such as when creating
+    new objects or when working with objects in a more human-readable way.
+    """
+
+    _RE_PATH = re.compile(
+        r"""
+        ^/geoscience-object
+        /orgs/(?P<org_id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})
+        /workspaces/(?P<workspace_id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})
+        /objects
+        (?:
+            /path/(?P<object_path>[^?]+)
+            | /(?P<object_id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})
+        )$
+        """,
+        re.IGNORECASE | re.VERBOSE,
+    )
+
+    hub_url: str
+    """The base URL of the Evo Hub."""
+
+    org_id: UUID
+    """The ID of the Evo Organization the object belongs to."""
+
+    workspace_id: UUID
+    """The ID of the Evo Workspace the object belongs to."""
+
+    object_id: UUID | None
+    """The UUID of the object, if specified in the URL."""
+
+    object_path: str | None
+    """The path of the object, if specified in the URL."""
+
+    version_id: str | None
+    """The version ID of the object, if specified in the URL."""
+
+    def __new__(cls, value: str) -> ObjectReference:
+        inst = str.__new__(cls, value)
+
+        parsed = urlparse(value)
+        if parsed.scheme != "https":
+            raise ValueError("Reference must be a valid HTTPS URL")
+
+        inst.hub_url = f"{parsed.scheme}://{parsed.netloc}/"
+
+        if match := cls._RE_PATH.fullmatch(parsed.path):
+            inst.org_id = UUID(match.group("org_id"))
+            inst.workspace_id = UUID(match.group("workspace_id"))
+
+            if match.group("object_id"):
+                inst.object_id = UUID(match.group("object_id"))
+                inst.object_path = None
+            else:
+                inst.object_id = None
+                inst.object_path = match.group("object_path").lstrip("/")
+        else:
+            raise ValueError("Reference path is not valid")
+
+        query_params = parse_qs(parsed.query)
+        inst.version_id = query_params.get("version", [None])[0]
+        return inst
+
+    @property
+    def environment(self) -> Environment:
+        return Environment(hub_url=self.hub_url, org_id=self.org_id, workspace_id=self.workspace_id)
+
+    @staticmethod
+    def new(
+        environment: Environment,
+        object_id: UUID | None = None,
+        object_path: str | None = None,
+        version_id: str | None = None,
+    ) -> ObjectReference:
+        """Create a new ObjectReference from its components.
+
+        Either object_id or object_path must be provided, but not both.
+
+        :param environment: The Evo environment the object belongs to.
+        :param object_id: The UUID of the object, if known.
+        :param object_path: The path of the object, if known.
+        :param version_id: The version ID of the object, if known.
+
+        :returns: A new ObjectReference instance.
+
+        :raises ValueError: If neither or both of object_id and object_path are provided.
+ """ + if object_id is None and object_path is None: + raise ValueError("Either object_id or object_path must be provided") + if object_id is not None and object_path is not None: + raise ValueError("Only one of object_id or object_path can be provided") + + if object_id is not None: + path = ( + f"geoscience-object/orgs/{environment.org_id}/workspaces/{environment.workspace_id}/objects/{object_id}" + ) + else: + path = f"geoscience-object/orgs/{environment.org_id}/workspaces/{environment.workspace_id}/objects/path/{object_path.lstrip('/')}" + + if version_id is not None: + path += f"?version={version_id}" + + return ObjectReference(f"{environment.hub_url.rstrip('/')}/{path}") + + @dataclass(frozen=True, kw_only=True) class ObjectMetadata(ResourceMetadata): """Metadata about a geoscience object.""" @@ -71,12 +183,10 @@ def path(self) -> str: return f"{self.parent}/{self.name}" @property - def url(self) -> str: + def url(self) -> ObjectReference: """The url of the object.""" - return "{hub_url}/geoscience-object/orgs/{org_id}/workspaces/{workspace_id}/objects/{object_id}?version={version_id}".format( - hub_url=self.environment.hub_url.rstrip("/"), - org_id=self.environment.org_id, - workspace_id=self.environment.workspace_id, + return ObjectReference.new( + environment=self.environment, object_id=self.id, version_id=self.version_id, ) @@ -107,10 +217,8 @@ class OrgObjectMetadata(ResourceMetadata): @property def url(self) -> str: """The url of the object.""" - return "{hub_url}/geoscience-object/orgs/{org_id}/workspaces/{workspace_id}/objects/{object_id}".format( - hub_url=self.environment.hub_url.rstrip("/"), - org_id=self.environment.org_id, - workspace_id=self.workspace_id, + return ObjectReference.new( + environment=self.environment, object_id=self.id, ) diff --git a/packages/evo-objects/src/evo/objects/parquet/__init__.py b/packages/evo-objects/src/evo/objects/parquet/__init__.py new file mode 100644 index 00000000..efb0ae70 --- /dev/null +++ b/packages/evo-objects/src/evo/objects/parquet/__init__.py @@ -0,0 +1,26 @@ +# Copyright © 2025 Bentley Systems, Incorporated +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + import pyarrow # noqa: F401 +except ImportError: + raise ImportError("The 'pyarrow' package is required to use ParquetLoader") from None + +from .loader import ParquetDownloader, ParquetLoader +from .types import ArrayTableInfo, LookupTableInfo, TableInfo + +__all__ = [ + "ArrayTableInfo", + "LookupTableInfo", + "ParquetDownloader", + "ParquetLoader", + "TableInfo", +] diff --git a/packages/evo-objects/src/evo/objects/parquet/loader.py b/packages/evo-objects/src/evo/objects/parquet/loader.py new file mode 100644 index 00000000..e9a7d7b9 --- /dev/null +++ b/packages/evo-objects/src/evo/objects/parquet/loader.py @@ -0,0 +1,231 @@ +# Copyright © 2025 Bentley Systems, Incorporated +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
diff --git a/packages/evo-objects/src/evo/objects/parquet/__init__.py b/packages/evo-objects/src/evo/objects/parquet/__init__.py
new file mode 100644
index 00000000..efb0ae70
--- /dev/null
+++ b/packages/evo-objects/src/evo/objects/parquet/__init__.py
@@ -0,0 +1,26 @@
+# Copyright © 2025 Bentley Systems, Incorporated
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+try:
+    import pyarrow  # noqa: F401
+except ImportError:
+    raise ImportError("The 'pyarrow' package is required to use ParquetLoader") from None
+
+from .loader import ParquetDownloader, ParquetLoader
+from .types import ArrayTableInfo, LookupTableInfo, TableInfo
+
+__all__ = [
+    "ArrayTableInfo",
+    "LookupTableInfo",
+    "ParquetDownloader",
+    "ParquetLoader",
+    "TableInfo",
+]
diff --git a/packages/evo-objects/src/evo/objects/parquet/loader.py b/packages/evo-objects/src/evo/objects/parquet/loader.py
new file mode 100644
index 00000000..e9a7d7b9
--- /dev/null
+++ b/packages/evo-objects/src/evo/objects/parquet/loader.py
@@ -0,0 +1,243 @@
+# Copyright © 2025 Bentley Systems, Incorporated
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import contextlib
+from collections.abc import AsyncGenerator
+from io import BytesIO
+from logging import getLogger
+from types import TracebackType
+from typing import cast
+
+import pyarrow as pa
+import pyarrow.parquet as pq
+from pydantic import TypeAdapter
+
+from evo.common import ICache, IFeedback, ITransport
+from evo.common.io import BytesDestination, ChunkedIOManager, Download, HTTPSource
+from evo.common.utils import NoFeedback
+
+from ..exceptions import SchemaValidationError, TableFormatError
+from ..utils import ArrowTableFormat, KnownTableFormat
+from .types import TableInfo
+
+try:
+    import pandas as pd
+except ImportError:
+    _PD_AVAILABLE = False
+else:
+    _PD_AVAILABLE = True
+
+try:
+    import numpy as np
+except ImportError:
+    _NP_AVAILABLE = False
+else:
+    _NP_AVAILABLE = True
+
+__all__ = [
+    "ParquetDownloader",
+    "ParquetLoader",
+]
+
+logger = getLogger(__name__)
+
+_TABLE_INFO_ADAPTER: TypeAdapter[TableInfo] = TypeAdapter(TableInfo)
+
+
+class ParquetLoader:
+    """A loader for Parquet data from a pyarrow.parquet.ParquetFile.
+
+    This class adds standardised support for validating Geoscience Object table info
+    against the loaded Parquet schema, as well as convenience methods for loading
+    the data as a PyArrow Table, Pandas DataFrame, or NumPy array.
+    """
+
+    def __init__(self, pa_file: pa.NativeFile) -> None:
+        """
+        :param pa_file: A PyArrow NativeFile containing the Parquet data.
+        """
+        self._pa_file = pa_file
+        self._parquet_file: pq.ParquetFile | None = None
+
+    def __enter__(self) -> ParquetLoader:
+        if self._parquet_file is not None:
+            raise RuntimeError("ParquetLoader is already in use")
+        self._parquet_file = pq.ParquetFile(self._pa_file.__enter__())
+        return self
+
+    async def __aenter__(self) -> ParquetLoader:
+        # Delegate to the synchronous context manager.
+        # This implementation is just to support async with
+        # syntax for combination with ParquetDownloader below.
+        return self.__enter__()
+
+    def __exit__(
+        self,
+        exc_type: type[Exception] | None,
+        exc_val: Exception | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        self._parquet_file = None
+        return self._pa_file.__exit__(exc_type, exc_val, exc_tb)
+
+    async def __aexit__(
+        self,
+        exc_type: type[Exception] | None,
+        exc_val: Exception | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        # Delegate to the synchronous context manager.
+        # This implementation is just to support async with
+        # syntax for combination with ParquetDownloader below.
+        return self.__exit__(exc_type, exc_val, exc_tb)
+
+    def validate_with_table_info(self, table_info: TableInfo) -> None:
+        """Validate the provided TableInfo against the loaded Parquet schema.
+
+        :param table_info: The TableInfo to validate against the loaded Parquet schema.
+
+        :raises SchemaValidationError: If the loaded Parquet schema does not match the expected schema.
+ """ + if (pa_file := self._parquet_file) is None: + raise RuntimeError("ParquetLoader context is not active") + + logger.debug("Checking parquet data format") + + validated_table_info = _TABLE_INFO_ADAPTER.validate_python(table_info) + expected_format = KnownTableFormat.from_table_info(validated_table_info) + actual_format = ArrowTableFormat.from_schema(pa_file.schema_arrow) + try: + expected_format._check_format(actual_format) + except TableFormatError as e: + raise SchemaValidationError(str(e)) from None + + logger.debug("Checking parquet data length") + actual_length = pa_file.metadata.num_rows + if table_info["length"] != actual_length: + raise SchemaValidationError( + f"Row count ({actual_length}) does not match expectation ({table_info['length']})" + ) + + logger.debug("Parquet metadata checks succeeded") + + def load_as_table(self) -> pa.Table: + """Load the Parquet data as a PyArrow Table.""" + if self._parquet_file is None: + raise RuntimeError("ParquetLoader context is not active") + else: + return self._parquet_file.read() + + if _PD_AVAILABLE: + # Optional support for pandas dataframes + + def load_as_dataframe(self) -> pd.DataFrame: + """Load the Parquet data as a Pandas DataFrame.""" + table = self.load_as_table() + return table.to_pandas() + + if _NP_AVAILABLE: + # Optional support for numpy arrays + + def load_as_array(self) -> np.ndarray: + """Load the Parquet data as a NumPy array. + + The array will have a shape of (N,) for single-column data or (N, M) for multi-column data, + where N is the number of rows and M is the number of columns. The target data _must_ have a uniform dtype. + + :return: A NumPy array containing the data. + """ + table = self.load_as_table() + columns = cast(list[np.ndarray], [col.combine_chunks().to_numpy() for col in table.itercolumns()]) + if len(columns) == 1: + return columns[0] + else: + return np.column_stack(columns) + + +class ParquetDownloader: + """A downloader for Parquet data that provides a ParquetLoader for reading the data. + + This class supports downloading the data to a cache or to memory, and provides + a ParquetLoader for reading the downloaded data. + """ + + def __init__(self, download: Download, transport: ITransport, cache: ICache | None = None) -> None: + """ + :param download: The download information for the Parquet data. + :param transport: The transport to use for data downloads. + :param cache: An optional cache to use for data downloads. + """ + self._evo_download = download + self._transport = transport + self._cache = cache + + async def _download_to_cache(self, fb: IFeedback) -> pa.OSFile: + cached = await self._evo_download.download_to_cache(self._cache, self._transport, fb=fb) + return pa.OSFile(str(cached), "r") + + async def _download_to_memory(self, fb: IFeedback) -> pa.BufferReader: + # Initialize a buffer to store the downloaded data in memory + memory = BytesIO() + + # Use ChunkedIOManager to download the data into the memory buffer + manager = ChunkedIOManager() + async with HTTPSource(self._evo_download.get_download_url, self._transport) as source: + destination = BytesDestination(memory) + await manager.run(source, destination, fb=fb) + + # Reset the buffer's position to the beginning + memory.seek(0) + return pa.BufferReader(memory.getbuffer()) + + async def download(self, fb: IFeedback = NoFeedback) -> ParquetLoader: + """Download the Parquet data and return a ParquetLoader for reading it. + + :param fb: An optional feedback instance to report download progress to. 
+
+        :return: A ParquetLoader that can be used to read the downloaded data.
+        """
+        if self._cache is not None:
+            file = await self._download_to_cache(fb)
+        else:
+            file = await self._download_to_memory(fb)
+
+        return ParquetLoader(file)
+
+    async def __aenter__(self) -> ParquetLoader:
+        # Delegate to the download method to get a ParquetLoader, then enter its context.
+        self._loader = await self.download()
+        return self._loader.__enter__()
+
+    async def __aexit__(
+        self,
+        exc_type: type[Exception] | None,
+        exc_val: Exception | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        # Close the loader that was entered by __aenter__.
+        loader, self._loader = self._loader, None
+        if loader is not None:
+            loader.__exit__(exc_type, exc_val, exc_tb)
+
+    @contextlib.asynccontextmanager
+    async def with_feedback(self, fb: IFeedback) -> AsyncGenerator[ParquetLoader, None]:
+        """Async context manager to download the Parquet data with feedback and provide a ParquetLoader for reading it.
+
+        :param fb: A feedback instance to report download progress to.
+
+        :yields: A ParquetLoader that can be used to read the downloaded data.
+        """
+        async with await self.download(fb=fb) as loader:
+            yield loader
diff --git a/packages/evo-objects/src/evo/objects/parquet/types.py b/packages/evo-objects/src/evo/objects/parquet/types.py
new file mode 100644
index 00000000..abb1775f
--- /dev/null
+++ b/packages/evo-objects/src/evo/objects/parquet/types.py
@@ -0,0 +1,42 @@
+# Copyright © 2025 Bentley Systems, Incorporated
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+from typing import TypeAlias
+
+if sys.version_info >= (3, 12):
+    from typing import NotRequired, TypedDict
+else:
+    from typing_extensions import NotRequired, TypedDict
+
+__all__ = [
+    "ArrayTableInfo",
+    "LookupTableInfo",
+    "TableInfo",
+]
+
+
+class _BaseTableInfo(TypedDict):
+    data: str
+    length: int
+
+
+class ArrayTableInfo(_BaseTableInfo):
+    data_type: str
+    width: NotRequired[int]
+
+
+class LookupTableInfo(_BaseTableInfo):
+    keys_data_type: str
+    values_data_type: str
+
+
+TableInfo: TypeAlias = ArrayTableInfo | LookupTableInfo
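
Reviewer note: a hedged sketch of the new ParquetDownloader flow combined with a hand-written TableInfo of the ArrayTableInfo shape above. `download` and `transport` stand in for an evo.common.io.Download and an ITransport obtained elsewhere, and the data reference is invented:

    from evo.objects.parquet import ParquetDownloader, TableInfo

    async def load_xyz(download, transport) -> None:
        table_info: TableInfo = {
            "data": "0" * 64,  # data reference name, illustrative only
            "length": 123,
            "width": 3,
            "data_type": "float64",
        }
        # With no cache configured, the parquet payload is downloaded into memory.
        async with ParquetDownloader(download, transport) as loader:
            loader.validate_with_table_info(table_info)
            print(loader.load_as_table().num_rows)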
diff --git a/packages/evo-objects/src/evo/objects/utils/__init__.py b/packages/evo-objects/src/evo/objects/utils/__init__.py
index c250b0b0..ccd86e99 100644
--- a/packages/evo-objects/src/evo/objects/utils/__init__.py
+++ b/packages/evo-objects/src/evo/objects/utils/__init__.py
@@ -9,27 +9,33 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from ._types import DataFrame, Table
-from .data import ObjectDataClient
-
-__all__ = [
-    "DataFrame",
-    "ObjectDataClient",
-    "Table",
-]
+try:
+    # Import the table type for backwards compatibility. This should be removed in a future release.
+    from pyarrow import Table  # noqa: F401
+except ImportError:
+    raise ImportError("pyarrow is required to use the utils package in evo-objects")
 
 try:
-    import pyarrow  # noqa: F401
+    # Import the dataframe type for backwards compatibility. This should be removed in a future release.
+    from pandas import DataFrame  # noqa: F401
 except ImportError:
-    pass  # Omit the following imports if pyarrow is not installed.
-else:
-    from .table_formats import all_known_formats, get_known_format
-    from .tables import ArrowTableFormat, BaseTableFormat, KnownTableFormat
+    DataFrame = None  # type: ignore
 
-    __all__ += [
-        "ArrowTableFormat",
-        "BaseTableFormat",
-        "KnownTableFormat",
-        "all_known_formats",
-        "get_known_format",
-    ]
+from .data import ObjectDataClient
+from .table_formats import all_known_formats, get_known_format
+from .tables import ArrowTableFormat, BaseTableFormat, KnownTableFormat
+
+# We _used_ to export Table and DataFrame from this package as custom protocols, but we now use the
+# actual pyarrow.Table and pandas.DataFrame types. They are imported above from pyarrow and pandas
+# for backwards compatibility, but are no longer listed in __all__, and these backwards-compatible
+# imports should be removed in a future release.
+
+__all__ = [
+    "ArrowTableFormat",
+    "BaseTableFormat",
+    "KnownTableFormat",
+    "ObjectDataClient",
+    "all_known_formats",
+    "get_known_format",
+]
diff --git a/packages/evo-objects/src/evo/objects/utils/_types.py b/packages/evo-objects/src/evo/objects/utils/_types.py
deleted file mode 100644
index 93fd012a..00000000
--- a/packages/evo-objects/src/evo/objects/utils/_types.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# Copyright © 2025 Bentley Systems, Incorporated
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import annotations
-
-from typing import Protocol
-
-# `evo-objects` uses protocols for annotating some pyarrow types, because:
-# - pyarrow is optional, but type annotations are not.
-# - pyarrow has poor type checker support.
-#
-# These protocols should be treated as aliases for the corresponding pyarrow types.
-# Any required interfaces from the corresponding pyarrow types should be added to these protocols as needed.
-
-
-class DataType(Protocol):
-    """Pyarrow data type.
-
-    https://arrow.apache.org/docs/python/generated/pyarrow.DataType.html
-    """
-
-    ...
-
-
-class Schema(Protocol):
-    """Pyarrow schema.
-
-    https://arrow.apache.org/docs/python/generated/pyarrow.Schema.html
-    """
-
-    @property
-    def names(self) -> list[str]:
-        """The schema's field names."""
-        ...
-
-    @property
-    def types(self) -> list[DataType]:
-        """The schema's field types."""
-        ...
-
-
-class Table(Protocol):
-    """Pyarrow table.
-
-    https://arrow.apache.org/docs/python/generated/pyarrow.Table.html
-    """
-
-    @property
-    def schema() -> Schema:
-        """Schema of the table and its columns."""
-        ...
-
-    @property
-    def num_columns(self) -> int:
-        """Number of columns in this table."""
-        ...
-
-    @property
-    def num_rows(self) -> int:
-        """Number of rows in this table.
-
-        Due to the definition of a table, all columns have the same number of rows.
-        """
-        ...
-
-    def to_pandas(self) -> DataFrame:
-        """Convert to a pandas-compatible NumPy array or DataFrame, as appropriate"""
-
-
-class DataFrame(Protocol):
-    """Pandas DataFrame.
-
-    https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
-    """
diff --git a/packages/evo-objects/src/evo/objects/utils/data.py b/packages/evo-objects/src/evo/objects/utils/data.py
index 25491441..cbb38844 100644
--- a/packages/evo-objects/src/evo/objects/utils/data.py
+++ b/packages/evo-objects/src/evo/objects/utils/data.py
@@ -21,11 +21,20 @@
 from evo.common.utils import NoFeedback, PartialFeedback
 
 from ..io import _CACHE_SCOPE, ObjectDataUpload
-from ._types import DataFrame, Table
 
-__all__ = [
-    "ObjectDataClient",
-]
+try:
+    import pyarrow as pa
+except ImportError:
+    raise ImportError("ObjectDataClient requires the `pyarrow` package to be installed")
+
+try:
+    import pandas as pd
+except ImportError:
+    _PD_AVAILABLE = False
+else:
+    _PD_AVAILABLE = True
+
+__all__ = ["ObjectDataClient"]
 
 logger = logging.getLogger("object.data")
 
@@ -51,13 +60,6 @@ def _iter_refs(target: Any, _key: str | None = None) -> Iterator[str]:
             yield str(value)
 
 
-def _as_table(dataframe: DataFrame) -> Table:
-    """Wrapper around pyarrow.Table.from_pandas() with a local import."""
-    import pyarrow
-
-    return pyarrow.Table.from_pandas(dataframe)
-
-
 class ObjectDataClient:
     """An optional wrapper around data upload and download functionality for geoscience objects.
 
@@ -72,11 +74,6 @@ def __init__(self, environment: Environment, connector: APIConnector, cache: ICa
         :param connector: The API connector to use for uploading and downloading data.
         :param cache: The cache to use for storing data locally.
         """
-        try:
-            import pyarrow  # noqa: F401
-        except ImportError:
-            raise RuntimeError("Unable to create ObjectDataClient because the `pyarrow` package is not installed")
-
         self._environment = environment
         self._connector = connector
         self._cache = cache
@@ -90,34 +87,6 @@ def clear_cache(self) -> None:
         """Clear the cache used by this client."""
         self._cache.clear_cache(environment=self._environment, scope=_CACHE_SCOPE)
 
-    def save_table(self, table: Table) -> dict:
-        """Save a pyarrow table to a file, returning the table info as a dictionary.
-
-        :param table: The pyarrow table to save.
-
-        :return: Information about the saved table.
-
-        :raises TableFormatError: If the provided table does not match this format.
-        :raises StorageFileNotFoundError: If the destination does not exist or is not a directory.
-        """
-        from .table_formats import get_known_format
-
-        known_format = get_known_format(table)
-        table_info = known_format.save_table(table=table, destination=self.cache_location)
-        return table_info
-
-    def save_dataframe(self, dataframe: DataFrame) -> dict:
-        """Save a pandas dataframe to a file, returning the table info as a dictionary.
-
-        :param dataframe: The pandas dataframe to save.
-
-        :return: Information about the saved table.
-
-        :raises TableFormatError: If the provided table does not match this format.
-        :raises StorageFileNotFoundError: If the destination does not exist or is not a directory.
-        """
-        return self.save_table(_as_table(dataframe))
-
     async def upload_referenced_data(self, object_model: dict, fb: IFeedback = NoFeedback) -> None:
         """Upload all data referenced by a geoscience object.
 
@@ -155,7 +124,23 @@ async def upload_referenced_data(self, object_model: dict, fb: IFeedback = NoFee
         )
         fb.progress(1)
 
-    async def upload_table(self, table: Table, fb: IFeedback = NoFeedback) -> dict:
+    def save_table(self, table: pa.Table) -> dict:
+        """Save a pyarrow table to a file, returning the table info as a dictionary.
+
+        :param table: The pyarrow table to save.
+
+        :return: Information about the saved table.
+
+        :raises TableFormatError: If the provided table does not match a known format.
+        :raises StorageFileNotFoundError: If the destination does not exist or is not a directory.
+        """
+        from .table_formats import get_known_format
+
+        known_format = get_known_format(table)
+        table_info = known_format.save_table(table=table, destination=self.cache_location)
+        return table_info
+
+    async def upload_table(self, table: pa.Table, fb: IFeedback = NoFeedback) -> dict:
         """Upload pyarrow table to the geoscience object service, returning a GO model of the uploaded data.
 
         :param table: The table to be uploaded.
@@ -174,22 +159,9 @@ async def upload_table(self, table: Table, fb: IFeedback = NoFeedback) -> dict:
         fb.progress(1)
         return table_info
 
-    async def upload_dataframe(self, dataframe: DataFrame, fb: IFeedback = NoFeedback) -> dict:
-        """Upload pandas dataframe to the geoscience object service, returning a GO model of the uploaded data.
-
-        :param dataframe: The pandas dataframe to be uploaded.
-        :param fb: A feedback object for tracking upload progress.
-
-        :return: A description of the uploaded data.
-
-        :raises TableFormatError: If the table does not match a known format.
-        """
-        table_info = await self.upload_table(_as_table(dataframe), fb=fb)
-        return table_info
-
     async def download_table(
         self, object_id: UUID, version_id: str, table_info: dict, fb: IFeedback = NoFeedback
-    ) -> Table:
+    ) -> pa.Table:
         """Download pyarrow table from the geoscience object service.
 
         The parquet metadata will be used to make sure the file contents matches the expected format before the table
@@ -207,44 +179,70 @@
         :raises TableFormatError: If the data does not match the expected format.
         :raises SchemaValidationError: If the data has a different number of rows than expected.
         """
-        from ..client import ObjectAPIClient  # Import here to avoid circular import.
-        from .tables import KnownTableFormat  # Import here to avoid import error if pyarrow is not installed.
-
-        parquet_file = self.cache_location / str(table_info["data"])
-        if not parquet_file.exists():  # Only download it if it isn't already there.
-            # Reusing the implementation for preparing a download from ObjectAPIClient to avoid code duplication.
-            client = ObjectAPIClient(self._environment, self._connector)
-            (download,) = [d async for d in client.prepare_data_download(object_id, version_id, [table_info["data"]])]
-            await download.download_to_cache(cache=self._cache, transport=self._connector.transport, fb=fb)
-        else:
-            fb.progress(1)
-            logger.debug(f"Data not downloaded because it already exists locally (label: {table_info['data']})")
+        # Import here to avoid circular import.
+        from ..client import ObjectAPIClient
+        from ..parquet import ParquetDownloader
 
-        # Load the table from the cache.
-        return KnownTableFormat.load_table(table_info, self.cache_location)
+        client = ObjectAPIClient(self._environment, self._connector)
+        (download,) = [d async for d in client.prepare_data_download(object_id, version_id, [table_info["data"]])]
 
-    async def download_dataframe(
-        self, object_id: UUID, version_id: str, table_info: dict, fb: IFeedback = NoFeedback
-    ) -> DataFrame:
-        """Download pandas dataframe data from the geoscience object service.
+        # Defer downloading the table to the new ParquetLoader class.
+        async with ParquetDownloader(
+            download=download, transport=self._connector.transport, cache=self._cache
+        ).with_feedback(fb) as loader:
+            loader.validate_with_table_info(table_info)
+            return loader.load_as_table()
 
-        The parquet metadata will be used to make sure the file contents matches the expected format before the table
-        is read into memory.
+    if _PD_AVAILABLE:
+        # Optional support for pandas dataframes. Depends on both pyarrow and pandas.
 
-        :param object_id: The object ID to download the data from.
-        :param version_id: The version ID to download the data from.
-        :param table_info: The table info that defines the expected format. The model's `data` will be downloaded from
-            the service.
-        :param fb: A feedback object for tracking download progress.
+        def save_dataframe(self, dataframe: pd.DataFrame) -> dict:
+            """Save a pandas dataframe to a file, returning the table info as a dictionary.
 
-        :return: A pandas dataframe loaded directly from the parquet file.
+            :param dataframe: The pandas dataframe to save.
 
-        :raises DataNotFoundError: If the data does not exist or is not associated with this object version.
-        :raises TableFormatError: If the data does not match the expected format.
-        :raises SchemaValidationError: If the data has a different number of rows than expected.
-        """
-        table = await self.download_table(object_id, version_id, table_info, fb)
-        try:
-            return table.to_pandas()
-        except ModuleNotFoundError:
-            raise RuntimeError("Unable to download dataframe because the `pandas` package is not installed")
+            :return: Information about the saved table.
+
+            :raises TableFormatError: If the provided table does not match a known format.
+            :raises StorageFileNotFoundError: If the destination does not exist or is not a directory.
+            """
+            return self.save_table(pa.Table.from_pandas(dataframe))
+
+        async def upload_dataframe(self, dataframe: pd.DataFrame, fb: IFeedback = NoFeedback) -> dict:
+            """Upload pandas dataframe to the geoscience object service, returning a GO model of the uploaded data.
+
+            :param dataframe: The pandas dataframe to be uploaded.
+            :param fb: A feedback object for tracking upload progress.
+
+            :return: A description of the uploaded data.
+
+            :raises TableFormatError: If the table does not match a known format.
+            """
+            table_info = await self.upload_table(pa.Table.from_pandas(dataframe), fb=fb)
+            return table_info
+
+        async def download_dataframe(
+            self, object_id: UUID, version_id: str, table_info: dict, fb: IFeedback = NoFeedback
+        ) -> pd.DataFrame:
+            """Download pandas dataframe data from the geoscience object service.
+
+            The parquet metadata will be used to make sure the file contents matches the expected format before the table
+            is read into memory.
+
+            :param object_id: The object ID to download the data from.
+            :param version_id: The version ID to download the data from.
+            :param table_info: The table info that defines the expected format. The model's `data` will be downloaded from
+                the service.
+            :param fb: A feedback object for tracking download progress.
+
+            :return: A pandas dataframe loaded directly from the parquet file.
+
+            :raises DataNotFoundError: If the data does not exist or is not associated with this object version.
+            :raises TableFormatError: If the data does not match the expected format.
+            :raises SchemaValidationError: If the data has a different number of rows than expected.
+            """
+            table = await self.download_table(object_id, version_id, table_info, fb)
+            try:
+                return table.to_pandas()
+            except ModuleNotFoundError:
+                raise RuntimeError("Unable to download dataframe because the `pandas` package is not installed")
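
Reviewer note: taken together, the pandas-gated methods give ObjectDataClient a symmetric dataframe round trip. A usage sketch, assuming `data_client`, `object_id`, and `version_id` were obtained as in the surrounding code:

    import pandas as pd

    async def round_trip(data_client, object_id, version_id) -> pd.DataFrame:
        df = pd.DataFrame({"x": [0.0, 1.0], "y": [2.0, 3.0], "z": [4.0, 5.0]})
        table_info = await data_client.upload_dataframe(df)  # GO model of the uploaded data
        return await data_client.download_dataframe(object_id, version_id, table_info)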
+ """ + table = await self.download_table(object_id, version_id, table_info, fb) + try: + return table.to_pandas() + except ModuleNotFoundError: + raise RuntimeError("Unable to download dataframe because the `pandas` package is not installed") diff --git a/packages/evo-objects/src/evo/objects/utils/table_formats.py b/packages/evo-objects/src/evo/objects/utils/table_formats.py index 57eee26d..27265792 100644 --- a/packages/evo-objects/src/evo/objects/utils/table_formats.py +++ b/packages/evo-objects/src/evo/objects/utils/table_formats.py @@ -14,7 +14,6 @@ from evo import logging from ..exceptions import TableFormatError -from ._types import Table from .tables import ArrowTableFormat, KnownTableFormat __all__ = [ @@ -135,7 +134,7 @@ ] -def get_known_format(table: Table) -> KnownTableFormat: +def get_known_format(table: pa.Table) -> KnownTableFormat: """Get the known table format that best matches the provided table. If both a multidimensional format and a format with fixed dimensions would match, the format with fixed dimensions diff --git a/packages/evo-objects/src/evo/objects/utils/tables.py b/packages/evo-objects/src/evo/objects/utils/tables.py index 0397a658..baae199a 100644 --- a/packages/evo-objects/src/evo/objects/utils/tables.py +++ b/packages/evo-objects/src/evo/objects/utils/tables.py @@ -19,12 +19,12 @@ import pyarrow as pa import pyarrow.parquet as pq +from typing_extensions import deprecated import evo.logging from evo.common.exceptions import StorageFileNotFoundError from ..exceptions import SchemaValidationError, TableFormatError -from ._types import DataType, Schema, Table logger = evo.logging.getLogger("object.tables") @@ -39,7 +39,7 @@ class _ColumnFormat: - def __init__(self, format_spec: DataType | str): + def __init__(self, format_spec: pa.DataType | str): if isinstance(format_spec, str): self._type = self._get_data_type(format_spec) self._format_id = format_spec @@ -48,7 +48,7 @@ def __init__(self, format_spec: DataType | str): self._format_id = self._get_format_id(format_spec) @staticmethod - def _get_data_type(format_id: str) -> DataType: + def _get_data_type(format_id: str) -> pa.DataType: match format_id: case "float64": return pa.float64() @@ -72,7 +72,7 @@ def _get_data_type(format_id: str) -> DataType: raise TypeError(f"Unsupported column type '{unknown_format}'") @staticmethod - def _get_format_id(data_type: DataType) -> str: + def _get_format_id(data_type: pa.DataType) -> str: match str(data_type): case "double": return "float64" @@ -100,14 +100,14 @@ def id(self) -> str: return self._format_id @property - def type(self) -> DataType: + def type(self) -> pa.DataType: return self._type class BaseTableFormat: """Base type for comparing table formats""" - def __init__(self, name: str, columns: list[DataType | str | EllipsisType]) -> None: + def __init__(self, name: str, columns: list[pa.DataType | str | EllipsisType]) -> None: """ :param name: The display name for this format. :param columns: A list of column data types in this format. A single column data type followed by Ellipsis @@ -157,7 +157,7 @@ class ArrowTableFormat(BaseTableFormat): """Specialised table format type that can be generated from a pyarrow table""" @classmethod - def from_schema(cls, pa_schema: Schema) -> ArrowTableFormat: + def from_schema(cls, pa_schema: pa.Schema) -> ArrowTableFormat: """Generate an ArrowTableFormat instance that represents the structure of the provided table schema. :param pa_schema: Table schema to generate a format representation for. 
@@ -170,7 +170,7 @@ def from_schema(cls, pa_schema: Schema) -> ArrowTableFormat:
 class KnownTableFormat(BaseTableFormat):
     """A definition of a known table format that matches a Geoscience Object Schema model type"""
 
-    def __init__(self, name: str, columns: list[DataType | EllipsisType], field_names: list[str] | None) -> None:
+    def __init__(self, name: str, columns: list[pa.DataType | EllipsisType], field_names: list[str] | None) -> None:
         """
         :param name: The display name for this format.
         :param columns: A list of column data types in this format. A single column data type followed by Ellipsis
@@ -231,7 +231,7 @@ def _get_file_digest(file_path: Path) -> str:
         return sha256_digest.hexdigest()
 
     @classmethod
-    def _save_table_as_parquet(cls, table: Table, destination: Path) -> str:
+    def _save_table_as_parquet(cls, table: pa.Table, destination: Path) -> str:
         """Save a table in parquet format.
 
         :param table: The table to save to parquet file.
@@ -270,7 +270,7 @@ def _save_table_as_parquet(cls, table: Table, destination: Path) -> str:
 
         return data_ref
 
-    def save_table(self, table: Table, destination: Path) -> dict:
+    def save_table(self, table: pa.Table, destination: Path) -> dict:
         """Save a pyarrow table in parquet format and return a GO model of the table metadata.
 
         :param table: The table to save in parquet format.
@@ -346,7 +346,8 @@ def from_table_info(cls, table_info: dict) -> KnownTableFormat:
         return KnownTableFormat(name=type_name, columns=columns, field_names=table_info.get("field_names"))
 
     @classmethod
-    def load_table(cls, table_info: dict, source: Path) -> Table:
+    @deprecated("KnownTableFormat.load_table is deprecated, use evo.objects.parquet.ParquetLoader instead")
+    def load_table(cls, table_info: dict, source: Path) -> pa.Table:
         """Load parquet data as a pyarrow.Table and verify the format against the provided table info.
 
         The parquet metadata will be used to make sure the file contents matches the expected format before the table
diff --git a/packages/evo-objects/tests/data/get_object.json b/packages/evo-objects/tests/data/get_object.json
index d85bc947..a9529db9 100644
--- a/packages/evo-objects/tests/data/get_object.json
+++ b/packages/evo-objects/tests/data/get_object.json
@@ -41,7 +41,7 @@
     },
     "etag": "",
     "links": {
-        "download": "http://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/path/A/m.json",
+        "download": "https://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/path/A/m.json",
         "data": [
             {
                 "id": "00000000-0000-0000-0000-000000000000",
diff --git a/packages/evo-objects/tests/data/get_object_detailed.json b/packages/evo-objects/tests/data/get_object_detailed.json
new file mode 100644
index 00000000..7bdcf214
--- /dev/null
+++ b/packages/evo-objects/tests/data/get_object_detailed.json
@@ -0,0 +1,108 @@
+{
+    "object_path": "A/m.json",
+    "object_id": "00000000-0000-0000-0000-000000000002",
+    "object": {
+        "schema": "/objects/pointset/1.0.1/pointset.schema.json",
+        "uuid": "00000000-0000-0000-0000-000000000002",
+        "name": "Sample pointset",
+        "description": "A sample pointset object",
+        "bounding_box": {
+            "min_x": 0.0,
+            "max_x": 1.0,
+            "min_y": 2.0,
+            "max_y": 3.0,
+            "min_z": 4.0,
+            "max_z": 5.0
+        },
+        "coordinate_reference_system": {
+            "epsg_code": 2048
+        },
+        "locations": {
+            "coordinates": {
+                "data": "0000000000000000000000000000000000000000000000000000000000000000",
+                "length": 123,
+                "width": 3,
+                "data_type": "float64"
+            },
+            "attributes": [
+                {
+                    "table": {
+                        "data": "0000000000000000000000000000000000000000000000000000000000000001",
+                        "length": 12,
+                        "keys_data_type": "int32",
+                        "values_data_type": "string"
+                    },
+                    "values": {
+                        "data": "0000000000000000000000000000000000000000000000000000000000000002",
+                        "length": 123,
+                        "width": 1,
+                        "data_type": "int32"
+                    },
+                    "name": "Stn",
+                    "nan_description": {
+                        "values": [
+                            0
+                        ]
+                    },
+                    "attribute_type": "category"
+                },
+                {
+                    "name": "InvRes",
+                    "nan_description": {
+                        "values": []
+                    },
+                    "values": {
+                        "data": "0000000000000000000000000000000000000000000000000000000000000003",
+                        "length": 123,
+                        "width": 1,
+                        "data_type": "float64"
+                    },
+                    "attribute_type": "scalar"
+                }
+            ]
+        }
+    },
+    "version_id": "2023-08-03T05:47:18.3402289Z",
+    "created_at": "2023-08-03T05:47:18Z",
+    "created_by": {
+        "id": "00000000-0000-0000-0000-0000000003e8",
+        "name": "Test User",
+        "email": "t.user@example.com"
+    },
+    "modified_at": "2023-08-04T05:47:18Z",
+    "modified_by": {
+        "id": "00000000-0000-0000-0000-0000000003e8",
+        "name": "Test User",
+        "email": "t.user@example.com"
+    },
+    "etag": "",
+    "links": {
+        "download": "https://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/path/A/m.json",
+        "data": [
+            {
+                "id": "00000000-0000-0000-0000-000000000000",
+                "name": "0000000000000000000000000000000000000000000000000000000000000000",
+                "download_url": "https://storage.unittest.localhost/wheres/my/data/0000000000000000000000000000000000000000000000000000000000000000"
+            },
+            {
+                "id": "00000000-0000-0000-0000-000000000001",
+                "name": "0000000000000000000000000000000000000000000000000000000000000001",
+                "download_url": "https://storage.unittest.localhost/wheres/my/data/0000000000000000000000000000000000000000000000000000000000000001"
"https://storage.unittest.localhost/wheres/my/data/0000000000000000000000000000000000000000000000000000000000000001" + }, + { + "id": "00000000-0000-0000-0000-000000000002", + "name": "0000000000000000000000000000000000000000000000000000000000000002", + "download_url": "https://storage.unittest.localhost/wheres/my/data/0000000000000000000000000000000000000000000000000000000000000002" + }, + { + "id": "00000000-0000-0000-0000-000000000003", + "name": "0000000000000000000000000000000000000000000000000000000000000003", + "download_url": "https://storage.unittest.localhost/wheres/my/data/0000000000000000000000000000000000000000000000000000000000000003" + } + ] + }, + "stage": { + "name": "Approved", + "stage_id": "00000000-0000-0000-0000-000000000888" + } +} \ No newline at end of file diff --git a/packages/evo-objects/tests/data/get_object_validator_check.json b/packages/evo-objects/tests/data/get_object_validator_check.json index ba48bde4..5b96efeb 100644 --- a/packages/evo-objects/tests/data/get_object_validator_check.json +++ b/packages/evo-objects/tests/data/get_object_validator_check.json @@ -39,7 +39,7 @@ }, "etag": "", "links": { - "download": "http://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/path/A/m.json", + "download": "https://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/path/A/m.json", "data": [ { "name": "995f2e6cab5ad17147d9c5fddf371189bef4b623f657dde91f175a0734ed17dc", diff --git a/packages/evo-objects/tests/data/list_objects_0.json b/packages/evo-objects/tests/data/list_objects_0.json index dafed5a9..baebf699 100644 --- a/packages/evo-objects/tests/data/list_objects_0.json +++ b/packages/evo-objects/tests/data/list_objects_0.json @@ -20,7 +20,7 @@ }, "etag": "", "links": { - "download": "http://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/path/A/m.json" + "download": "https://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/path/A/m.json" }, "stage": { "name": "Approved", @@ -47,7 +47,7 @@ }, "etag": "", "links": { - "download": "http://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/path/A/n.json" + "download": "https://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/path/A/n.json" } } ], @@ -55,7 +55,7 @@ "limit": 2, "total": 3, "links": { - "next": "http://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects?path=pointset/&limit=2&continuation_token=a_continuation_token", + "next": "https://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects?path=pointset/&limit=2&continuation_token=a_continuation_token", "prev": null } } diff --git a/packages/evo-objects/tests/data/list_objects_1.json b/packages/evo-objects/tests/data/list_objects_1.json index 0c9c8dbe..2d81291b 100644 --- a/packages/evo-objects/tests/data/list_objects_1.json +++ b/packages/evo-objects/tests/data/list_objects_1.json @@ -20,7 +20,7 @@ }, "etag": "", "links": { - "download": 
"http://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/path/B/o.json" + "download": "https://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/path/B/o.json" } } ], @@ -29,6 +29,6 @@ "total": 3, "links": { "next": null, - "prev": "http://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects?limit=2" + "prev": "https://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects?limit=2" } } diff --git a/packages/evo-objects/tests/data/list_objects_for_instance_0.json b/packages/evo-objects/tests/data/list_objects_for_instance_0.json index 59b7d0d6..c8c09b78 100644 --- a/packages/evo-objects/tests/data/list_objects_for_instance_0.json +++ b/packages/evo-objects/tests/data/list_objects_for_instance_0.json @@ -49,7 +49,7 @@ "limit": 2, "total": 3, "links": { - "next": "http://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/objects?limit=2&continuation_token=a_continuation_token", + "next": "https://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/objects?limit=2&continuation_token=a_continuation_token", "prev": null } } diff --git a/packages/evo-objects/tests/data/list_objects_for_instance_1.json b/packages/evo-objects/tests/data/list_objects_for_instance_1.json index 21367fe7..3afdfdef 100644 --- a/packages/evo-objects/tests/data/list_objects_for_instance_1.json +++ b/packages/evo-objects/tests/data/list_objects_for_instance_1.json @@ -26,6 +26,6 @@ "total": 3, "links": { "next": null, - "prev": "http://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/objects?limit=2" + "prev": "https://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/objects?limit=2" } } diff --git a/packages/evo-objects/tests/data/list_versions.json b/packages/evo-objects/tests/data/list_versions.json index 9e634026..b11599ce 100644 --- a/packages/evo-objects/tests/data/list_versions.json +++ b/packages/evo-objects/tests/data/list_versions.json @@ -21,7 +21,7 @@ }, "etag": "", "links": { - "download": "http://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/path/A/m.json", + "download": "https://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/path/A/m.json", "data": [] }, "versions": [{ @@ -35,11 +35,11 @@ "etag": "", "links": { "data": [{ - "download_url": "http://unittest.localhost/geoscience-data/00000000-0000-0000-0000-0000000004d2/00000000-0000-0000-0000-00000000162e/d565a304-a618-4020-a05b-a245f8177d2d?se=2025-03-26T00%3A26%3A11Z&sp=r&sv=2025-01-05&sr=b&skoid=e00b2565-5ef7-46ac-b8fd-b6aff6f93a9d&sktid=067e9632-ea4c-4ed9-9e6d-e294956e284b&skt=2025-03-25T23%3A32%3A48Z&ske=2025-03-26T00%3A32%3A48Z&sks=b&skv=2025-01-05&sig=6HCMQXBZJFvVSNYE%2FdP0E6D9cBTZ1OQfApTR97ccXew%3D", + "download_url": 
"https://unittest.localhost/geoscience-data/00000000-0000-0000-0000-0000000004d2/00000000-0000-0000-0000-00000000162e/d565a304-a618-4020-a05b-a245f8177d2d?se=2025-03-26T00%3A26%3A11Z&sp=r&sv=2025-01-05&sr=b&skoid=e00b2565-5ef7-46ac-b8fd-b6aff6f93a9d&sktid=067e9632-ea4c-4ed9-9e6d-e294956e284b&skt=2025-03-25T23%3A32%3A48Z&ske=2025-03-26T00%3A32%3A48Z&sks=b&skv=2025-01-05&sig=6HCMQXBZJFvVSNYE%2FdP0E6D9cBTZ1OQfApTR97ccXew%3D", "id": "d565a304-a618-4020-a05b-a245f8177d2d", "name": "35b8b8ba5479a34e905a1b9e212e1cb4a52ec484b969359f2c28f5a00311dbca" }], - "download": "http://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/path/A/m.json" + "download": "https://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/path/A/m.json" }, "stage": { "name": "Approved", @@ -57,11 +57,11 @@ "etag": "", "links": { "data": [{ - "download_url": "http://unittest.localhost/geoscience-data/00000000-0000-0000-0000-0000000004d2/00000000-0000-0000-0000-00000000162e/d565a304-a618-4020-a05b-a245f8177d2d?se=2025-03-26T00%3A26%3A11Z&sp=r&sv=2025-01-05&sr=b&skoid=e00b2565-5ef7-46ac-b8fd-b6aff6f93a9d&sktid=067e9632-ea4c-4ed9-9e6d-e294956e284b&skt=2025-03-25T23%3A32%3A48Z&ske=2025-03-26T00%3A32%3A48Z&sks=b&skv=2025-01-05&sig=6HCMQXBZJFvVSNYE%2FdP0E6D9cBTZ1OQfApTR97ccXew%3D", + "download_url": "https://unittest.localhost/geoscience-data/00000000-0000-0000-0000-0000000004d2/00000000-0000-0000-0000-00000000162e/d565a304-a618-4020-a05b-a245f8177d2d?se=2025-03-26T00%3A26%3A11Z&sp=r&sv=2025-01-05&sr=b&skoid=e00b2565-5ef7-46ac-b8fd-b6aff6f93a9d&sktid=067e9632-ea4c-4ed9-9e6d-e294956e284b&skt=2025-03-25T23%3A32%3A48Z&ske=2025-03-26T00%3A32%3A48Z&sks=b&skv=2025-01-05&sig=6HCMQXBZJFvVSNYE%2FdP0E6D9cBTZ1OQfApTR97ccXew%3D", "id": "d565a304-a618-4020-a05b-a245f8177d2d", "name": "35b8b8ba5479a34e905a1b9e212e1cb4a52ec484b969359f2c28f5a00311dbca" }], - "download": "http://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/path/A/m.json" + "download": "https://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/path/A/m.json" }, "stage": { "name": "Approved", @@ -79,11 +79,11 @@ "etag": "", "links": { "data": [{ - "download_url": "http://unittest.localhost/geoscience-data/00000000-0000-0000-0000-0000000004d2/00000000-0000-0000-0000-00000000162e/d565a304-a618-4020-a05b-a245f8177d2d?se=2025-03-26T00%3A26%3A11Z&sp=r&sv=2025-01-05&sr=b&skoid=e00b2565-5ef7-46ac-b8fd-b6aff6f93a9d&sktid=067e9632-ea4c-4ed9-9e6d-e294956e284b&skt=2025-03-25T23%3A32%3A48Z&ske=2025-03-26T00%3A32%3A48Z&sks=b&skv=2025-01-05&sig=6HCMQXBZJFvVSNYE%2FdP0E6D9cBTZ1OQfApTR97ccXew%3D", + "download_url": "https://unittest.localhost/geoscience-data/00000000-0000-0000-0000-0000000004d2/00000000-0000-0000-0000-00000000162e/d565a304-a618-4020-a05b-a245f8177d2d?se=2025-03-26T00%3A26%3A11Z&sp=r&sv=2025-01-05&sr=b&skoid=e00b2565-5ef7-46ac-b8fd-b6aff6f93a9d&sktid=067e9632-ea4c-4ed9-9e6d-e294956e284b&skt=2025-03-25T23%3A32%3A48Z&ske=2025-03-26T00%3A32%3A48Z&sks=b&skv=2025-01-05&sig=6HCMQXBZJFvVSNYE%2FdP0E6D9cBTZ1OQfApTR97ccXew%3D", "id": "d565a304-a618-4020-a05b-a245f8177d2d", "name": "35b8b8ba5479a34e905a1b9e212e1cb4a52ec484b969359f2c28f5a00311dbca" }], - "download": 
"http://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/path/A/m.json" + "download": "https://unittest.localhost/path/geoscience-object/orgs/00000000-0000-0000-0000-0000000004d2/workspaces/00000000-0000-0000-0000-00000000162e/objects/path/A/m.json" } } ] diff --git a/packages/evo-objects/tests/helpers.py b/packages/evo-objects/tests/helpers.py index af7c43b4..7a91a5b0 100644 --- a/packages/evo-objects/tests/helpers.py +++ b/packages/evo-objects/tests/helpers.py @@ -9,7 +9,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import random import sys +from collections.abc import Iterator +from datetime import datetime, timezone +from io import BytesIO + +import numpy +import pyarrow as pa +import pyarrow.parquet as pq + +from evo.objects.utils.tables import BaseTableFormat, _ColumnFormat class NoImport: @@ -20,13 +30,139 @@ def __init__(self, *names: str) -> None: :param names: The names of the modules to prevent from being imported. """ self._names = names + self._unloaded_modules = {} def __enter__(self) -> None: for name in self._names: - # Set the module to None to prevent it from being imported. + # If the module is already imported, save it and set to None. + self._unloaded_modules[name] = sys.modules[name] + # Set the module to None to prevent it from being re-imported. sys.modules[name] = None def __exit__(self, exc_type, exc_val, exc_tb) -> bool: - for name in self._names: - # Remove the module from sys.modules to clean up. + # Restore the unloaded modules. + for name, module in self._unloaded_modules.items(): + sys.modules[name] = module + + +class UnloadModule: + """Simple context manager to unload one or more named modules on entry and restore on exit.""" + + def __init__(self, *names: str) -> None: + """ + :param names: The names of the modules to unload on entry and restore on exit. + """ + self._names = names + self._unloaded_modules = {} + + def _unload_module(self, name: str) -> None: + if name in sys.modules: + self._unloaded_modules[name] = sys.modules[name] del sys.modules[name] + + parent, *_ = name.rpartition(".") + if parent: + self._unload_module(parent) + + def __enter__(self) -> None: + for name in self._names: + self._unload_module(name) + + def __exit__(self, exc_type, exc_val, exc_tb) -> bool: + # Restore the unloaded modules. 
+
+
+def _generate_float64_data(n_samples: int) -> Iterator[float]:
+    max_ = numpy.finfo("float64").max
+    for _ in range(n_samples):
+        yield max_ * random.uniform(-1.0, 1.0)
+
+
+def _generate_int_data(int_type: str, n_samples: int) -> Iterator[int]:
+    min_, max_ = numpy.iinfo(int_type).min, numpy.iinfo(int_type).max
+    for _ in range(n_samples):
+        yield random.randint(min_, max_)
+
+
+def _generate_bool_data(n_samples: int) -> Iterator[bool]:
+    for _ in range(n_samples):
+        yield random.choice((True, False))
+
+
+def _generate_string_data(n_samples: int) -> Iterator[str]:
+    str_sample = "0123456789ABCDEF "
+    for _ in range(n_samples):
+        length = random.randint(10, 10000)
+        yield "".join(random.choices(str_sample, k=length))
+
+
+def _generate_timestamp_data(n_samples: int) -> Iterator[datetime]:
+    min_ = datetime(1970, 1, 1, tzinfo=timezone.utc).timestamp()
+    max_ = datetime(2038, 12, 31, 23, 59, 59, 999999, tzinfo=timezone.utc).timestamp()
+    for _ in range(n_samples):
+        yield datetime.utcfromtimestamp(random.uniform(min_, max_))
+
+
+def _generate_data(format_id: str, n_samples: int) -> Iterator:
+    match format_id:
+        case "float64":
+            yield from _generate_float64_data(n_samples)
+        case "uint8" | "uint32" | "uint64" | "int32" | "int64" as int_type:
+            yield from _generate_int_data(int_type, n_samples)
+        case "bool":
+            yield from _generate_bool_data(n_samples)
+        case "string":
+            yield from _generate_string_data(n_samples)
+        case "timestamp":
+            yield from _generate_timestamp_data(n_samples)
+        case unknown_format:
+            raise TypeError(f"Unsupported format '{unknown_format}'")
+
+
+def _change_format(current_format: _ColumnFormat) -> _ColumnFormat:
+    match current_format.id:
+        case "float64":
+            return _ColumnFormat("int64")
+        case "uint8" | "uint32" | "uint64" | "int32" | "int64":
+            return _ColumnFormat("float64")
+        case "bool" | "timestamp":
+            return _ColumnFormat("string")
+        case "string":
+            return _ColumnFormat("bool")
+        case unknown_format:
+            raise TypeError(f"Unsupported format '{unknown_format}'")
+
+
+def get_sample_table(
+    table_format: BaseTableFormat, n_rows: int, add_column: bool = False, change_types: bool = False
+) -> pa.Table:
+    column_formats = [column for column in table_format._columns]
+
+    if add_column:
+        column_formats.append(_ColumnFormat(column_formats[-1].type))
+
+    if change_types:
+        column_formats = [_change_format(column) for column in column_formats]
+
+    if table_format._multi_dimensional:
+        # Test multidimensional tables with an arbitrary number of columns. If the number of columns matches a more
+        # specific GO type (one with a fixed number of columns), the more specific type would be instantiated.
+ column_formats *= 20 + + sample_schema = pa.schema( + [pa.field(f"{column.id}[{i}]", column.type, nullable=False) for i, column in enumerate(column_formats)] + ) + sample_data = [ + pa.array(_generate_data(column_format.id, n_rows), type=column_format.type, size=n_rows) + for column_format in column_formats + ] + return pa.table(sample_data, names=sample_schema.names).cast(sample_schema) + + +def get_sample_table_and_bytes(table_format: BaseTableFormat, n_rows: int) -> tuple[pa.Table, bytes]: + memory = BytesIO() + table = get_sample_table(table_format, n_rows) + pq.write_table(table, where=memory, version="2.4", compression="gzip") + return table, memory.getvalue() diff --git a/packages/evo-objects/tests/test_data_client.py b/packages/evo-objects/tests/test_data_client.py index 71616a9d..1af51e32 100644 --- a/packages/evo-objects/tests/test_data_client.py +++ b/packages/evo-objects/tests/test_data_client.py @@ -13,12 +13,15 @@ from unittest import mock from uuid import UUID +from pandas.testing import assert_frame_equal + from data import load_test_data from evo.common import IFeedback, RequestMethod from evo.common.io.exceptions import DataExistsError from evo.common.test_tools import TestWithConnector, TestWithStorage from evo.common.utils import NoFeedback, PartialFeedback from evo.objects.utils import KnownTableFormat, ObjectDataClient +from helpers import NoImport, UnloadModule, get_sample_table_and_bytes class TestObjectDataClient(TestWithConnector, TestWithStorage): @@ -27,6 +30,10 @@ def setUp(self) -> None: TestWithStorage.setUp(self) self.data_client = ObjectDataClient(environment=self.environment, connector=self.connector, cache=self.cache) + def tearDown(self) -> None: + # Clear cache between tests to avoid cached files interfering with subsequent tests. 
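(Aside: a minimal sketch of how the helpers above compose, for orientation only. The TableInfo shape and the `evo.objects.utils.data` module name are taken from the tests that follow; the assertions themselves are illustrative and not part of the change.)

    from io import BytesIO

    import pyarrow.parquet as pq

    from evo.objects.utils import KnownTableFormat
    from helpers import NoImport, UnloadModule, get_sample_table_and_bytes

    # Round-trip a generated sample table through its serialized parquet bytes.
    table_info = {"data": "00" * 32, "length": 10, "width": 3, "data_type": "float64"}
    table, payload = get_sample_table_and_bytes(KnownTableFormat.from_table_info(table_info), 10)
    assert pq.read_table(BytesIO(payload)).equals(table)

    # Simulate a missing optional dependency: unload the module that bound it on
    # first import, block the dependency, then re-import the module under test.
    with UnloadModule("evo.objects.utils.data"), NoImport("pandas"):
        from evo.objects.utils.data import ObjectDataClient  # pandas-gated API should be absent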
diff --git a/packages/evo-objects/tests/test_data_client.py b/packages/evo-objects/tests/test_data_client.py
index 71616a9d..1af51e32 100644
--- a/packages/evo-objects/tests/test_data_client.py
+++ b/packages/evo-objects/tests/test_data_client.py
@@ -13,12 +13,15 @@
 from unittest import mock
 from uuid import UUID
 
+from pandas.testing import assert_frame_equal
+
 from data import load_test_data
 from evo.common import IFeedback, RequestMethod
 from evo.common.io.exceptions import DataExistsError
 from evo.common.test_tools import TestWithConnector, TestWithStorage
 from evo.common.utils import NoFeedback, PartialFeedback
 from evo.objects.utils import KnownTableFormat, ObjectDataClient
+from helpers import NoImport, UnloadModule, get_sample_table_and_bytes
 
 
 class TestObjectDataClient(TestWithConnector, TestWithStorage):
@@ -27,6 +30,10 @@ def setUp(self) -> None:
         TestWithStorage.setUp(self)
         self.data_client = ObjectDataClient(environment=self.environment, connector=self.connector, cache=self.cache)
 
+    def tearDown(self) -> None:
+        # Clear the cache between tests to avoid cached files interfering with subsequent tests.
+        self.cache.clear_cache()
+
     @property
     def base_path(self) -> str:
         return f"geoscience-object/orgs/{self.environment.org_id}/workspaces/{self.environment.workspace_id}"
@@ -302,21 +309,28 @@ async def test_download_table(self) -> None:
         object_id = UUID(int=2)
         with (
             self.transport.set_http_response(status_code=200, content=json.dumps(get_object_response)),
-            mock.patch("evo.objects.utils.tables.KnownTableFormat") as mock_known_table_format,
             mock.patch("evo.common.io.download.HTTPSource", autospec=True) as mock_source,
         ):
-            mock_table_info = {}
-            mock_table_info["data"] = mock_data_id = "0000000000000000000000000000000000000000000000000000000000000001"
-            mock_known_table_format.load_table = mock_load_table = mock.Mock()
+            mock_table_info = {
+                "data": "0000000000000000000000000000000000000000000000000000000000000001",
+                "length": 1,
+                "width": 3,
+                "data_type": "float64",
+            }
+            mock_data_id: str = mock_table_info["data"]
+            expected_filename = self.data_client.cache_location / mock_data_id
+            sample_table, payload_bytes = get_sample_table_and_bytes(
+                KnownTableFormat.from_table_info(mock_table_info), 1
+            )
 
             async def _mock_download_file_side_effect(*args, **kwargs):
-                expected_filename = self.data_client.cache_location / mock_data_id
                 expected_download_url = get_object_response["links"]["data"][1]["download_url"]
                 actual_download_url = await kwargs["url_generator"]()
                 self.assertEqual(expected_filename, kwargs["filename"])
                 self.assertEqual(expected_download_url, actual_download_url)
                 self.assertIs(self.transport, kwargs["transport"])
                 self.assertIs(NoFeedback, kwargs["fb"])
+                expected_filename.write_bytes(payload_bytes)
 
             mock_source.download_file.side_effect = _mock_download_file_side_effect
             actual_table = await self.data_client.download_table(object_id, None, mock_table_info)
@@ -327,8 +341,7 @@ async def _mock_download_file_side_effect(*args, **kwargs):
                 path=f"{self.base_path}/objects/{object_id}",
                 headers={"Accept": "application/json", "Accept-Encoding": "gzip"},
             )
-            mock_load_table.assert_called_once_with(mock_table_info, self.data_client.cache_location)
-            self.assertIs(mock_load_table.return_value, actual_table)
+            self.assertEqual(sample_table, actual_table)
 
     async def test_download_dataframe(self) -> None:
         """Test downloading tabular data using pandas."""
@@ -336,25 +349,31 @@ async def test_download_dataframe(self) -> None:
         object_id = UUID(int=2)
         with (
             self.transport.set_http_response(status_code=200, content=json.dumps(get_object_response)),
-            mock.patch("evo.objects.utils.tables.KnownTableFormat") as mock_known_table_format,
             mock.patch("evo.common.io.download.HTTPSource", autospec=True) as mock_source,
         ):
-            mock_table_info = {}
-            mock_table_info["data"] = mock_data_id = "0000000000000000000000000000000000000000000000000000000000000001"
-            mock_known_table_format.load_table = mock_load_table = mock.Mock()
+            mock_table_info = {
+                "data": "0000000000000000000000000000000000000000000000000000000000000001",
+                "length": 1,
+                "width": 3,
+                "data_type": "float64",
+            }
+            mock_data_id: str = mock_table_info["data"]
+            expected_filename = self.data_client.cache_location / mock_data_id
+            sample_table, payload_bytes = get_sample_table_and_bytes(
+                KnownTableFormat.from_table_info(mock_table_info), 1
+            )
 
             async def _mock_download_file_side_effect(*args, **kwargs):
-                expected_filename = self.data_client.cache_location / mock_data_id
                 expected_download_url = get_object_response["links"]["data"][1]["download_url"]
                 actual_download_url = await kwargs["url_generator"]()
                 self.assertEqual(expected_filename, kwargs["filename"])
                 self.assertEqual(expected_download_url, actual_download_url)
                 self.assertIs(self.transport, kwargs["transport"])
                 self.assertIs(NoFeedback, kwargs["fb"])
+                expected_filename.write_bytes(payload_bytes)
 
             mock_source.download_file.side_effect = _mock_download_file_side_effect
-
-            _actual_dataframe = await self.data_client.download_dataframe(object_id, None, mock_table_info)
+            actual_dataframe = await self.data_client.download_dataframe(object_id, None, mock_table_info)
 
             mock_source.download_file.assert_called_once()
             self.assert_request_made(
@@ -362,99 +381,23 @@ async def _mock_download_file_side_effect(*args, **kwargs):
                 path=f"{self.base_path}/objects/{object_id}",
                 headers={"Accept": "application/json", "Accept-Encoding": "gzip"},
             )
-            mock_load_table.assert_called_once_with(mock_table_info, self.data_client.cache_location)
-
-    async def test_download_dataframe_error(self) -> None:
-        """Test error when trying to download dataframe without pandas installed."""
-        get_object_response = load_test_data("get_object.json")
-        object_id = UUID(int=2)
-        with (
-            self.transport.set_http_response(status_code=200, content=json.dumps(get_object_response)),
-            mock.patch("evo.objects.utils.tables.KnownTableFormat") as mock_known_table_format,
-            mock.patch("evo.common.io.download.HTTPSource", autospec=True),
-        ):
-            mock_table_info = {}
-            mock_table_info["data"] = "0000000000000000000000000000000000000000000000000000000000000001"
-
-            mock_known_table_format.load_table.return_value = mock_table = mock.Mock()
-            # This is the error that a non-mocked `pyarrow.Table.to_pandas()` would raise.
-            mock_table.to_pandas.side_effect = ModuleNotFoundError("No module named 'pandas'")
-
-            with self.assertRaisesRegex(
-                RuntimeError, "Unable to download dataframe because the `pandas` package is not installed"
-            ):
-                _ = await self.data_client.download_dataframe(object_id, None, mock_table_info)
-
-    async def test_download_table_confusable(self) -> None:
-        """Test downloading tabular data using pyarrow that includes confusable types."""
-        get_object_response = load_test_data("get_object_validator_check.json")
-        object_id = UUID(int=2)
-        with (
-            self.transport.set_http_response(status_code=200, content=json.dumps(get_object_response)),
-            mock.patch("evo.objects.utils.tables.KnownTableFormat") as mock_known_table_format,
-            mock.patch("evo.common.io.download.HTTPSource", autospec=True) as mock_source,
-        ):
-            mock_table_info = {}
-            mock_table_info["data"] = mock_data_name = (
-                "995f2e6cab5ad17147d9c5fddf371189bef4b623f657dde91f175a0734ed17dc"
+            assert_frame_equal(sample_table.to_pandas(), actual_dataframe)
+
+    async def test_download_dataframe_optional(self) -> None:
+        """Test download dataframe is not available if pandas is not installed."""
+        with UnloadModule("evo.objects.utils.data"), NoImport("pandas"):
+            from evo.objects.utils.data import ObjectDataClient
+
+            client = ObjectDataClient(environment=self.environment, connector=self.connector, cache=self.cache)
+            self.assertFalse(
+                any(
+                    (
+                        hasattr(ObjectDataClient, "download_dataframe"),
+                        hasattr(client, "download_dataframe"),
+                    )
+                ),
+                "download_dataframe should not be available if pandas is missing",
             )
-            mock_known_table_format.load_table = mock_load_table = mock.Mock()
-
-            async def _mock_download_file_side_effect(*args, **kwargs):
-                expected_filename = self.data_client.cache_location / str(mock_data_name)
-                expected_download_url = get_object_response["links"]["data"][0]["download_url"]
-                actual_download_url = await kwargs["url_generator"]()
-                self.assertEqual(expected_filename, kwargs["filename"])
-                self.assertEqual(expected_download_url, actual_download_url)
-                self.assertIs(self.transport, kwargs["transport"])
-                self.assertIs(NoFeedback, kwargs["fb"])
-
-            mock_source.download_file.side_effect = _mock_download_file_side_effect
-            actual_table = await self.data_client.download_table(object_id, None, mock_table_info)
-
-            mock_source.download_file.assert_called_once()
-            self.assert_request_made(
-                method=RequestMethod.GET,
-                path=f"{self.base_path}/objects/{object_id}",
-                headers={"Accept": "application/json", "Accept-Encoding": "gzip"},
-            )
-            mock_load_table.assert_called_once_with(mock_table_info, self.data_client.cache_location)
-            self.assertIs(mock_load_table.return_value, actual_table)
-
-    async def test_download_dataframe_confusable(self) -> None:
-        """Test downloading tabular data using pandas that includes confusable types."""
-        get_object_response = load_test_data("get_object_validator_check.json")
-        object_id = UUID(int=2)
-        with (
-            self.transport.set_http_response(status_code=200, content=json.dumps(get_object_response)),
-            mock.patch("evo.objects.utils.tables.KnownTableFormat") as mock_known_table_format,
-            mock.patch("evo.common.io.download.HTTPSource", autospec=True) as mock_source,
-        ):
-            mock_table_info = {}
-            mock_table_info["data"] = mock_data_name = (
-                "995f2e6cab5ad17147d9c5fddf371189bef4b623f657dde91f175a0734ed17dc"
-            )
-            mock_known_table_format.load_table = mock_load_table = mock.Mock()
-
-            async def _mock_download_file_side_effect(*args, **kwargs):
-                expected_filename = self.data_client.cache_location / str(mock_data_name)
-                expected_download_url = get_object_response["links"]["data"][0]["download_url"]
-                actual_download_url = await kwargs["url_generator"]()
-                self.assertEqual(expected_filename, kwargs["filename"])
-                self.assertEqual(expected_download_url, actual_download_url)
-                self.assertIs(self.transport, kwargs["transport"])
-                self.assertIs(NoFeedback, kwargs["fb"])
-
-            mock_source.download_file.side_effect = _mock_download_file_side_effect
-            _actual_dataframe = await self.data_client.download_dataframe(object_id, None, mock_table_info)
-
-            mock_source.download_file.assert_called_once()
-            self.assert_request_made(
-                method=RequestMethod.GET,
-                path=f"{self.base_path}/objects/{object_id}",
-                headers={"Accept": "application/json", "Accept-Encoding": "gzip"},
-            )
-            mock_load_table.assert_called_once_with(mock_table_info, self.data_client.cache_location)
 
     async def test_download_table_already_downloaded(self) -> None:
         """Test downloading tabular data using pyarrow or pandas when the table is already downloaded."""
@@ -462,16 +405,21 @@ async def test_download_table_already_downloaded(self) -> None:
         object_id = UUID(int=2)
         with (
             self.transport.set_http_response(status_code=200, content=json.dumps(get_object_response)),
-            mock.patch("evo.objects.utils.tables.KnownTableFormat") as mock_known_table_format,
             mock.patch("evo.common.io.download.HTTPSource", autospec=True) as mock_source,
         ):
-            mock_table_info = {}
-            mock_table_info["data"] = mock_data_id = "0000000000000000000000000000000000000000000000000000000000000001"
-            mock_known_table_format.load_table = mock_load_table = mock.Mock()
-            expected_file = self.data_client.cache_location / mock_data_id
+            mock_table_info = {
+                "data": "0000000000000000000000000000000000000000000000000000000000000001",
+                "length": 1,
+                "width": 3,
+                "data_type": "float64",
+            }
+            mock_data_id: str = mock_table_info["data"]
+            expected_filename = self.data_client.cache_location / mock_data_id
+            sample_table, payload_bytes = get_sample_table_and_bytes(
+                KnownTableFormat.from_table_info(mock_table_info), 1
+            )
 
             async def _mock_download_file_side_effect(*args, **kwargs):
-                expected_filename = self.data_client.cache_location / mock_data_id
                 expected_download_url = get_object_response["links"]["data"][1]["download_url"]
                 actual_download_url = await kwargs["url_generator"]()
                 self.assertEqual(expected_filename, kwargs["filename"])
@@ -481,17 +429,18 @@ async def _mock_download_file_side_effect(*args, **kwargs):
 
             mock_source.download_file.side_effect = _mock_download_file_side_effect
 
-            expected_file.touch()
+            expected_filename.write_bytes(payload_bytes)
             actual_table = await self.data_client.download_table(object_id, None, mock_table_info)
 
             mock_source.download_file.assert_not_called()
-            self.transport.assert_no_requests()
-            mock_load_table.assert_called_once_with(mock_table_info, self.data_client.cache_location)
-            self.assertIs(mock_load_table.return_value, actual_table)
-
-            # Otherwise this will interfere with the other "already_download" test, since cache cleanup in
-            # TestWithStorage is in class setup, not individual test setup.
-            expected_file.unlink()
+            # The object metadata is still requested to get the initial download URL and check permissions.
+            self.assert_request_made(
+                method=RequestMethod.GET,
+                path=f"{self.base_path}/objects/{object_id}",
+                headers={"Accept": "application/json", "Accept-Encoding": "gzip"},
+            )
+            self.transport.request.assert_called_once()  # Ensure no other requests were made.
+            self.assertEqual(sample_table, actual_table)
 
     async def test_download_dataframe_already_downloaded(self) -> None:
         """Test downloading tabular data using pandas when the table is already downloaded."""
@@ -499,16 +448,21 @@ async def test_download_dataframe_already_downloaded(self) -> None:
         object_id = UUID(int=2)
         with (
             self.transport.set_http_response(status_code=200, content=json.dumps(get_object_response)),
-            mock.patch("evo.objects.utils.tables.KnownTableFormat") as mock_known_table_format,
             mock.patch("evo.common.io.download.HTTPSource", autospec=True) as mock_source,
         ):
-            mock_table_info = {}
-            mock_table_info["data"] = mock_data_id = "0000000000000000000000000000000000000000000000000000000000000001"
-            mock_known_table_format.load_table = mock_load_table = mock.Mock()
-            expected_file = self.data_client.cache_location / mock_data_id
+            mock_table_info = {
+                "data": "0000000000000000000000000000000000000000000000000000000000000001",
+                "length": 1,
+                "width": 3,
+                "data_type": "float64",
+            }
+            mock_data_id: str = mock_table_info["data"]
+            expected_filename = self.data_client.cache_location / mock_data_id
+            sample_table, payload_bytes = get_sample_table_and_bytes(
+                KnownTableFormat.from_table_info(mock_table_info), 1
+            )
 
             async def _mock_download_file_side_effect(*args, **kwargs):
-                expected_filename = self.data_client.cache_location / mock_data_id
                 expected_download_url = get_object_response["links"]["data"][1]["download_url"]
                 actual_download_url = await kwargs["url_generator"]()
                 self.assertEqual(expected_filename, kwargs["filename"])
@@ -518,13 +472,15 @@ async def _mock_download_file_side_effect(*args, **kwargs):
 
             mock_source.download_file.side_effect = _mock_download_file_side_effect
 
-            expected_file.touch()
-            _actual_dataframe = await self.data_client.download_dataframe(object_id, None, mock_table_info)
+            expected_filename.write_bytes(payload_bytes)
+            actual_dataframe = await self.data_client.download_dataframe(object_id, None, mock_table_info)
 
             mock_source.download_file.assert_not_called()
-            self.transport.assert_no_requests()
-            mock_load_table.assert_called_once_with(mock_table_info, self.data_client.cache_location)
-
-            # Otherwise this will interfere with the other "already_download" test, since cache cleanup in
-            # TestWithStorage is in class setup, not individual test setup.
-            expected_file.unlink()
+            # The object metadata is still requested to get the initial download URL and check permissions.
+            self.assert_request_made(
+                method=RequestMethod.GET,
+                path=f"{self.base_path}/objects/{object_id}",
+                headers={"Accept": "application/json", "Accept-Encoding": "gzip"},
+            )
+            self.transport.request.assert_called_once()  # Ensure no other requests were made.
+            assert_frame_equal(sample_table.to_pandas(), actual_dataframe)
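(Aside: the caching contract these tests now exercise end-to-end, as a minimal sketch. `fetch_table_bytes` and its `download` callback are hypothetical names; only the filename convention comes from the tests above.)

    from pathlib import Path

    async def fetch_table_bytes(cache_location: Path, table_info: dict, download) -> bytes:
        # The "data" identifier doubles as the cache filename, so a pre-existing
        # file short-circuits the binary download; only the metadata request remains.
        cached = cache_location / table_info["data"]
        if not cached.exists():
            await download(cached)  # hypothetical downloader that writes to `cached`
        return cached.read_bytes()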
diff --git a/packages/evo-objects/tests/test_downloaded_object.py b/packages/evo-objects/tests/test_downloaded_object.py
new file mode 100644
index 00000000..62e8681a
--- /dev/null
+++ b/packages/evo-objects/tests/test_downloaded_object.py
@@ -0,0 +1,334 @@
+# Copyright © 2025 Bentley Systems, Incorporated
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import contextlib
+import json
+from collections.abc import Generator
+from typing import cast
+from unittest import mock
+from urllib.parse import quote
+from uuid import UUID
+
+import pyarrow as pa
+from numpy.testing import assert_array_equal
+from pandas.testing import assert_frame_equal
+from parameterized import parameterized
+
+from data import load_test_data
+from evo.common import RequestMethod
+from evo.common.test_tools import (
+    BASE_URL,
+    ORG,
+    WORKSPACE_ID,
+    DownloadRequestHandler,
+    TestWithConnector,
+    TestWithStorage,
+)
+from evo.common.utils import NoFeedback
+from evo.jmespath import JMESPathObjectProxy
+from evo.objects import DownloadedObject, ObjectReference
+from evo.objects.endpoints import models
+from evo.objects.io import _CACHE_SCOPE
+from evo.objects.parquet import TableInfo
+from evo.objects.utils import KnownTableFormat
+from helpers import NoImport, UnloadModule, get_sample_table_and_bytes
+
+_OBJECTS_URL = f"{BASE_URL.rstrip('/')}/geoscience-object/orgs/{ORG.id}/workspaces/{WORKSPACE_ID}/objects"
+
+_TABLE_INFO_VARIANTS: list[tuple[str, TableInfo | str]] = [
+    (
+        "with TableInfo dict",
+        {
+            "data": "0000000000000000000000000000000000000000000000000000000000000000",
+            "length": 123,
+            "width": 3,
+            "data_type": "float64",
+        },
+    ),
+    ("with JMESPath reference", "locations.coordinates"),
+]
+
+
+class TestDownloadedObject(TestWithConnector, TestWithStorage):
+    def setUp(self) -> None:
+        from evo.objects.client import parse
+
+        TestWithConnector.setUp(self)
+        TestWithStorage.setUp(self)
+
+        raw = models.GetObjectResponse.model_validate(load_test_data("get_object_detailed.json"))
+        self.object = DownloadedObject(
+            object_=raw.object,
+            metadata=parse.object_metadata(raw, self.environment),
+            urls_by_name={link.name: link.download_url for link in raw.links.data},
+            connector=self.connector,
+            cache=self.cache,
+        )
+
+    def tearDown(self) -> None:
+        # Clear the cache between tests to avoid cached files interfering with subsequent tests.
+        self.cache.clear_cache()
+
+    @parameterized.expand(
+        [
+            ("by id as string", f"{_OBJECTS_URL}/00000000-0000-0000-0000-000000000002"),
+            (
+                "by id as ObjectReference",
+                ObjectReference(f"{_OBJECTS_URL}/00000000-0000-0000-0000-000000000002"),
+            ),
+            (
+                "by id with version id",
+                ObjectReference(
+                    f"{_OBJECTS_URL}/00000000-0000-0000-0000-000000000002?version=2023-08-03T05:47:18.3402289Z"
+                ),
+            ),
+            ("by path as string", f"{_OBJECTS_URL}/path/A/m.json"),
+            ("by path as ObjectReference", ObjectReference(f"{_OBJECTS_URL}/path/A/m.json")),
+            (
+                "by path with version id",
+                ObjectReference(f"{_OBJECTS_URL}/path/A/m.json?version=2023-08-03T05:47:18.3402289Z"),
+            ),
+        ]
+    )
+    async def test_from_reference(self, _label: str, reference: str) -> None:
+        """Test downloading a geoscience object by reference."""
+        get_object_response = load_test_data("get_object.json")
+        expected_uuid = UUID(int=2)
+        expected_object_dict = {
+            "schema": "/objects/pointset/1.0.1/pointset.schema.json",
+            "uuid": UUID("00000000-0000-0000-0000-000000000002"),
+            "name": "Sample pointset",
+            "description": "A sample pointset object",
+            "bounding_box": {"min_x": 0.0, "max_x": 0.0, "min_y": 0.0, "max_y": 0.0, "min_z": 0.0, "max_z": 0.0},
+            "coordinate_reference_system": {"epsg_code": 2048},
+            "locations": {
+                "coordinates": {
+                    "data": "0000000000000000000000000000000000000000000000000000000000000001",
+                    "length": 1,
+                    "width": 3,
+                    "data_type": "float64",
+                }
+            },
+        }
+        expected_path = "A/m.json"
+        expected_version = "2023-08-03T05:47:18.3402289Z"
+        with self.transport.set_http_response(status_code=200, content=json.dumps(get_object_response)):
+            actual_object = await DownloadedObject.from_reference(self.connector, reference, self.cache)
+
+        ref = ObjectReference(reference)
+        if ref.object_id is not None:
+            expected_request_path = f"{_OBJECTS_URL}/{ref.object_id}"
+        else:
+            expected_request_path = f"{_OBJECTS_URL}/path/{ref.object_path}"
+
+        if ref.version_id is not None:
+            expected_request_path += f"?version={quote(ref.version_id)}"
+
+        self.assert_request_made(
+            method=RequestMethod.GET,
+            path=expected_request_path,
+            headers={"Accept": "application/json", "Accept-Encoding": "gzip"},
+        )
+        # Check the metadata.
+        actual_metadata = actual_object.metadata
+        self.assertEqual(expected_path, actual_metadata.path)
+        self.assertEqual("A", actual_metadata.parent)
+        self.assertEqual("m.json", actual_metadata.name)
+        self.assertEqual(expected_uuid, actual_metadata.id)
+        self.assertEqual(expected_version, actual_metadata.version_id)
+
+        # Check the geoscience object.
+        self.assertEqual(expected_object_dict, actual_object.as_dict())
+
+    def test_search(self) -> None:
+        """Test the JMESPath search implementation."""
+        expected_result = JMESPathObjectProxy(
+            {
+                "x": [0.0, 1.0],
+                "y": [2.0, 3.0],
+                "z": [4.0, 5.0],
+            }
+        )
+        actual_result = self.object.search("bounding_box | {x: [min_x, max_x], y: [min_y, max_y], z: [min_z, max_z]}")
+        self.assertEqual(expected_result, actual_result)
+
+    def _assert_optional_method(self, method_name: str, *, unload: list[str], no_import: list[str]) -> None:
+        # Verify the method exists before unloading any modules.
+        from evo.objects.client import DownloadedObject
+
+        self.assertTrue(
+            all(
+                [
+                    hasattr(DownloadedObject, method_name),
+                    hasattr(self.object, method_name),
+                ]
+            ),
+            f"DownloadedObject.{method_name} should be available for this test to be valid",
+        )
+
+        with UnloadModule("evo.objects.client.object_client", *unload), NoImport(*no_import):
+            # Re-import the class to ensure the module is re-evaluated without the optional dependency.
+            from evo.objects.client import DownloadedObject
+
+            # Re-create the object to ensure the class is re-evaluated without the optional dependency.
+            client = DownloadedObject(
+                object_=self.object._object,
+                metadata=self.object.metadata,
+                urls_by_name=self.object._urls_by_name,
+                connector=self.object._connector,
+                cache=self.object._cache,
+            )
+            self.assertFalse(
+                all(
+                    [
+                        hasattr(DownloadedObject, method_name),
+                        hasattr(client, method_name),
+                    ]
+                ),
+                f"DownloadedObject.{method_name} should not be available if "
+                f"{', '.join(no_import)} {'is' if len(no_import) == 1 else 'are'} not available",
+            )
+
+    @contextlib.contextmanager
+    def _patch_downloading_table(self, table_info: TableInfo | str) -> Generator[pa.Table, None, None]:
+        mock_table_info = table_info
+        if isinstance(mock_table_info, str):
+            mock_table_info = cast(TableInfo, self.object.search(mock_table_info))
+
+        mock_data_id = mock_table_info["data"]
+        expected_filename = self.cache.get_location(self.environment, _CACHE_SCOPE) / mock_data_id
+        sample_table, payload_bytes = get_sample_table_and_bytes(
+            KnownTableFormat.from_table_info(mock_table_info), mock_table_info["length"]
+        )
+        with mock.patch("evo.common.io.download.HTTPSource", autospec=True) as mock_source:
+
+            async def _mock_download_file_side_effect(*args, **kwargs):
+                expected_download_url = self.object._urls_by_name[mock_data_id]
+                actual_download_url = await kwargs["url_generator"]()
+                self.assertEqual(expected_filename, kwargs["filename"])
+                self.assertEqual(expected_download_url, actual_download_url)
+                self.assertIs(self.transport, kwargs["transport"])
+                self.assertIs(NoFeedback, kwargs["fb"])
+                expected_filename.write_bytes(payload_bytes)
+
+            mock_source.download_file.side_effect = _mock_download_file_side_effect
+            yield sample_table
+
+            mock_source.download_file.assert_called_once()
+            self.transport.assert_no_requests()
+
+    @parameterized.expand(_TABLE_INFO_VARIANTS)
+    async def test_download_table(self, _label: str, table_info: TableInfo | str) -> None:
+        """Test downloading parquet data as a pyarrow.Table."""
+        with self._patch_downloading_table(table_info) as sample_table:
+            actual_table = await self.object.download_table(table_info)
+
+            # Should use the cache the second time.
+            # The _patch_downloading_table context manager verifies this by checking the data is only downloaded once.
+            cached_table = await self.object.download_table(table_info)
+
+        self.assertEqual(sample_table, actual_table)
+        self.assertEqual(sample_table, cached_table)
+
+    @contextlib.contextmanager
+    def _patch_downloading_table_in_memory(self, table_info: TableInfo | str) -> Generator[pa.Table, None, None]:
+        self.object._cache = None  # Disable the cache for this test.
+
+        mock_table_info = table_info
+        if isinstance(mock_table_info, str):
+            mock_table_info = cast(TableInfo, self.object.search(mock_table_info))
+
+        sample_table, payload_bytes = get_sample_table_and_bytes(
+            KnownTableFormat.from_table_info(mock_table_info), mock_table_info["length"]
+        )
+
+        # Use the DownloadRequestHandler from evo.common.test_tools.io to mock the binary download.
+        download_handler = DownloadRequestHandler(data=payload_bytes)
+        self.transport.set_request_handler(download_handler)
+        yield sample_table
+
+    @parameterized.expand(_TABLE_INFO_VARIANTS)
+    async def test_download_table_without_cache(self, _label: str, table_info: TableInfo | str) -> None:
+        """Test downloading parquet data in memory as a pyarrow.Table."""
+        with self._patch_downloading_table_in_memory(table_info) as sample_table:
+            actual_table = await self.object.download_table(table_info)
+
+        self.assertEqual(sample_table, actual_table)
+
+    def test_download_table_is_optional(self) -> None:
+        """Test that the download_table method is not available when pyarrow is not installed."""
+        self._assert_optional_method("download_table", unload=["evo.objects.parquet.loader"], no_import=["pyarrow"])
+
+    @parameterized.expand(_TABLE_INFO_VARIANTS)
+    async def test_download_dataframe(self, _label: str, table_info: TableInfo | str) -> None:
+        """Test downloading parquet data as a pandas.DataFrame."""
+        with self._patch_downloading_table(table_info) as sample_table:
+            actual_dataframe = await self.object.download_dataframe(table_info)
+
+            # Should use the cache the second time.
+            # The _patch_downloading_table context manager verifies this by checking the data is only downloaded once.
+            cached_dataframe = await self.object.download_dataframe(table_info)
+
+        expected_dataframe = sample_table.to_pandas()
+        assert_frame_equal(expected_dataframe, actual_dataframe)
+        assert_frame_equal(expected_dataframe, cached_dataframe)
+
+    @parameterized.expand(_TABLE_INFO_VARIANTS)
+    async def test_download_dataframe_without_cache(self, _label: str, table_info: TableInfo | str) -> None:
+        """Test downloading parquet data in memory as a pandas.DataFrame."""
+        with self._patch_downloading_table_in_memory(table_info) as sample_table:
+            actual_dataframe = await self.object.download_dataframe(table_info)
+
+        expected_dataframe = sample_table.to_pandas()
+        assert_frame_equal(expected_dataframe, actual_dataframe)
+
+    @parameterized.expand(
+        [
+            ("pyarrow",),
+            ("pandas",),
+        ]
+    )
+    def test_download_dataframe_is_optional(self, missing: str) -> None:
+        """Test that the download_dataframe method is not available when pandas or pyarrow is not installed."""
+        self._assert_optional_method("download_dataframe", unload=["evo.objects.parquet.loader"], no_import=[missing])
+
+    @parameterized.expand(_TABLE_INFO_VARIANTS)
+    async def test_download_array(self, _label: str, table_info: TableInfo | str) -> None:
+        """Test downloading parquet data as a numpy.ndarray."""
+        with self._patch_downloading_table(table_info) as sample_table:
+            actual_array = await self.object.download_array(table_info)
+
+            # Should use the cache the second time.
+            # The _patch_downloading_table context manager verifies this by checking the data is only downloaded once.
+            cached_array = await self.object.download_array(table_info)
+
+        expected_array = sample_table.to_pandas().to_numpy()
+        assert_array_equal(expected_array, actual_array, strict=True)
+        assert_array_equal(expected_array, cached_array, strict=True)
+
+    @parameterized.expand(_TABLE_INFO_VARIANTS)
+    async def test_download_array_without_cache(self, _label: str, table_info: TableInfo | str) -> None:
+        """Test downloading parquet data in memory as a numpy.ndarray."""
+        with self._patch_downloading_table_in_memory(table_info) as sample_table:
+            actual_array = await self.object.download_array(table_info)
+
+        expected_array = sample_table.to_pandas().to_numpy()
+        assert_array_equal(expected_array, actual_array, strict=True)
+
+    @parameterized.expand(
+        [
+            ("pyarrow",),
+            ("numpy",),
+        ]
+    )
+    def test_download_array_is_optional(self, missing: str) -> None:
+        """Test that the download_array method is not available when numpy or pyarrow is not installed."""
+        self._assert_optional_method("download_array", unload=["evo.objects.parquet.loader"], no_import=[missing])
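(Aside: the two addressing modes covered by _TABLE_INFO_VARIANTS, sketched outside the test harness. `obj` stands for a DownloadedObject obtained elsewhere, and the JMESPath expression matches the pointset fixture used above; the snippet is illustrative only.)

    # Inside an async context:
    table_info = obj.search("locations.coordinates")             # resolve a TableInfo dict via JMESPath
    by_dict = await obj.download_table(table_info)               # pass the resolved dict...
    by_path = await obj.download_table("locations.coordinates")  # ...or let the method resolve the path
    assert by_dict == by_path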
diff --git a/packages/evo-objects/tests/test_object_service_client.py b/packages/evo-objects/tests/test_object_service_client.py
index 6cf3d1df..eaacee1d 100644
--- a/packages/evo-objects/tests/test_object_service_client.py
+++ b/packages/evo-objects/tests/test_object_service_client.py
@@ -9,6 +9,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import dataclasses
 import datetime
 import json
 from unittest import mock
@@ -34,7 +35,7 @@
 from evo.objects.data import ObjectOrderByEnum, OrgObjectMetadata, Stage
 from evo.objects.exceptions import ObjectAlreadyExistsError, ObjectUUIDError
 from evo.objects.utils import ObjectDataClient
-from helpers import NoImport
+from helpers import NoImport, UnloadModule
 
 EMPTY_CONTENT = '{"objects": [], "links": {"next": null, "prev": null}}'
 MOCK_VERSION_CONTENT = json.dumps(load_test_data("list_versions.json"))
@@ -57,7 +58,7 @@ def base_path(self) -> str:
 
     async def test_check_service_health(self) -> None:
         """Test service health check implementation"""
-        with mock.patch("evo.objects.client.get_service_health", spec_set=True) as mock_get_service_health:
+        with mock.patch("evo.objects.client.api_client.get_service_health", spec_set=True) as mock_get_service_health:
             await self.object_client.get_service_health()
             mock_get_service_health.assert_called_once_with(
                 self.connector, "geoscience-object", check_type=HealthCheckType.FULL
@@ -243,6 +244,8 @@ async def test_list_objects_for_instance(self) -> None:
             ),
         ]
         self.assertIsInstance(page_one, Page)
+        for item in page_one:
+            self.assertEqual(item.environment.workspace_id, item.workspace_id, "workspace_id should match environment")
         self.assertEqual(expected_items_page_one, page_one.items())
         self.assertEqual(0, page_one.offset)
         self.assertEqual(2, page_one.limit)
@@ -257,7 +260,9 @@ async def test_list_objects_for_instance(self) -> None:
         page_two = await self.object_client.list_objects_for_instance(offset=page_one.next_offset, limit=page_one.limit)
         expected_items_page_two = [
             OrgObjectMetadata(
-                environment=self.environment,
+                environment=dataclasses.replace(
+                    self.environment, workspace_id=UUID("00000000-0000-0000-0000-0000000004d2")
+                ),
                 workspace_id=UUID("00000000-0000-0000-0000-0000000004d2"),
                 workspace_name="Test Workspace 2",
                 id=UUID("00000000-0000-0000-0000-000000000002"),
@@ -279,6 +284,8 @@ async def test_list_objects_for_instance(self) -> None:
             ),
         ]
         self.assertIsInstance(page_two, Page)
+        for item in page_two:
+            self.assertEqual(item.environment.workspace_id, item.workspace_id, "workspace_id should match environment")
         self.assertEqual(expected_items_page_two, page_two.items())
         self.assertEqual(2, page_two.offset)
         self.assertEqual(2, page_two.limit)
@@ -662,8 +669,19 @@ def test_get_data_client(self) -> None:
 
     def test_get_data_client_missing_dependencies(self) -> None:
         """Test getting a data client with missing dependencies."""
-        with NoImport("pyarrow"), self.assertRaises(RuntimeError):
-            self.object_client.get_data_client(self.cache)
+        with UnloadModule("evo.objects.client.api_client", "evo.objects.utils.data"), NoImport("pyarrow"):
+            from evo.objects.client import ObjectAPIClient
+
+            client = ObjectAPIClient(self.environment, self.connector)
+            self.assertFalse(
+                any(
+                    (
+                        hasattr(ObjectAPIClient, "get_data_client"),
+                        hasattr(client, "get_data_client"),
+                    )
+                ),
+                "get_data_client should not be available if pyarrow is missing",
+            )
 
     async def test_get_latest_object_versions(self) -> None:
         content = json.dumps(
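(Aside: the hasattr-based assertions work because a method can be genuinely absent from a class when its optional dependency is missing. One plausible gating pattern is sketched below; it is illustrative only and not necessarily how ObjectAPIClient implements it.)

    try:
        import pyarrow  # noqa: F401
        _HAS_PYARROW = True
    except ImportError:
        _HAS_PYARROW = False

    class ClientSketch:
        # The method is only defined when the class body is evaluated with pyarrow
        # importable, so hasattr(ClientSketch, "get_data_client") is False otherwise.
        if _HAS_PYARROW:
            def get_data_client(self, cache):
                ...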
diff --git a/packages/evo-objects/tests/test_tables.py b/packages/evo-objects/tests/test_tables.py
index 91f3f6ca..9a79d529 100644
--- a/packages/evo-objects/tests/test_tables.py
+++ b/packages/evo-objects/tests/test_tables.py
@@ -10,18 +10,15 @@
 # limitations under the License.
 
 import hashlib
-import random
 import unittest
 import uuid
 from collections.abc import Iterator
-from datetime import datetime, timezone
 from io import BytesIO
 from pathlib import Path
 from typing import BinaryIO
 from unittest import mock
 from uuid import UUID
 
-import numpy
 import pyarrow as pa
 import pyarrow.parquet as pq
 from parameterized import parameterized, parameterized_class
@@ -37,7 +34,7 @@
     all_known_formats,
     get_known_format,
 )
-from evo.objects.utils.tables import _ColumnFormat
+from helpers import get_sample_table
 
 SAMPLE_DATA_LENGTH = 10
 ENVIRONMENT = Environment(hub_url=BASE_URL, org_id=UUID(int=0), workspace_id=UUID(int=0))
@@ -66,101 +63,6 @@ def _all_known_formats_for_testing() -> Iterator[dict]:
         yield {"data_format": known_format, "expected_field_names": known_format._field_names}
 
 
-def _generate_float64_data(n_samples: int) -> Iterator[float]:
-    max_ = numpy.finfo("float64").max
-    for _ in range(n_samples):
-        yield max_ * random.uniform(-1.0, 1.0)
-
-
-def _generate_int_data(int_type: str, n_samples: int) -> Iterator[int]:
-    min_, max_ = numpy.iinfo(int_type).min, numpy.iinfo(int_type).max
-    for _ in range(n_samples):
-        yield random.randint(min_, max_)
-
-
-def _generate_bool_data(n_samples: int) -> Iterator[bool]:
-    for _ in range(n_samples):
-        yield random.choice((True, False))
-
-
-def _generate_string_data(n_samples: int) -> Iterator[str]:
-    str_sample = "0123456789ABCDEF "
-    for _ in range(n_samples):
-        length = random.randint(10, 10000)
-        yield "".join(random.choices(str_sample, k=length))
-
-
-def _generate_timestamp_data(n_samples: int) -> Iterator[datetime]:
-    min_ = datetime(1970, 1, 1, tzinfo=timezone.utc).timestamp()
-    max_ = datetime(2038, 12, 31, 23, 59, 59, 999999, tzinfo=timezone.utc).timestamp()
-    for _ in range(n_samples):
-        yield datetime.utcfromtimestamp(random.uniform(min_, max_))
-
-
-def _generate_data(format_id: str, n_samples: int) -> Iterator:
-    match format_id:
-        case "float64":
-            yield from _generate_float64_data(n_samples)
-        case "uint8" | "uint32" | "uint64" | "int32" | "int64" as int_type:
-            yield from _generate_int_data(int_type, n_samples)
-        case "bool":
-            yield from _generate_bool_data(n_samples)
-        case "string":
-            yield from _generate_string_data(n_samples)
-        case "timestamp":
-            yield from _generate_timestamp_data(n_samples)
-        case unknown_format:
-            raise TypeError(f"Unsupported format '{unknown_format}'")
-
-
-def _get_sample_column(column_format: _ColumnFormat, n_samples: int) -> pa.Array:
-    return pa.array(_generate_data(column_format.id, n_samples), type=column_format.type, size=n_samples)
-
-
-def _get_table_schema(columns: list[_ColumnFormat]) -> pa.Schema:
-    return pa.schema([pa.field(f"{column.id}[{i}]", column.type, nullable=False) for i, column in enumerate(columns)])
-
-
-def _change_format(current_format: _ColumnFormat) -> _ColumnFormat:
-    match current_format.id:
-        case "float64":
-            return _ColumnFormat("int64")
-        case "uint8" | "uint32" | "uint64" | "int32" | "int64":
-            return _ColumnFormat("float64")
-        case "bool" | "timestamp":
-            return _ColumnFormat("string")
-        case "string":
-            return _ColumnFormat("bool")
-        case unknown_format:
-            raise TypeError(f"Unsupported format '{unknown_format}'")
-
-
-def _get_sample_table(
-    table_format: BaseTableFormat, n_rows: int, add_column: bool = False, change_types: bool = False
-) -> pa.Table:
-    column_formats = [column for column in table_format._columns]
-
-    if add_column:
-        column_formats.append(_ColumnFormat(column_formats[-1].type))
-
-    if change_types:
-        column_formats = [_change_format(column) for column in column_formats]
-
-    if table_format._multi_dimensional:
-        # Test multidimensional tables with an arbitrary number of columns. If the number of columns matches a more
-        # specific GO type (one with a fixed number of columns), the more specific type would be instantiated.
-        column_formats *= 20
-
-    sample_schema = pa.schema(
-        [pa.field(f"{column.id}[{i}]", column.type, nullable=False) for i, column in enumerate(column_formats)]
-    )
-    sample_data = [
-        pa.array(_generate_data(column_format.id, n_rows), type=column_format.type, size=n_rows)
-        for column_format in column_formats
-    ]
-    return pa.table(sample_data, names=sample_schema.names).cast(sample_schema)
-
-
 def _get_buffer_digest(buffer: BinaryIO) -> str:
     """Return a sha256 digest of a binary buffer"""
     buffer.seek(0)
@@ -189,7 +91,7 @@ class TestKnownFormat(unittest.TestCase):
     expected_field_names: str
 
     def setUp(self) -> None:
-        self.sample_table = _get_sample_table(table_format=self.data_format, n_rows=SAMPLE_DATA_LENGTH)
+        self.sample_table = get_sample_table(table_format=self.data_format, n_rows=SAMPLE_DATA_LENGTH)
         self.expected_parquet_digest = _get_table_digest(self.sample_table)
         self.data_dir = CACHE.get_location(ENVIRONMENT, self.__class__.__name__)
         self.parquet_file = self.data_dir / self.expected_parquet_digest
@@ -251,7 +153,7 @@ def _save_parquet_file(self, add_column: bool = False, add_row: bool = False, ch
         sample_length = SAMPLE_DATA_LENGTH
         if add_row:
             sample_length += 1
-        self.sample_table = _get_sample_table(
+        self.sample_table = get_sample_table(
             table_format=self.data_format, n_rows=sample_length, add_column=add_column, change_types=change_type
         )
@@ -400,7 +302,7 @@ class TestComplexFormats(unittest.TestCase):
     expect_extra_column_fails: bool
 
     def setUp(self) -> None:
-        self.sample_table = _get_sample_table(table_format=self.data_format, n_rows=SAMPLE_DATA_LENGTH)
+        self.sample_table = get_sample_table(table_format=self.data_format, n_rows=SAMPLE_DATA_LENGTH)
         self.expected_parquet_digest = _get_table_digest(self.sample_table)
         self.data_dir = CACHE.get_location(ENVIRONMENT, self.__class__.__name__)
         self.parquet_file = self.data_dir / self.expected_parquet_digest
@@ -422,7 +324,7 @@ def test_save_table(self) -> None:
 
     def test_save_table_extra_column_fails(self) -> None:
         self.assertFalse(self.parquet_file.is_file())
-        sample_table = _get_sample_table(self.data_format, n_rows=SAMPLE_DATA_LENGTH, add_column=True)
+        sample_table = get_sample_table(self.data_format, n_rows=SAMPLE_DATA_LENGTH, add_column=True)
 
         if self.expect_extra_column_fails:
             with self.assertRaises(TableFormatError):
@@ -435,7 +337,7 @@ def test_save_table_extra_column_fails(self) -> None:
 
     def test_save_table_different_column_types_fails(self) -> None:
        self.assertFalse(self.parquet_file.is_file())
-        sample_table = _get_sample_table(self.data_format, n_rows=SAMPLE_DATA_LENGTH, change_types=True)
+        sample_table = get_sample_table(self.data_format, n_rows=SAMPLE_DATA_LENGTH, change_types=True)
         known_format = get_known_format(sample_table)
 
         with self.assertRaises(TableFormatError):
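(Aside: the digest-based file naming that _get_buffer_digest and the setUp methods above rely on, as a minimal sketch. The parquet writer options mirror get_sample_table_and_bytes in helpers.py.)

    import hashlib
    from io import BytesIO

    import pyarrow as pa
    import pyarrow.parquet as pq

    def table_digest(table: pa.Table) -> str:
        # The expected cache filename for a table is the sha256 hex digest of its
        # serialized parquet representation.
        buffer = BytesIO()
        pq.write_table(table, where=buffer, version="2.4", compression="gzip")
        return hashlib.sha256(buffer.getvalue()).hexdigest()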
"http://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces?limit=2&offset=0", - "last": "http://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces?limit=2&offset=2", + "first": "https://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces?limit=2&offset=0", + "last": "https://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces?limit=2&offset=2", "next": null, - "previous": "http://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces?limit=2&offset=0", + "previous": "https://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces?limit=2&offset=0", "count": 1, "total": 3 }, @@ -28,7 +28,7 @@ }, "default_coordinate_system": "", "labels": [], - "self_link": "http://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces/00000000-0000-0000-0000-00000000000c" + "self_link": "https://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces/00000000-0000-0000-0000-00000000000c" } ] } \ No newline at end of file diff --git a/packages/evo-sdk-common/tests/data/list_workspaces_summary.json b/packages/evo-sdk-common/tests/data/list_workspaces_summary.json index 1c577014..2a456de6 100644 --- a/packages/evo-sdk-common/tests/data/list_workspaces_summary.json +++ b/packages/evo-sdk-common/tests/data/list_workspaces_summary.json @@ -1,10 +1,10 @@ { "links": { - "first": "http://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces/summary", + "first": "https://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces/summary", "next": null, "previous": null, "count": 3, - "last": "http://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces/summary", + "last": "https://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces/summary", "total": 3 }, "results": [ diff --git a/packages/evo-sdk-common/tests/data/list_workspaces_summary_paginated_0.json b/packages/evo-sdk-common/tests/data/list_workspaces_summary_paginated_0.json index 2589be52..de3030e4 100644 --- a/packages/evo-sdk-common/tests/data/list_workspaces_summary_paginated_0.json +++ b/packages/evo-sdk-common/tests/data/list_workspaces_summary_paginated_0.json @@ -1,10 +1,10 @@ { "links": { - "first": "http://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces/summary?limit=2&offset=0", - "next": "http://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces/summary?limit=2&offset=2", + "first": "https://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces/summary?limit=2&offset=0", + "next": "https://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces/summary?limit=2&offset=2", "previous": null, "count": 2, - "last": "http://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces/summary?limit=2&offset=2", + "last": "https://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces/summary?limit=2&offset=2", "total": 3 }, "results": [ diff --git a/packages/evo-sdk-common/tests/data/list_workspaces_summary_paginated_1.json b/packages/evo-sdk-common/tests/data/list_workspaces_summary_paginated_1.json index 31093789..d2a6ecbf 100644 --- a/packages/evo-sdk-common/tests/data/list_workspaces_summary_paginated_1.json +++ 
b/packages/evo-sdk-common/tests/data/list_workspaces_summary_paginated_1.json @@ -1,10 +1,10 @@ { "links": { - "first": "http://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces/summary?limit=2&offset=0", + "first": "https://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces/summary?limit=2&offset=0", "next": null, - "previous": "http://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces/summary?limit=2&offset=0", + "previous": "https://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces/summary?limit=2&offset=0", "count": 1, - "last": "http://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces/summary?limit=2&offset=2", + "last": "https://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces/summary?limit=2&offset=2", "total": 3 }, "results": [ diff --git a/packages/evo-sdk-common/tests/data/new_workspace.json b/packages/evo-sdk-common/tests/data/new_workspace.json index 81b4a70f..f1bbef9c 100644 --- a/packages/evo-sdk-common/tests/data/new_workspace.json +++ b/packages/evo-sdk-common/tests/data/new_workspace.json @@ -19,5 +19,5 @@ "ml_enabled": false, "default_coordinate_system": "", "labels": [], - "self_link": "http://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces" + "self_link": "https://unittest.localhost/workspace/orgs/00000000-0000-0000-0000-000000000000/workspaces" } diff --git a/uv.lock b/uv.lock index e88879c3..4c7b78e0 100644 --- a/uv.lock +++ b/uv.lock @@ -786,10 +786,10 @@ test = [ [[package]] name = "evo-objects" -version = "0.2.3" +version = "0.3.0" source = { editable = "packages/evo-objects" } dependencies = [ - { name = "evo-sdk-common" }, + { name = "evo-sdk-common", extra = ["jmespath"] }, { name = "pydantic" }, ] @@ -801,8 +801,10 @@ notebooks = [ { name = "evo-sdk-common", extra = ["notebooks"] }, ] utils = [ + { name = "numpy" }, { name = "pandas" }, { name = "pyarrow" }, + { name = "pyarrow-stubs" }, ] [package.dev-dependencies] @@ -828,11 +830,13 @@ test = [ [package.metadata] requires-dist = [ - { name = "evo-sdk-common", editable = "packages/evo-sdk-common" }, { name = "evo-sdk-common", extras = ["aiohttp"], marker = "extra == 'aiohttp'", editable = "packages/evo-sdk-common" }, + { name = "evo-sdk-common", extras = ["jmespath"], editable = "packages/evo-sdk-common" }, { name = "evo-sdk-common", extras = ["notebooks"], marker = "extra == 'notebooks'", editable = "packages/evo-sdk-common" }, + { name = "numpy", marker = "extra == 'utils'" }, { name = "pandas", marker = "extra == 'utils'" }, { name = "pyarrow", marker = "extra == 'utils'" }, + { name = "pyarrow-stubs", marker = "extra == 'utils'" }, { name = "pydantic", specifier = ">=2,<3" }, ] provides-extras = ["aiohttp", "notebooks", "utils"] @@ -2205,44 +2209,57 @@ wheels = [ [[package]] name = "pyarrow" -version = "19.0.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7f/09/a9046344212690f0632b9c709f9bf18506522feb333c894d0de81d62341a/pyarrow-19.0.1.tar.gz", hash = "sha256:3bf266b485df66a400f282ac0b6d1b500b9d2ae73314a153dbe97d6d5cc8a99e", size = 1129437, upload-time = "2025-02-18T18:55:57.027Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/36/01/b23b514d86b839956238d3f8ef206fd2728eee87ff1b8ce150a5678d9721/pyarrow-19.0.1-cp310-cp310-macosx_12_0_arm64.whl", hash = 
"sha256:fc28912a2dc924dddc2087679cc8b7263accc71b9ff025a1362b004711661a69", size = 30688914, upload-time = "2025-02-18T18:51:37.575Z" }, - { url = "https://files.pythonhosted.org/packages/c6/68/218ff7cf4a0652a933e5f2ed11274f724dd43b9813cb18dd72c0a35226a2/pyarrow-19.0.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:fca15aabbe9b8355800d923cc2e82c8ef514af321e18b437c3d782aa884eaeec", size = 32102866, upload-time = "2025-02-18T18:51:44.358Z" }, - { url = "https://files.pythonhosted.org/packages/98/01/c295050d183014f4a2eb796d7d2bbfa04b6cccde7258bb68aacf6f18779b/pyarrow-19.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad76aef7f5f7e4a757fddcdcf010a8290958f09e3470ea458c80d26f4316ae89", size = 41147682, upload-time = "2025-02-18T18:51:49.481Z" }, - { url = "https://files.pythonhosted.org/packages/40/17/a6c3db0b5f3678f33bbb552d2acbc16def67f89a72955b67b0109af23eb0/pyarrow-19.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d03c9d6f2a3dffbd62671ca070f13fc527bb1867b4ec2b98c7eeed381d4f389a", size = 42179192, upload-time = "2025-02-18T18:51:56.265Z" }, - { url = "https://files.pythonhosted.org/packages/cf/75/c7c8e599300d8cebb6cb339014800e1c720c9db2a3fcb66aa64ec84bac72/pyarrow-19.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:65cf9feebab489b19cdfcfe4aa82f62147218558d8d3f0fc1e9dea0ab8e7905a", size = 40517272, upload-time = "2025-02-18T18:52:02.969Z" }, - { url = "https://files.pythonhosted.org/packages/ef/c9/68ab123ee1528699c4d5055f645ecd1dd68ff93e4699527249d02f55afeb/pyarrow-19.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:41f9706fbe505e0abc10e84bf3a906a1338905cbbcf1177b71486b03e6ea6608", size = 42069036, upload-time = "2025-02-18T18:52:10.173Z" }, - { url = "https://files.pythonhosted.org/packages/54/e3/d5cfd7654084e6c0d9c3ce949e5d9e0ccad569ae1e2d5a68a3ec03b2be89/pyarrow-19.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:c6cb2335a411b713fdf1e82a752162f72d4a7b5dbc588e32aa18383318b05866", size = 25277951, upload-time = "2025-02-18T18:52:15.459Z" }, - { url = "https://files.pythonhosted.org/packages/a0/55/f1a8d838ec07fe3ca53edbe76f782df7b9aafd4417080eebf0b42aab0c52/pyarrow-19.0.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:cc55d71898ea30dc95900297d191377caba257612f384207fe9f8293b5850f90", size = 30713987, upload-time = "2025-02-18T18:52:20.463Z" }, - { url = "https://files.pythonhosted.org/packages/13/12/428861540bb54c98a140ae858a11f71d041ef9e501e6b7eb965ca7909505/pyarrow-19.0.1-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:7a544ec12de66769612b2d6988c36adc96fb9767ecc8ee0a4d270b10b1c51e00", size = 32135613, upload-time = "2025-02-18T18:52:25.29Z" }, - { url = "https://files.pythonhosted.org/packages/2f/8a/23d7cc5ae2066c6c736bce1db8ea7bc9ac3ef97ac7e1c1667706c764d2d9/pyarrow-19.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0148bb4fc158bfbc3d6dfe5001d93ebeed253793fff4435167f6ce1dc4bddeae", size = 41149147, upload-time = "2025-02-18T18:52:30.975Z" }, - { url = "https://files.pythonhosted.org/packages/a2/7a/845d151bb81a892dfb368bf11db584cf8b216963ccce40a5cf50a2492a18/pyarrow-19.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f24faab6ed18f216a37870d8c5623f9c044566d75ec586ef884e13a02a9d62c5", size = 42178045, upload-time = "2025-02-18T18:52:36.859Z" }, - { url = "https://files.pythonhosted.org/packages/a7/31/e7282d79a70816132cf6cae7e378adfccce9ae10352d21c2fecf9d9756dd/pyarrow-19.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = 
"sha256:4982f8e2b7afd6dae8608d70ba5bd91699077323f812a0448d8b7abdff6cb5d3", size = 40532998, upload-time = "2025-02-18T18:52:42.578Z" }, - { url = "https://files.pythonhosted.org/packages/b8/82/20f3c290d6e705e2ee9c1fa1d5a0869365ee477e1788073d8b548da8b64c/pyarrow-19.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:49a3aecb62c1be1d822f8bf629226d4a96418228a42f5b40835c1f10d42e4db6", size = 42084055, upload-time = "2025-02-18T18:52:48.749Z" }, - { url = "https://files.pythonhosted.org/packages/ff/77/e62aebd343238863f2c9f080ad2ef6ace25c919c6ab383436b5b81cbeef7/pyarrow-19.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:008a4009efdb4ea3d2e18f05cd31f9d43c388aad29c636112c2966605ba33466", size = 25283133, upload-time = "2025-02-18T18:52:54.549Z" }, - { url = "https://files.pythonhosted.org/packages/78/b4/94e828704b050e723f67d67c3535cf7076c7432cd4cf046e4bb3b96a9c9d/pyarrow-19.0.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:80b2ad2b193e7d19e81008a96e313fbd53157945c7be9ac65f44f8937a55427b", size = 30670749, upload-time = "2025-02-18T18:53:00.062Z" }, - { url = "https://files.pythonhosted.org/packages/7e/3b/4692965e04bb1df55e2c314c4296f1eb12b4f3052d4cf43d29e076aedf66/pyarrow-19.0.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:ee8dec072569f43835932a3b10c55973593abc00936c202707a4ad06af7cb294", size = 32128007, upload-time = "2025-02-18T18:53:06.581Z" }, - { url = "https://files.pythonhosted.org/packages/22/f7/2239af706252c6582a5635c35caa17cb4d401cd74a87821ef702e3888957/pyarrow-19.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d5d1ec7ec5324b98887bdc006f4d2ce534e10e60f7ad995e7875ffa0ff9cb14", size = 41144566, upload-time = "2025-02-18T18:53:11.958Z" }, - { url = "https://files.pythonhosted.org/packages/fb/e3/c9661b2b2849cfefddd9fd65b64e093594b231b472de08ff658f76c732b2/pyarrow-19.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3ad4c0eb4e2a9aeb990af6c09e6fa0b195c8c0e7b272ecc8d4d2b6574809d34", size = 42202991, upload-time = "2025-02-18T18:53:17.678Z" }, - { url = "https://files.pythonhosted.org/packages/fe/4f/a2c0ed309167ef436674782dfee4a124570ba64299c551e38d3fdaf0a17b/pyarrow-19.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:d383591f3dcbe545f6cc62daaef9c7cdfe0dff0fb9e1c8121101cabe9098cfa6", size = 40507986, upload-time = "2025-02-18T18:53:26.263Z" }, - { url = "https://files.pythonhosted.org/packages/27/2e/29bb28a7102a6f71026a9d70d1d61df926887e36ec797f2e6acfd2dd3867/pyarrow-19.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b4c4156a625f1e35d6c0b2132635a237708944eb41df5fbe7d50f20d20c17832", size = 42087026, upload-time = "2025-02-18T18:53:33.063Z" }, - { url = "https://files.pythonhosted.org/packages/16/33/2a67c0f783251106aeeee516f4806161e7b481f7d744d0d643d2f30230a5/pyarrow-19.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:5bd1618ae5e5476b7654c7b55a6364ae87686d4724538c24185bbb2952679960", size = 25250108, upload-time = "2025-02-18T18:53:38.462Z" }, - { url = "https://files.pythonhosted.org/packages/2b/8d/275c58d4b00781bd36579501a259eacc5c6dfb369be4ddeb672ceb551d2d/pyarrow-19.0.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e45274b20e524ae5c39d7fc1ca2aa923aab494776d2d4b316b49ec7572ca324c", size = 30653552, upload-time = "2025-02-18T18:53:44.357Z" }, - { url = "https://files.pythonhosted.org/packages/a0/9e/e6aca5cc4ef0c7aec5f8db93feb0bde08dbad8c56b9014216205d271101b/pyarrow-19.0.1-cp313-cp313-macosx_12_0_x86_64.whl", hash = 
"sha256:d9dedeaf19097a143ed6da37f04f4051aba353c95ef507764d344229b2b740ae", size = 32103413, upload-time = "2025-02-18T18:53:52.971Z" }, - { url = "https://files.pythonhosted.org/packages/6a/fa/a7033f66e5d4f1308c7eb0dfcd2ccd70f881724eb6fd1776657fdf65458f/pyarrow-19.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ebfb5171bb5f4a52319344ebbbecc731af3f021e49318c74f33d520d31ae0c4", size = 41134869, upload-time = "2025-02-18T18:53:59.471Z" }, - { url = "https://files.pythonhosted.org/packages/2d/92/34d2569be8e7abdc9d145c98dc410db0071ac579b92ebc30da35f500d630/pyarrow-19.0.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a21d39fbdb948857f67eacb5bbaaf36802de044ec36fbef7a1c8f0dd3a4ab2", size = 42192626, upload-time = "2025-02-18T18:54:06.062Z" }, - { url = "https://files.pythonhosted.org/packages/0a/1f/80c617b1084fc833804dc3309aa9d8daacd46f9ec8d736df733f15aebe2c/pyarrow-19.0.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:99bc1bec6d234359743b01e70d4310d0ab240c3d6b0da7e2a93663b0158616f6", size = 40496708, upload-time = "2025-02-18T18:54:12.347Z" }, - { url = "https://files.pythonhosted.org/packages/e6/90/83698fcecf939a611c8d9a78e38e7fed7792dcc4317e29e72cf8135526fb/pyarrow-19.0.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1b93ef2c93e77c442c979b0d596af45e4665d8b96da598db145b0fec014b9136", size = 42075728, upload-time = "2025-02-18T18:54:19.364Z" }, - { url = "https://files.pythonhosted.org/packages/40/49/2325f5c9e7a1c125c01ba0c509d400b152c972a47958768e4e35e04d13d8/pyarrow-19.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:d9d46e06846a41ba906ab25302cf0fd522f81aa2a85a71021826f34639ad31ef", size = 25242568, upload-time = "2025-02-18T18:54:25.846Z" }, - { url = "https://files.pythonhosted.org/packages/3f/72/135088d995a759d4d916ec4824cb19e066585b4909ebad4ab196177aa825/pyarrow-19.0.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:c0fe3dbbf054a00d1f162fda94ce236a899ca01123a798c561ba307ca38af5f0", size = 30702371, upload-time = "2025-02-18T18:54:30.665Z" }, - { url = "https://files.pythonhosted.org/packages/2e/01/00beeebd33d6bac701f20816a29d2018eba463616bbc07397fdf99ac4ce3/pyarrow-19.0.1-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:96606c3ba57944d128e8a8399da4812f56c7f61de8c647e3470b417f795d0ef9", size = 32116046, upload-time = "2025-02-18T18:54:35.995Z" }, - { url = "https://files.pythonhosted.org/packages/1f/c9/23b1ea718dfe967cbd986d16cf2a31fe59d015874258baae16d7ea0ccabc/pyarrow-19.0.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f04d49a6b64cf24719c080b3c2029a3a5b16417fd5fd7c4041f94233af732f3", size = 41091183, upload-time = "2025-02-18T18:54:42.662Z" }, - { url = "https://files.pythonhosted.org/packages/3a/d4/b4a3aa781a2c715520aa8ab4fe2e7fa49d33a1d4e71c8fc6ab7b5de7a3f8/pyarrow-19.0.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a9137cf7e1640dce4c190551ee69d478f7121b5c6f323553b319cac936395f6", size = 42171896, upload-time = "2025-02-18T18:54:49.808Z" }, - { url = "https://files.pythonhosted.org/packages/23/1b/716d4cd5a3cbc387c6e6745d2704c4b46654ba2668260d25c402626c5ddb/pyarrow-19.0.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:7c1bca1897c28013db5e4c83944a2ab53231f541b9e0c3f4791206d0c0de389a", size = 40464851, upload-time = "2025-02-18T18:54:57.073Z" }, - { url = "https://files.pythonhosted.org/packages/ed/bd/54907846383dcc7ee28772d7e646f6c34276a17da740002a5cefe90f04f7/pyarrow-19.0.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = 
"sha256:58d9397b2e273ef76264b45531e9d552d8ec8a6688b7390b5be44c02a37aade8", size = 42085744, upload-time = "2025-02-18T18:55:08.562Z" }, +version = "21.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ef/c2/ea068b8f00905c06329a3dfcd40d0fcc2b7d0f2e355bdb25b65e0a0e4cd4/pyarrow-21.0.0.tar.gz", hash = "sha256:5051f2dccf0e283ff56335760cbc8622cf52264d67e359d5569541ac11b6d5bc", size = 1133487, upload-time = "2025-07-18T00:57:31.761Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/d9/110de31880016e2afc52d8580b397dbe47615defbf09ca8cf55f56c62165/pyarrow-21.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e563271e2c5ff4d4a4cbeb2c83d5cf0d4938b891518e676025f7268c6fe5fe26", size = 31196837, upload-time = "2025-07-18T00:54:34.755Z" }, + { url = "https://files.pythonhosted.org/packages/df/5f/c1c1997613abf24fceb087e79432d24c19bc6f7259cab57c2c8e5e545fab/pyarrow-21.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:fee33b0ca46f4c85443d6c450357101e47d53e6c3f008d658c27a2d020d44c79", size = 32659470, upload-time = "2025-07-18T00:54:38.329Z" }, + { url = "https://files.pythonhosted.org/packages/3e/ed/b1589a777816ee33ba123ba1e4f8f02243a844fed0deec97bde9fb21a5cf/pyarrow-21.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:7be45519b830f7c24b21d630a31d48bcebfd5d4d7f9d3bdb49da9cdf6d764edb", size = 41055619, upload-time = "2025-07-18T00:54:42.172Z" }, + { url = "https://files.pythonhosted.org/packages/44/28/b6672962639e85dc0ac36f71ab3a8f5f38e01b51343d7aa372a6b56fa3f3/pyarrow-21.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:26bfd95f6bff443ceae63c65dc7e048670b7e98bc892210acba7e4995d3d4b51", size = 42733488, upload-time = "2025-07-18T00:54:47.132Z" }, + { url = "https://files.pythonhosted.org/packages/f8/cc/de02c3614874b9089c94eac093f90ca5dfa6d5afe45de3ba847fd950fdf1/pyarrow-21.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bd04ec08f7f8bd113c55868bd3fc442a9db67c27af098c5f814a3091e71cc61a", size = 43329159, upload-time = "2025-07-18T00:54:51.686Z" }, + { url = "https://files.pythonhosted.org/packages/a6/3e/99473332ac40278f196e105ce30b79ab8affab12f6194802f2593d6b0be2/pyarrow-21.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9b0b14b49ac10654332a805aedfc0147fb3469cbf8ea951b3d040dab12372594", size = 45050567, upload-time = "2025-07-18T00:54:56.679Z" }, + { url = "https://files.pythonhosted.org/packages/7b/f5/c372ef60593d713e8bfbb7e0c743501605f0ad00719146dc075faf11172b/pyarrow-21.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:9d9f8bcb4c3be7738add259738abdeddc363de1b80e3310e04067aa1ca596634", size = 26217959, upload-time = "2025-07-18T00:55:00.482Z" }, + { url = "https://files.pythonhosted.org/packages/94/dc/80564a3071a57c20b7c32575e4a0120e8a330ef487c319b122942d665960/pyarrow-21.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c077f48aab61738c237802836fc3844f85409a46015635198761b0d6a688f87b", size = 31243234, upload-time = "2025-07-18T00:55:03.812Z" }, + { url = "https://files.pythonhosted.org/packages/ea/cc/3b51cb2db26fe535d14f74cab4c79b191ed9a8cd4cbba45e2379b5ca2746/pyarrow-21.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:689f448066781856237eca8d1975b98cace19b8dd2ab6145bf49475478bcaa10", size = 32714370, upload-time = "2025-07-18T00:55:07.495Z" }, + { url = "https://files.pythonhosted.org/packages/24/11/a4431f36d5ad7d83b87146f515c063e4d07ef0b7240876ddb885e6b44f2e/pyarrow-21.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = 
"sha256:479ee41399fcddc46159a551705b89c05f11e8b8cb8e968f7fec64f62d91985e", size = 41135424, upload-time = "2025-07-18T00:55:11.461Z" }, + { url = "https://files.pythonhosted.org/packages/74/dc/035d54638fc5d2971cbf1e987ccd45f1091c83bcf747281cf6cc25e72c88/pyarrow-21.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:40ebfcb54a4f11bcde86bc586cbd0272bac0d516cfa539c799c2453768477569", size = 42823810, upload-time = "2025-07-18T00:55:16.301Z" }, + { url = "https://files.pythonhosted.org/packages/2e/3b/89fced102448a9e3e0d4dded1f37fa3ce4700f02cdb8665457fcc8015f5b/pyarrow-21.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8d58d8497814274d3d20214fbb24abcad2f7e351474357d552a8d53bce70c70e", size = 43391538, upload-time = "2025-07-18T00:55:23.82Z" }, + { url = "https://files.pythonhosted.org/packages/fb/bb/ea7f1bd08978d39debd3b23611c293f64a642557e8141c80635d501e6d53/pyarrow-21.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:585e7224f21124dd57836b1530ac8f2df2afc43c861d7bf3d58a4870c42ae36c", size = 45120056, upload-time = "2025-07-18T00:55:28.231Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0b/77ea0600009842b30ceebc3337639a7380cd946061b620ac1a2f3cb541e2/pyarrow-21.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:555ca6935b2cbca2c0e932bedd853e9bc523098c39636de9ad4693b5b1df86d6", size = 26220568, upload-time = "2025-07-18T00:55:32.122Z" }, + { url = "https://files.pythonhosted.org/packages/ca/d4/d4f817b21aacc30195cf6a46ba041dd1be827efa4a623cc8bf39a1c2a0c0/pyarrow-21.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3a302f0e0963db37e0a24a70c56cf91a4faa0bca51c23812279ca2e23481fccd", size = 31160305, upload-time = "2025-07-18T00:55:35.373Z" }, + { url = "https://files.pythonhosted.org/packages/a2/9c/dcd38ce6e4b4d9a19e1d36914cb8e2b1da4e6003dd075474c4cfcdfe0601/pyarrow-21.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:b6b27cf01e243871390474a211a7922bfbe3bda21e39bc9160daf0da3fe48876", size = 32684264, upload-time = "2025-07-18T00:55:39.303Z" }, + { url = "https://files.pythonhosted.org/packages/4f/74/2a2d9f8d7a59b639523454bec12dba35ae3d0a07d8ab529dc0809f74b23c/pyarrow-21.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e72a8ec6b868e258a2cd2672d91f2860ad532d590ce94cdf7d5e7ec674ccf03d", size = 41108099, upload-time = "2025-07-18T00:55:42.889Z" }, + { url = "https://files.pythonhosted.org/packages/ad/90/2660332eeb31303c13b653ea566a9918484b6e4d6b9d2d46879a33ab0622/pyarrow-21.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b7ae0bbdc8c6674259b25bef5d2a1d6af5d39d7200c819cf99e07f7dfef1c51e", size = 42829529, upload-time = "2025-07-18T00:55:47.069Z" }, + { url = "https://files.pythonhosted.org/packages/33/27/1a93a25c92717f6aa0fca06eb4700860577d016cd3ae51aad0e0488ac899/pyarrow-21.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:58c30a1729f82d201627c173d91bd431db88ea74dcaa3885855bc6203e433b82", size = 43367883, upload-time = "2025-07-18T00:55:53.069Z" }, + { url = "https://files.pythonhosted.org/packages/05/d9/4d09d919f35d599bc05c6950095e358c3e15148ead26292dfca1fb659b0c/pyarrow-21.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:072116f65604b822a7f22945a7a6e581cfa28e3454fdcc6939d4ff6090126623", size = 45133802, upload-time = "2025-07-18T00:55:57.714Z" }, + { url = "https://files.pythonhosted.org/packages/71/30/f3795b6e192c3ab881325ffe172e526499eb3780e306a15103a2764916a2/pyarrow-21.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:cf56ec8b0a5c8c9d7021d6fd754e688104f9ebebf1bf4449613c9531f5346a18", size = 26203175, upload-time = 
"2025-07-18T00:56:01.364Z" }, + { url = "https://files.pythonhosted.org/packages/16/ca/c7eaa8e62db8fb37ce942b1ea0c6d7abfe3786ca193957afa25e71b81b66/pyarrow-21.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e99310a4ebd4479bcd1964dff9e14af33746300cb014aa4a3781738ac63baf4a", size = 31154306, upload-time = "2025-07-18T00:56:04.42Z" }, + { url = "https://files.pythonhosted.org/packages/ce/e8/e87d9e3b2489302b3a1aea709aaca4b781c5252fcb812a17ab6275a9a484/pyarrow-21.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:d2fe8e7f3ce329a71b7ddd7498b3cfac0eeb200c2789bd840234f0dc271a8efe", size = 32680622, upload-time = "2025-07-18T00:56:07.505Z" }, + { url = "https://files.pythonhosted.org/packages/84/52/79095d73a742aa0aba370c7942b1b655f598069489ab387fe47261a849e1/pyarrow-21.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:f522e5709379d72fb3da7785aa489ff0bb87448a9dc5a75f45763a795a089ebd", size = 41104094, upload-time = "2025-07-18T00:56:10.994Z" }, + { url = "https://files.pythonhosted.org/packages/89/4b/7782438b551dbb0468892a276b8c789b8bbdb25ea5c5eb27faadd753e037/pyarrow-21.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:69cbbdf0631396e9925e048cfa5bce4e8c3d3b41562bbd70c685a8eb53a91e61", size = 42825576, upload-time = "2025-07-18T00:56:15.569Z" }, + { url = "https://files.pythonhosted.org/packages/b3/62/0f29de6e0a1e33518dec92c65be0351d32d7ca351e51ec5f4f837a9aab91/pyarrow-21.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:731c7022587006b755d0bdb27626a1a3bb004bb56b11fb30d98b6c1b4718579d", size = 43368342, upload-time = "2025-07-18T00:56:19.531Z" }, + { url = "https://files.pythonhosted.org/packages/90/c7/0fa1f3f29cf75f339768cc698c8ad4ddd2481c1742e9741459911c9ac477/pyarrow-21.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dc56bc708f2d8ac71bd1dcb927e458c93cec10b98eb4120206a4091db7b67b99", size = 45131218, upload-time = "2025-07-18T00:56:23.347Z" }, + { url = "https://files.pythonhosted.org/packages/01/63/581f2076465e67b23bc5a37d4a2abff8362d389d29d8105832e82c9c811c/pyarrow-21.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:186aa00bca62139f75b7de8420f745f2af12941595bbbfa7ed3870ff63e25636", size = 26087551, upload-time = "2025-07-18T00:56:26.758Z" }, + { url = "https://files.pythonhosted.org/packages/c9/ab/357d0d9648bb8241ee7348e564f2479d206ebe6e1c47ac5027c2e31ecd39/pyarrow-21.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:a7a102574faa3f421141a64c10216e078df467ab9576684d5cd696952546e2da", size = 31290064, upload-time = "2025-07-18T00:56:30.214Z" }, + { url = "https://files.pythonhosted.org/packages/3f/8a/5685d62a990e4cac2043fc76b4661bf38d06efed55cf45a334b455bd2759/pyarrow-21.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:1e005378c4a2c6db3ada3ad4c217b381f6c886f0a80d6a316fe586b90f77efd7", size = 32727837, upload-time = "2025-07-18T00:56:33.935Z" }, + { url = "https://files.pythonhosted.org/packages/fc/de/c0828ee09525c2bafefd3e736a248ebe764d07d0fd762d4f0929dbc516c9/pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:65f8e85f79031449ec8706b74504a316805217b35b6099155dd7e227eef0d4b6", size = 41014158, upload-time = "2025-07-18T00:56:37.528Z" }, + { url = "https://files.pythonhosted.org/packages/6e/26/a2865c420c50b7a3748320b614f3484bfcde8347b2639b2b903b21ce6a72/pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:3a81486adc665c7eb1a2bde0224cfca6ceaba344a82a971ef059678417880eb8", size = 42667885, upload-time = "2025-07-18T00:56:41.483Z" }, + { url = 
"https://files.pythonhosted.org/packages/0a/f9/4ee798dc902533159250fb4321267730bc0a107d8c6889e07c3add4fe3a5/pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fc0d2f88b81dcf3ccf9a6ae17f89183762c8a94a5bdcfa09e05cfe413acf0503", size = 43276625, upload-time = "2025-07-18T00:56:48.002Z" }, + { url = "https://files.pythonhosted.org/packages/5a/da/e02544d6997037a4b0d22d8e5f66bc9315c3671371a8b18c79ade1cefe14/pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6299449adf89df38537837487a4f8d3bd91ec94354fdd2a7d30bc11c48ef6e79", size = 44951890, upload-time = "2025-07-18T00:56:52.568Z" }, + { url = "https://files.pythonhosted.org/packages/e5/4e/519c1bc1876625fe6b71e9a28287c43ec2f20f73c658b9ae1d485c0c206e/pyarrow-21.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:222c39e2c70113543982c6b34f3077962b44fca38c0bd9e68bb6781534425c10", size = 26371006, upload-time = "2025-07-18T00:56:56.379Z" }, +] + +[[package]] +name = "pyarrow-stubs" +version = "20.0.0.20250928" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyarrow" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/18/5f/9520b0a5cd42b95a945b8ca3bc47f723fc7ec906b7a7de76f2d075d69911/pyarrow_stubs-20.0.0.20250928.tar.gz", hash = "sha256:e802b18e8e5fdf0a78afa05fae78f1456d861fcb1f95ec0234be5d6a5ecdcde2", size = 236588, upload-time = "2025-09-28T02:50:04.839Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/83/13/75c86a8ef61ea2c758c924318cf894dced2436b0f7aeb3c5f0fe9e4305b4/pyarrow_stubs-20.0.0.20250928-py3-none-any.whl", hash = "sha256:5389057a55db3c2662c05f22685a52e15e5effaf4345f41f12fb9b6b348647b9", size = 235745, upload-time = "2025-09-28T02:50:03.205Z" }, ] [[package]]