Issue Description: “Read failed: The server failed to authenticate the request for ABFS.”
Catalog Name: Infrastructure
Dataset Name: Microsoft Building Footprints
URL: Microsoft Building Footprints | Planetary Computer
Impacted File Names:
- Indonesia_2022-07-06
- Asia_2022-07-06
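For context, the sample code below assumes that `items` (the STAC items for the impacted regions) and `bbox` already exist. A minimal sketch of how they might have been produced with pystac-client; the exact search parameters from the original run are not shown in this issue, and the bounding box here is purely illustrative:

    import planetary_computer
    import pystac_client

    # Illustrative bounding box only; the actual AOI from the original run is not shown.
    bbox = [106.6, -6.4, 107.0, -6.1]

    catalog = pystac_client.Client.open(
        "https://planetarycomputer.microsoft.com/api/stac/v1"
    )
    search = catalog.search(collections=["ms-buildings"], bbox=bbox)
    items = list(search.items())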
Sample code:
def process_parquet_via_fsspec(asset, bbox):
    import geopandas as gpd
    import pandas as pd
    import tempfile
    from shapely.geometry import box
    from adlfs import AzureBlobFileSystem

    try:
        # Get Azure Blob storage options from the asset
        storage_opts = asset.extra_fields["table:storage_options"]
        # Create Azure filesystem
        fs = AzureBlobFileSystem(**storage_opts)
        # List all parquet part files under the href
        parts = fs.ls(asset.href)
        # Read and combine them into a single GeoDataFrame
        dfs = [
            gpd.read_parquet(f"az://{p}", storage_options=storage_opts) for p in parts
        ]
        df = pd.concat(dfs)
        # Spatial filter by bounding box
        minx, miny, maxx, maxy = bbox
        geom = box(minx, miny, maxx, maxy)
        subset = df[df.intersects(geom)]
        # Save filtered data to a temporary parquet file
        tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".parquet")
        subset.to_parquet(tmp_file.name)
        return tmp_file.name, None
    except Exception as e:
        return None, str(e)
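The "server failed to authenticate the request" error from ABFS generally indicates that a request went out without a valid SAS credential, for example because the token in `table:storage_options` is missing or has expired. A minimal diagnostic sketch, assuming the ms-buildings collection and that signing populates a `credential` key in `table:storage_options` (the usual way the Planetary Computer SDK supplies the SAS token), to check what the signed asset actually carries before building the filesystem:

    import planetary_computer
    import pystac_client

    catalog = pystac_client.Client.open(
        "https://planetarycomputer.microsoft.com/api/stac/v1",
        modifier=planetary_computer.sign_inplace,
    )
    # Grab one signed ms-buildings item and inspect its table storage options
    item = next(catalog.search(collections=["ms-buildings"], max_items=1).items())
    for key, asset in item.assets.items():
        opts = asset.extra_fields.get("table:storage_options", {})
        print(
            key,
            "| account:", opts.get("account_name"),
            "| credential present:", bool(opts.get("credential")),
        )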
import planetary_computer

# write_to_lakehouse is a helper defined elsewhere in the notebook (not shown here)
records = []
seen = set()
for item in items:
    signed_item = planetary_computer.sign(item)
    item_id = signed_item.id
    dt = (
        signed_item.datetime
        or signed_item.properties.get("start_datetime")
        or "Not Available"
    )
    dt_str = dt.isoformat() if hasattr(dt, "isoformat") else str(dt)
    date_part = dt_str[:10]
    for key, asset in signed_item.assets.items():
        # Only process Parquet assets
        if "parquet" not in asset.href.lower():
            print(f" Skipping non-Parquet: {key}")
            continue
        file_name = f"{item_id}_{key}.parquet"
        rel_path = f"ms-buildings/{date_part}/{file_name}"
        if rel_path in seen:
            continue
        seen.add(rel_path)
        print(f" Attempting: {key} from {item_id}")
        asset = planetary_computer.sign(asset)
        tmp_path, error = process_parquet_via_fsspec(asset, bbox)
        if tmp_path:
            success, result = write_to_lakehouse(tmp_path, rel_path)
            if success:
                status = "success"
                file_path = f"/lakehouse/default/Files/{rel_path}"
                err_msg = None
                print(f" Saved: {file_name}")
            else:
                status = "failed"
                file_path = None
                err_msg = f"Write failed: {result}"
                print(f" Write failed: {result}")
        else:
            status = "failed"
            file_path = None
            err_msg = f"Read failed: {error}"
            print(f" Read failed: {error}")
        records.append(
            {
                "item_id": item_id,
                "datetime": dt_str,
                "asset_key": key,
                "asset_href": asset.href,
                "file_type": ".parquet",
                "file_path": file_path,
                "status": status,
                "error_message": err_msg,
            }
        )
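As a possible workaround (not yet confirmed to fix the authentication error), the part files could be read through the same AzureBlobFileSystem instance used for listing, instead of re-resolving az:// URLs with storage_options, so that every open uses the signed credential from `table:storage_options` directly. A sketch, assuming a freshly signed parquet asset like the one passed to the sample function above:

    import geopandas as gpd
    import pandas as pd
    from adlfs import AzureBlobFileSystem

    def read_parts_via_filesystem(asset):
        # Reuse the signed storage options for both listing and reading,
        # so every request carries the same SAS credential.
        storage_opts = asset.extra_fields["table:storage_options"]
        fs = AzureBlobFileSystem(**storage_opts)
        parts = [p for p in fs.find(asset.href) if p.endswith(".parquet")]
        dfs = []
        for p in parts:
            with fs.open(p, "rb") as f:  # file-like object; no URL re-resolution
                dfs.append(gpd.read_parquet(f))
        return pd.concat(dfs, ignore_index=True)

If the failure only appears partway through a long run, the SAS token may simply be expiring; re-signing the asset immediately before each read (as the sample already does) and keeping the listing and reads close together in time may also help.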