Skip to content

Commit b0a4de5

Browse files
Enhance S1 Burst downloaders
1 parent a929127 commit b0a4de5

2 files changed

Lines changed: 31 additions & 33 deletions

File tree

insardev_toolkit/insardev_toolkit/ASF.py

Lines changed: 19 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -746,21 +746,13 @@ def download_burst(result, basedir, session):
746746
else:
747747
# Download and validate TIFF entirely in memory before writing to disk
748748
import io
749-
import rasterio
750-
from rasterio.io import MemoryFile
751749

752-
# Download TIFF to memory with total time limit
750+
# Download TIFF fully into memory (no streaming — requests will raise
751+
# on incomplete/truncated responses instead of silently returning partial data)
753752
tiff_url = get_burst_url(properties['url'])
754-
response = session.get(tiff_url, stream=True, timeout=(10, 60))
753+
response = session.get(tiff_url, timeout=(10, 300))
755754
response.raise_for_status()
756-
chunks = []
757-
download_start = time.time()
758-
for chunk in response.iter_content(chunk_size=1024*1024):
759-
chunks.append(chunk)
760-
if time.time() - download_start > 120:
761-
response.close()
762-
raise Exception(f'Download exceeded 120s for {burst}')
763-
tiff_bytes = b''.join(chunks)
755+
tiff_bytes = response.content
764756
if debug:
765757
cache_status = response.headers.get('x-cache', 'N/A')
766758
size_mb = len(tiff_bytes) / 1024 / 1024
@@ -780,25 +772,23 @@ def download_burst(result, basedir, session):
780772
except json.JSONDecodeError:
781773
raise Exception(f'ERROR: ASF server returned invalid response for {burst}: {tiff_bytes[:100]!r}')
782774

783-
# Validate TIFF structure and dimensions in memory using TiffFile
775+
# Validate TIFF structure, dimensions, and completeness using TiffFile
784776
with TiffFile(io.BytesIO(tiff_bytes)) as tif:
785777
page = tif.pages[0]
786778
actual_lines, actual_samples = page.shape
787779
if actual_lines != lines_per_burst or actual_samples != samples_per_burst:
788780
raise Exception(f'ERROR: Downloaded TIFF dimensions mismatch for {burst}: '
789781
f'got {actual_lines}x{actual_samples}, expected {lines_per_burst}x{samples_per_burst}. '
790782
f'ASF burst extraction may have failed.')
783+
# Verify all strip data fits within the downloaded bytes
784+
for offset, bytecount in zip(page.dataoffsets, page.databytecounts):
785+
if offset + bytecount > len(tiff_bytes):
786+
raise Exception(f'ERROR: Downloaded TIFF truncated for {burst}: '
787+
f'strip at offset {offset} needs {bytecount} bytes '
788+
f'but file is only {len(tiff_bytes)} bytes.')
791789
# Also get offset for XML creation
792790
tiff_offset = page.dataoffsets[0]
793791

794-
# Validate TIFF can be read by rasterio/GDAL (detects corruption)
795-
with MemoryFile(tiff_bytes) as memfile:
796-
with memfile.open() as ds:
797-
if ds.width != samples_per_burst or ds.height != lines_per_burst:
798-
raise Exception(f'ERROR: Rasterio dimensions mismatch for {burst}')
799-
# Read a small portion to verify data is accessible
800-
_ = ds.read(1, window=rasterio.windows.Window(0, 0, min(100, ds.width), min(100, ds.height)))
801-
802792
# TIFF validated - now build XML content in memory before writing anything
803793

804794
# Build XML content in memory (or skip if files exist)
@@ -949,16 +939,19 @@ def download_burst(result, basedir, session):
949939

950940
xml_contents[xml_calib_file] = xmltodict.unparse({'calibration': calibration}, pretty=True, indent=' ')
951941

952-
# All validations passed - now write everything to disk atomically
953-
# Write TIFF if we downloaded it (tiff_bytes exists in local scope)
942+
# All validations passed - write to temp files then atomic rename.
943+
# This guarantees no partial files on disk if interrupted mid-write.
954944
if 'tiff_bytes' in dir():
955-
with open(tif_file, 'wb') as f:
945+
tmp = tif_file + '.tmp'
946+
with open(tmp, 'wb') as f:
956947
f.write(tiff_bytes)
948+
os.rename(tmp, tif_file)
957949

958-
# Write all XML files
959950
for filepath, content in xml_contents.items():
960-
with open(filepath, 'w') as f:
951+
tmp = filepath + '.tmp'
952+
with open(tmp, 'w') as f:
961953
f.write(content)
954+
os.rename(tmp, filepath)
962955

963956
with tqdm(desc=f'Downloading ASF Catalog'.ljust(25), total=1) as pbar:
964957
results = asf_search.granule_search(bursts_missed)

insardev_toolkit/insardev_toolkit/CDSE.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -695,11 +695,11 @@ def filter_azimuth_time(items, start_utc_dt, stop_utc_dt, delta=3):
695695
redirect_url = response.headers.get('Location')
696696
if redirect_url:
697697
# Follow redirect with auth header, allow further redirects
698-
response = requests.post(redirect_url, headers=headers, stream=True,
699-
timeout=120, allow_redirects=True)
698+
response = requests.post(redirect_url, headers=headers,
699+
timeout=(10, 300), allow_redirects=True)
700700
else:
701701
# Cache proxy - simple GET
702-
response = session.get(url, stream=True, timeout=120)
702+
response = session.get(url, timeout=(10, 300))
703703

704704
if response.status_code != 200:
705705
try:
@@ -712,7 +712,7 @@ def filter_azimuth_time(items, start_utc_dt, stop_utc_dt, delta=3):
712712
cache_status = response.headers.get('cf-cache-status', response.headers.get('x-cache', 'N/A'))
713713
cache_enc = response.headers.get('content-encoding', 'none')
714714

715-
zip_bytes = b''.join(response.iter_content(chunk_size=1024*1024))
715+
zip_bytes = response.content
716716
if len(zip_bytes) == 0:
717717
raise Exception(f'ERROR: Downloaded ZIP is empty for {burst}')
718718

@@ -978,13 +978,18 @@ def filter_azimuth_time(items, start_utc_dt, stop_utc_dt, delta=3):
978978

979979
xml_contents[xml_calib_file] = xmltodict.unparse({'calibration': calibration}, pretty=True, indent=' ')
980980

981-
# All validations passed - write everything to disk atomically
982-
with open(tif_file, 'wb') as f:
981+
# All validations passed - write to temp files then atomic rename.
982+
# This guarantees no partial files on disk if interrupted mid-write.
983+
tmp = tif_file + '.tmp'
984+
with open(tmp, 'wb') as f:
983985
f.write(tiff_bytes)
986+
os.rename(tmp, tif_file)
984987

985988
for filepath, content in xml_contents.items():
986-
with open(filepath, 'w') as f:
989+
tmp = filepath + '.tmp'
990+
with open(tmp, 'w') as f:
987991
f.write(content)
992+
os.rename(tmp, filepath)
988993

989994
return cache_status # Return cache status (HIT/MISS/etc)
990995

0 commit comments

Comments
 (0)