Skip to content

Commit f02fd70

Browse files
Add ASF Sentinel-1 bursts caching
1 parent 8245005 commit f02fd70

1 file changed

Lines changed: 71 additions & 12 deletions

File tree

  • insardev_toolkit/insardev_toolkit

insardev_toolkit/insardev_toolkit/ASF.py

Lines changed: 71 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -206,22 +206,49 @@ class _asf_search_module:
206206
asf_search = _asf_search_module()
207207
# ============================================================================
208208

209+
# Cloudflare Worker cache proxy for Sentinel-1 (S1) bursts (handles authentication internally)
210+
_S1_CACHE_PROXY = 'https://s1-cache-asf.insar.dev'
211+
_ASF_BURST_HOST = 'https://sentinel1-burst.asf.alaska.edu'
212+
213+
209214
class ASF(progressbar_joblib):
210215
import pandas as pd
211216
from datetime import timedelta
212217

213218
def __init__(self, username=None, password=None):
    """Create an ASF downloader, optionally bound to Earthdata Login credentials.

    Parameters
    ----------
    username : str, optional
        Earthdata Login username. Stored unchanged on the instance.
    password : str, optional
        Earthdata Login password. Stored unchanged on the instance.

    Notes
    -----
    Nothing is validated or contacted here; the values are only recorded.
    When ``username`` is left as None, ``_get_asf_session`` returns a plain
    ``requests.Session`` and ``_get_burst_url`` redirects ASF burst URLs to
    the Cloudflare cache proxy at s1-cache-asf.insar.dev, which handles
    authentication internally.
    """
    # keep both values exactly as given; None means "no credentials"
    self.username, self.password = username, password
221235

222236
def _get_asf_session(self):
    """Return the HTTP session to use for ASF downloads.

    With a username set, an ``asf_search.ASFSession`` authenticated through
    ``auth_with_creds(username, password)`` is returned. Without a username,
    a plain ``requests.Session`` is returned, because the cache proxy takes
    care of authentication itself.
    """
    if self.username is not None:
        return asf_search.ASFSession().auth_with_creds(self.username, self.password)
    # no credentials: the cache proxy authenticates on our behalf
    return requests.Session()
224245

246+
def _get_burst_url(self, original_url):
    """Map an ASF burst URL onto the cache proxy when no credentials are set.

    Parameters
    ----------
    original_url : str
        URL to download.

    Returns
    -------
    str
        ``original_url`` with its leading ``_ASF_BURST_HOST`` swapped for
        ``_S1_CACHE_PROXY`` when ``self.username`` is None and the URL starts
        with that host; otherwise ``original_url`` unchanged.
    """
    if self.username is None and original_url.startswith(_ASF_BURST_HOST):
        # Swap only the leading host: str.replace() would also rewrite any
        # later occurrence of the host text (e.g. inside a query string).
        return _S1_CACHE_PROXY + original_url[len(_ASF_BURST_HOST):]
    return original_url
251+
225252
@staticmethod
226253
def _detect_mission(granule_name):
227254
"""Detect satellite mission from granule/burst name.
@@ -360,7 +387,7 @@ def _nisar_exists(basedir, granule_id, polarization):
360387
return os.path.exists(out_path)
361388

362389
# https://asf.alaska.edu/datasets/data-sets/derived-data-sets/sentinel-1-bursts/
363-
def download(self, basedir, bursts, polarization=None, frequency=None, session=None, n_jobs=8, joblib_backend='loky', skip_exist=True,
390+
def download(self, basedir, bursts, polarization=None, frequency=None, session=None, n_jobs=4, joblib_backend='loky', skip_exist=True,
364391
retries=30, timeout_second=3, debug=False):
365392
"""
366393
Download SAR data from ASF.
@@ -556,6 +583,10 @@ def filter_azimuth_time(items, start_utc_dt, stop_utc_dt, delta=3):
556583
if len(bursts_missed) == 0:
557584
return None
558585

586+
# URL transformer for cache proxy
587+
def get_burst_url(url):
588+
return self._get_burst_url(url)
589+
559590
def download_burst(result, basedir, session):
560591
properties = result.geojson()['properties']
561592
#print ('result properties', properties)
@@ -606,10 +637,23 @@ def download_burst(result, basedir, session):
606637
return
607638

608639
# download manifest to memory to get dimensions for TIFF validation
609-
manifest_url = properties['additionalUrls'][0]
610-
response = session.get(manifest_url)
640+
manifest_url = get_burst_url(properties['additionalUrls'][0])
641+
response = session.get(manifest_url, stream=True)
611642
response.raise_for_status()
612-
xml_content = response.text
643+
cache_status = response.headers.get('x-cache', 'N/A')
644+
cache_enc = response.headers.get('content-encoding', 'none')
645+
# Read and decompress for debug logging
646+
if debug and cache_enc in ('br', 'gzip', 'deflate'):
647+
import brotli
648+
compressed_bytes = response.raw.read()
649+
transfer_mb = len(compressed_bytes) / 1024 / 1024
650+
xml_content = brotli.decompress(compressed_bytes).decode('utf-8')
651+
print(f' XML {cache_status:4} {cache_enc:4} {transfer_mb:5.1f}MB {burst}')
652+
else:
653+
xml_content = response.text
654+
if debug:
655+
size_mb = len(xml_content.encode()) / 1024 / 1024
656+
print(f' XML {cache_status:4} {cache_enc:4} {size_mb:5.1f}MB {burst}')
613657
if len(xml_content) == 0:
614658
raise Exception(f'ERROR: Downloaded manifest is empty: {manifest_url}')
615659
# check if server returned JSON error instead of XML
@@ -662,10 +706,25 @@ def download_burst(result, basedir, session):
662706
from rasterio.io import MemoryFile
663707

664708
# Download TIFF to memory
665-
tiff_url = properties['url']
666-
response = session.get(tiff_url)
709+
tiff_url = get_burst_url(properties['url'])
710+
response = session.get(tiff_url, stream=True)
667711
response.raise_for_status()
668-
tiff_bytes = response.content
712+
cache_status = response.headers.get('x-cache', 'N/A')
713+
cache_enc = response.headers.get('content-encoding', 'none')
714+
# Read raw compressed bytes to measure transfer size
715+
if debug and cache_enc in ('br', 'gzip', 'deflate'):
716+
# Read compressed bytes directly
717+
compressed_bytes = response.raw.read()
718+
transfer_mb = len(compressed_bytes) / 1024 / 1024
719+
# Decompress manually
720+
import brotli
721+
tiff_bytes = brotli.decompress(compressed_bytes)
722+
print(f' TIFF {cache_status:4} {cache_enc:4} {transfer_mb:5.1f}MB {burst}')
723+
else:
724+
tiff_bytes = response.content
725+
if debug:
726+
size_mb = len(tiff_bytes) / 1024 / 1024
727+
print(f' TIFF {cache_status:4} {cache_enc:4} {size_mb:5.1f}MB {burst}')
669728
if len(tiff_bytes) == 0:
670729
raise Exception(f'ERROR: Downloaded TIFF is empty: {tiff_url}')
671730

0 commit comments

Comments
 (0)