Geepers provides a unified interface to access GPS time series data from multiple sources and compare the time series to InSAR line-of-sight (LOS) measurements.
The current available GPS data sources are:
UnrSource: Nevada Geodetic Laboratory (UNR) GPS dataUnrGridSource: Nevada Geodetic Laboratory (UNR) Gridded DataSideshowSource: Jet Propulsion Laboratory (JPL) Sideshow GPS data
All sources implement a common interface through the BaseGpsSource class, making it easy to switch between data providers or combine data from multiple sources.
from geepers.gps_sources import UnrSource
# Create a GPS data source
unr = UnrSource()
# Get station information within a bounding box
stations = unr.stations(bbox=(-120, 35, -115, 40)) # (west, south, east, north)
print(f"Found {len(stations)} stations")
print(stations[['name', 'lat', 'lon']].head())
# Load time series data for a specific station
timeseries = unr.timeseries('P123', frame='ENU', zero_by='mean')
print(timeseries.head())
# Load data for many stations into one GeoDataFrame
df_many = unr.timeseries_many(bbox=(-115, 32, -105, 37))
print(df_many.head())The basic InSAR/GNSS comparison workflow is offered by the geepers command line tool.
geepers --los F33039_los_enu.tif --timeseries-files displacement_20160711_*tif --temporal-coherence-files temporal_coherence_*.tif --similarity-files phase_similarity*.tifThe results are saved in the current directory in the GPS folder by default.
(TODO: Example data prep for this)
from geepers.gps_sources import UnrSource, SideshowSource
# Compare data from different sources
unr = UnrSource()
jpl = SideshowSource()
# Get the same station from both sources
station_id = 'P123'
unr_data = unr.timeseries(station_id, frame='ENU')
jpl_data = jpl.timeseries(station_id, frame='ENU')
print(f"UNR: {len(unr_data)} observations")
print(f"JPL: {len(jpl_data)} observations")The UNR source provides access to high-quality GPS time series data from the Nevada Geodetic Laboratory.
from geepers.gps_sources import UnrSource
unr = UnrSource()
# Get all available stations
all_stations = unr.stations()
# Load time series with date filtering
data = unr.timeseries(
'P123',
frame='ENU', # 'ENU' or 'XYZ'
start_date='2020-01-01', # ISO format date
end_date='2023-12-31',
zero_by='mean', # 'mean' or 'start'
download_if_missing=True # Download if not cached
)Data characteristics:
- High spatial coverage
- Both ENU and XYZ coordinate frames
The UNR Grid source provides interpolated GPS velocities on a regular grid, useful for regional studies.
from geepers.gps_sources import UnrGridSource
grid = UnrGridSource()
# Get grid points within a region
grid_points = grid.stations(bbox=(-120, 35, -115, 40))
# Load grid time series (note: different parameters)
data = grid.timeseries(
'000123', # 6-digit grid point ID
frame='ENU', # Only ENU supported
plate='IGS14', # 'NA', 'PA', or 'IGS14'
zero_by='mean'
)Data characteristics:
- Reprocessed data onto a regular hexagonal grid (~10-20 km)
- ENU coordinates only
- Different plate reference frames
- Regional coverage
JPL's Sideshow provides high quality processed GPS time series using the GipsyX software.
from geepers.gps_sources import SideshowSource
jpl = SideshowSource()
# Get all available stations
stations = jpl.stations()
# Load time series for a specific station
data = jpl.timeseries(
'AB01',
frame='ENU',
start_date='2010-01-01',
end_date='2020-12-31'
)Data characteristics:
- Reprocessed for consistency
- High precision
from geepers.gps_sources import UnrSource
import matplotlib.pyplot as plt
# Define study region (e.g., Southern California)
bbox = (-121, 34, -117, 36) # (west, south, east, north)
# Get GPS stations in the region
unr = UnrSource()
stations = unr.stations(bbox=bbox)
print(f"Found {len(stations)} stations in California")
# Load data for all stations
df_all = unr.timeseries_many(bbox=bbox)
print(df_all.head())
# id date east north up sigma_east sigma_north sigma_up corr_en corr_eu corr_nu lon lat alt geometry
# 0 CHIR 2010-08-20 0.054361 0.027192 -0.007630 0.000737 0.000847 0.003072 0.058084 -0.134020 -0.073392 -109.366439 32.005681 1614.17452 POINT (-109.36644 32.00568)
# 1 CHIR 2010-08-21 0.056075 0.028543 -0.022277 0.000742 0.000844 0.003166 0.075097 -0.176761 -0.122798 -109.366439 32.005681 1614.17452 POINT (-109.36644 32.00568)
# ...
# Plot time series from 2 example stations
df = df_all[df_all.id.isin (["BAK1", "P809"])]
df_wide = df.pivot(index="date", columns=["id"], values=["east", "north", "up"])
fig, axes = plt.subplots(3, 1, figsize=(12, 8), sharex=True)
for i, component in enumerate(['east', 'north', 'up']):
df_wide[component].plot(ax=axes[i])
axes[i].set_ylabel(f'{component.title()} (mm)')
plt.tight_layout()
plt.show()import geopandas as gpd
import matplotlib.pyplot as plt
from geepers.gps_sources import UnrSource
# Get stations and plot on map
unr = UnrSource()
stations = unr.stations(bbox=(-121, 34, -117, 36))
# Create a simple map
fig, ax = plt.subplots(figsize=(10, 8))
stations.plot(ax=ax, markersize=50, alpha=0.6)
# Add basemap context
import contextily as ctx
ctx.add_basemap(ax, crs=stations.crs, source=ctx.providers.OpenStreetMap.Mapnik)
ax.set_title('GPS Stations in Southern California')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
plt.show()# Assess data quality for a station
station_name = 'P123'
data = unr.timeseries(station_name)
# Basic statistics
print(f"Station: {station_name}")
print(f"Date range: {data['date'].min()} to {data['date'].max()}")
print(f"Number of observations: {len(data)}")
# Check uncertainties
print("\nUncertainty statistics (mm):")
for component in ['sigma_east', 'sigma_north', 'sigma_up']:
sigma = data[component].describe()
print(f"{component}: mean={sigma['mean']:.2f}, std={sigma['std']:.2f}")
# Plot uncertainties over time
fig, ax = plt.subplots(figsize=(12, 6))
for component, sigma_col in [('East', 'sigma_east'), ('North', 'sigma_north'), ('Up', 'sigma_up')]:
ax.plot(data['date'], 1000 *data[sigma_col], label=f'{component} σ', alpha=0.7)
ax.set_ylabel('Uncertainty (mm)')
ax.set_xlabel('Date')
ax.legend()
ax.set_title(f'GPS Uncertainty Time Series - {station_name}')
plt.show()from geepers.gps_sources import UnrSource, SideshowSource
# Compare UNR and JPL data for the same station
station_id = 'P123'
unr = UnrSource()
jpl = SideshowSource()
# Load data from both sources
unr_data = unr.timeseries(station_id, start_date='2015-01-01', end_date='2020-12-31')
jpl_data = jpl.timeseries(station_id, start_date='2015-01-01', end_date='2020-12-31')
# Merge on date for comparison
import pandas as pd
comparison = pd.merge(unr_data, jpl_data, on='date', suffixes=('_unr', '_jpl'))
# Plot comparison
fig, axes = plt.subplots(3, 1, figsize=(12, 10), sharex=True)
components = ['east', 'north', 'up']
for i, comp in enumerate(components):
axes[i].plot(comparison['date'], comparison[f'{comp}_unr'],
label='UNR', alpha=0.7)
axes[i].plot(comparison['date'], comparison[f'{comp}_jpl'],
label='JPL', alpha=0.7)
axes[i].set_ylabel(f'{comp.title()} (mm)')
axes[i].legend()
# Calculate RMS difference
diff = comparison[f'{comp}_unr'] - comparison[f'{comp}_jpl']
rms = (diff**2).mean()**0.5
axes[i].set_title(f'{comp.title()} - RMS difference: {rms:.2f} mm')
plt.xlabel('Date')
plt.suptitle(f'GPS Data Comparison: UNR vs JPL - Station {station_id}')
plt.tight_layout()
plt.show()from geepers.gps_sources import UnrSource
# Use custom cache directory
unr = UnrSource(cache_dir='/path/to/my/cache')from shapely.geometry import Polygon
import geopandas as gpd
# Define a custom polygon (e.g., California outline)
california_poly = Polygon([
(-125, 32), (-114, 32), (-114, 42), (-125, 42), (-125, 32)
])
california_mask = gpd.GeoSeries([california_poly], crs='EPSG:4326')
# Get stations within the polygon
stations = unr.stations(mask=california_mask)import concurrent.futures
from tqdm import tqdm
def load_station_data(station_name):
"""Load data for a single station."""
try:
return station_name, unr.timeseries(station_name)
except Exception as e:
return station_name, None
# Get list of stations
stations = unr.stations(bbox=(-120, 35, -115, 40))
station_names = stations['name'].tolist()
# Load data in parallel
results = {}
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
futures = [executor.submit(load_station_data, name) for name in station_names]
for future in tqdm(concurrent.futures.as_completed(futures),
total=len(futures), desc="Loading GPS data"):
station_name, data = future.result()
if data is not None:
results[station_name] = data
print(f"Successfully loaded data for {len(results)} stations")See geepers.schemas for more information about the Pandera data schemas.
