Skip to content

opera-adt/geepers

Repository files navigation

geepers

Actions Status PyPI version Documentation Status

Conda-Forge GitHub Discussion

Overview

Geepers provides a unified interface to access GPS time series data from multiple sources and compare the time series to InSAR line-of-sight (LOS) measurements.

The current available GPS data sources are:

All sources implement a common interface through the BaseGpsSource class, making it easy to switch between data providers or combine data from multiple sources.

Quick Start

Basic Usage

from geepers.gps_sources import UnrSource

# Create a GPS data source
unr = UnrSource()

# Get station information within a bounding box
stations = unr.stations(bbox=(-120, 35, -115, 40))  # (west, south, east, north)
print(f"Found {len(stations)} stations")
print(stations[['name', 'lat', 'lon']].head())

# Load time series data for a specific station
timeseries = unr.timeseries('P123', frame='ENU', zero_by='mean')
print(timeseries.head())

# Load data for many stations into one GeoDataFrame

df_many = unr.timeseries_many(bbox=(-115, 32, -105, 37))
print(df_many.head())

Example: Comparing GPS and InSAR Data

The basic InSAR/GNSS comparison workflow is offered by the geepers command line tool.

geepers --los F33039_los_enu.tif --timeseries-files displacement_20160711_*tif --temporal-coherence-files temporal_coherence_*.tif --similarity-files phase_similarity*.tif

The results are saved in the current directory in the GPS folder by default.

(TODO: Example data prep for this)

Working with Multiple Sources

from geepers.gps_sources import UnrSource, SideshowSource

# Compare data from different sources
unr = UnrSource()
jpl = SideshowSource()

# Get the same station from both sources
station_id = 'P123'
unr_data = unr.timeseries(station_id, frame='ENU')
jpl_data = jpl.timeseries(station_id, frame='ENU')

print(f"UNR: {len(unr_data)} observations")
print(f"JPL: {len(jpl_data)} observations")

Data Sources

1. UnrSource - Nevada Geodetic Laboratory (UNR)

The UNR source provides access to high-quality GPS time series data from the Nevada Geodetic Laboratory.

from geepers.gps_sources import UnrSource

unr = UnrSource()

# Get all available stations
all_stations = unr.stations()

# Load time series with date filtering
data = unr.timeseries(
    'P123',
    frame='ENU',                    # 'ENU' or 'XYZ'
    start_date='2020-01-01',       # ISO format date
    end_date='2023-12-31',
    zero_by='mean',                # 'mean' or 'start'
    download_if_missing=True       # Download if not cached
)

Data characteristics:

  • High spatial coverage
  • Both ENU and XYZ coordinate frames

2. UnrGridSource - Nevada Geodetic Laboratory (UNR) Gridded Data

The UNR Grid source provides interpolated GPS velocities on a regular grid, useful for regional studies.

from geepers.gps_sources import UnrGridSource

grid = UnrGridSource()

# Get grid points within a region
grid_points = grid.stations(bbox=(-120, 35, -115, 40))

# Load grid time series (note: different parameters)
data = grid.timeseries(
    '000123',                      # 6-digit grid point ID
    frame='ENU',                   # Only ENU supported
    plate='IGS14',                 # 'NA', 'PA', or 'IGS14'
    zero_by='mean'
)

Data characteristics:

  • Reprocessed data onto a regular hexagonal grid (~10-20 km)
  • ENU coordinates only
  • Different plate reference frames
  • Regional coverage

3. SideshowSource - Jet Propulsion Laboratory (JPL) Sideshow

JPL's Sideshow provides high quality processed GPS time series using the GipsyX software.

from geepers.gps_sources import SideshowSource

jpl = SideshowSource()

# Get all available stations
stations = jpl.stations()

# Load time series for a specific station
data = jpl.timeseries(
    'AB01',
    frame='ENU',
    start_date='2010-01-01',
    end_date='2020-12-31'
)

Data characteristics:

  • Reprocessed for consistency
  • High precision

Common Workflows

1. Regional GPS Analysis

from geepers.gps_sources import UnrSource
import matplotlib.pyplot as plt

# Define study region (e.g., Southern California)
bbox = (-121, 34, -117, 36)  # (west, south, east, north)
# Get GPS stations in the region
unr = UnrSource()
stations = unr.stations(bbox=bbox)
print(f"Found {len(stations)} stations in California")

# Load data for all stations
df_all = unr.timeseries_many(bbox=bbox)
print(df_all.head())
#      id       date      east     north        up  sigma_east  sigma_north  sigma_up   corr_en   corr_eu   corr_nu         lon        lat         alt                     geometry
# 0  CHIR 2010-08-20  0.054361  0.027192 -0.007630    0.000737     0.000847  0.003072  0.058084 -0.134020 -0.073392 -109.366439  32.005681  1614.17452  POINT (-109.36644 32.00568)
# 1  CHIR 2010-08-21  0.056075  0.028543 -0.022277    0.000742     0.000844  0.003166  0.075097 -0.176761 -0.122798 -109.366439  32.005681  1614.17452  POINT (-109.36644 32.00568)
# ...


# Plot time series from 2 example stations
df = df_all[df_all.id.isin (["BAK1", "P809"])]
df_wide = df.pivot(index="date", columns=["id"], values=["east", "north", "up"])

fig, axes = plt.subplots(3, 1, figsize=(12, 8), sharex=True)
for i, component in enumerate(['east', 'north', 'up']):
    df_wide[component].plot(ax=axes[i])
    axes[i].set_ylabel(f'{component.title()} (mm)')

plt.tight_layout()
plt.show()

2. Spatial GPS Visualization

import geopandas as gpd
import matplotlib.pyplot as plt
from geepers.gps_sources import UnrSource

# Get stations and plot on map
unr = UnrSource()
stations = unr.stations(bbox=(-121, 34, -117, 36))

# Create a simple map
fig, ax = plt.subplots(figsize=(10, 8))
stations.plot(ax=ax, markersize=50, alpha=0.6)

# Add basemap context
import contextily as ctx
ctx.add_basemap(ax, crs=stations.crs, source=ctx.providers.OpenStreetMap.Mapnik)

ax.set_title('GPS Stations in Southern California')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
plt.show()

3. Data Quality Assessment

# Assess data quality for a station
station_name = 'P123'
data = unr.timeseries(station_name)

# Basic statistics
print(f"Station: {station_name}")
print(f"Date range: {data['date'].min()} to {data['date'].max()}")
print(f"Number of observations: {len(data)}")

# Check uncertainties
print("\nUncertainty statistics (mm):")
for component in ['sigma_east', 'sigma_north', 'sigma_up']:
    sigma = data[component].describe()
    print(f"{component}: mean={sigma['mean']:.2f}, std={sigma['std']:.2f}")

# Plot uncertainties over time
fig, ax = plt.subplots(figsize=(12, 6))
for component, sigma_col in [('East', 'sigma_east'), ('North', 'sigma_north'), ('Up', 'sigma_up')]:
    ax.plot(data['date'], 1000 *data[sigma_col], label=f'{component} σ', alpha=0.7)

ax.set_ylabel('Uncertainty (mm)')
ax.set_xlabel('Date')
ax.legend()
ax.set_title(f'GPS Uncertainty Time Series - {station_name}')
plt.show()

4. Multi-Source Comparison

from geepers.gps_sources import UnrSource, SideshowSource

# Compare UNR and JPL data for the same station
station_id = 'P123'
unr = UnrSource()
jpl = SideshowSource()

# Load data from both sources
unr_data = unr.timeseries(station_id, start_date='2015-01-01', end_date='2020-12-31')
jpl_data = jpl.timeseries(station_id, start_date='2015-01-01', end_date='2020-12-31')

# Merge on date for comparison
import pandas as pd
comparison = pd.merge(unr_data, jpl_data, on='date', suffixes=('_unr', '_jpl'))

# Plot comparison
fig, axes = plt.subplots(3, 1, figsize=(12, 10), sharex=True)
components = ['east', 'north', 'up']

for i, comp in enumerate(components):
    axes[i].plot(comparison['date'], comparison[f'{comp}_unr'],
                label='UNR', alpha=0.7)
    axes[i].plot(comparison['date'], comparison[f'{comp}_jpl'],
                label='JPL', alpha=0.7)
    axes[i].set_ylabel(f'{comp.title()} (mm)')
    axes[i].legend()

    # Calculate RMS difference
    diff = comparison[f'{comp}_unr'] - comparison[f'{comp}_jpl']
    rms = (diff**2).mean()**0.5
    axes[i].set_title(f'{comp.title()} - RMS difference: {rms:.2f} mm')

plt.xlabel('Date')
plt.suptitle(f'GPS Data Comparison: UNR vs JPL - Station {station_id}')
plt.tight_layout()
plt.show()

Advanced Usage

Custom Cache Directory

from geepers.gps_sources import UnrSource

# Use custom cache directory
unr = UnrSource(cache_dir='/path/to/my/cache')

Filtering by Geometry

from shapely.geometry import Polygon
import geopandas as gpd

# Define a custom polygon (e.g., California outline)
california_poly = Polygon([
    (-125, 32), (-114, 32), (-114, 42), (-125, 42), (-125, 32)
])
california_mask = gpd.GeoSeries([california_poly], crs='EPSG:4326')

# Get stations within the polygon
stations = unr.stations(mask=california_mask)

Batch Processing

import concurrent.futures
from tqdm import tqdm

def load_station_data(station_name):
    """Load data for a single station."""
    try:
        return station_name, unr.timeseries(station_name)
    except Exception as e:
        return station_name, None

# Get list of stations
stations = unr.stations(bbox=(-120, 35, -115, 40))
station_names = stations['name'].tolist()

# Load data in parallel
results = {}
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    futures = [executor.submit(load_station_data, name) for name in station_names]

    for future in tqdm(concurrent.futures.as_completed(futures),
                      total=len(futures), desc="Loading GPS data"):
        station_name, data = future.result()
        if data is not None:
            results[station_name] = data

print(f"Successfully loaded data for {len(results)} stations")

Data Schema and Validation

See geepers.schemas for more information about the Pandera data schemas.

About

No description, website, or topics provided.

Resources

License

Stars

Watchers

Forks

Packages

No packages published

Contributors 3

  •  
  •  
  •