From ab70abae5780b13a003095a15d6836b05b973ead Mon Sep 17 00:00:00 2001
From: Rong Bo <rongbo@Bos-MacBook-Pro-2.local>
Date: Wed, 6 Aug 2025 15:28:42 -0600
Subject: [PATCH 1/2] fix bug of cut_templates

---
 examples/california/cut_templates_cc.py | 43 +++++++++++++++++++++----
 1 file changed, 37 insertions(+), 6 deletions(-)

diff --git a/examples/california/cut_templates_cc.py b/examples/california/cut_templates_cc.py
index 5c6aeaf..d6f01c0 100644
--- a/examples/california/cut_templates_cc.py
+++ b/examples/california/cut_templates_cc.py
@@ -32,6 +32,12 @@ def fillin_missing_picks(picks, events, stations, config):
     pivoted = picks.pivot(index=["event_index", "station_id"], columns="phase_type", values="traveltime")
     pivoted.columns.name = None
     pivoted = pivoted.reset_index()
+    
+    # in case no P picks or S picks
+    if "P" not in pivoted.columns:
+        pivoted["P"] = np.nan
+    if "S" not in pivoted.columns:
+        pivoted["S"] = np.nan
 
     pivoted["P_pred"] = np.where(pivoted["P"].isna(), pivoted["S"] / vp_vs_ratio, pivoted["P"])
     pivoted["S_pred"] = np.where(pivoted["S"].isna(), pivoted["P"] * vp_vs_ratio, pivoted["S"])
@@ -407,12 +413,33 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     config["zlim_km"] = (zmin, zmax)
 
     # %% Eikonal for 1D velocity model
-    zz = [0.0, 5.5, 16.0, 32.0]
-    vp = [5.5, 5.5, 6.7, 7.8]
+    # zz = [0.0, 5.5, 16.0, 32.0]
+    # vp = [5.5, 5.5, 6.7, 7.8]
     # zz = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 30]
     # vp = [4.74, 5.01, 5.35, 5.71, 6.07, 6.17, 6.27, 6.34, 6.39, 7.8]
     vp_vs_ratio = 1.73
-    vs = [v / vp_vs_ratio for v in vp]
+    # vs = [v / vp_vs_ratio for v in vp]
+    # Mendocino velocity in Clara's paper, the depth is the top of the layer corresponding to the velocity
+    zz = [0.0000, 0.6000, 1.2000, 1.8000, 2.4000, 
+        3.0000, 3.6000, 4.2000, 4.8000, 5.4000, 
+        6.0000, 8.0000, 10.000, 12.000, 14.000, 
+        16.000, 18.000, 20.000, 22.000, 24.000, 
+        26.000, 28.000, 30.000, 32.000, 34.000, 
+        36.000, 38.000, 40.000, 60.000]
+    
+    vp = [3.5000, 3.7214, 3.9429, 4.1643, 
+        4.3857, 4.6071, 4.8286, 5.0500, 5.1124, 
+        5.1747, 5.2371, 5.4449, 5.6528, 5.8607, 
+        6.0685, 6.2764, 6.4843, 6.6921, 6.9000, 
+        7.9000, 7.9000, 7.9000, 7.9000, 7.9000,
+        7.9000, 7.9000, 7.9000, 7.9000, 7.9000]
+
+    vs = [2.0208, 2.1486, 2.2765, 2.4043, 
+        2.5322, 2.6600, 2.7879, 2.9157, 2.9517, 
+        2.9877, 3.0237, 3.1437, 3.2637, 3.3838, 
+        3.5038, 3.6238, 3.7438, 3.8638, 3.9838, 
+        4.5612, 4.5612, 4.5612, 4.5612, 4.5612, 
+        4.5612, 4.5612, 4.5612, 4.5612, 4.5612]
     h = 0.3
 
     if os.path.exists(f"{root_path}/{region}/obspy/velocity.csv"):
@@ -476,15 +503,19 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
         # events = pd.read_csv("adloc_events.csv", parse_dates=["time"])
         if protocol == "file":
             events = pd.read_csv(
-                f"{root_path}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", parse_dates=["time"]
+                f"{root_path}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", parse_dates=["time"],
                 # f"{root_path}/{data_path}/{year:04d}/ransac_events_{jday:03d}.csv",
-                parse_dates=["time"],
+                # parse_dates=["time"],
             )
         else:
             with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", "r") as fp:
             # with fs.open(f"{bucket}/{data_path}/{year:04d}/ransac_events_{jday:03d}.csv", "r") as fp:
                 events = pd.read_csv(fp, parse_dates=["time"])
-
+        
+        # avoid empty events dataframe
+        if len(events) == 0:
+            print(f"No events for {year:04d}-{jday:03d}")
+            continue
         # # ############### DEBUG ###############
         # events = events[
         #     (events["latitude"] >= minlat)

From d57e86929f5c04c5388a3df088c79ea2f47929a6 Mon Sep 17 00:00:00 2001
From: Rong Bo <rongbo@Bos-MacBook-Pro-2.local>
Date: Wed, 13 Aug 2025 23:26:31 -0700
Subject: [PATCH 2/2] update merge daily waveform templates

---
 examples/california/cut_templates_merge.py | 37 ++++++++++++++++++----
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/examples/california/cut_templates_merge.py b/examples/california/cut_templates_merge.py
index d4f653d..e3120f5 100644
--- a/examples/california/cut_templates_merge.py
+++ b/examples/california/cut_templates_merge.py
@@ -247,12 +247,31 @@ def process_day(year_jday):
 
         events_["latitude"] = events_["latitude"].astype(float)
         events_["longitude"] = events_["longitude"].astype(float)
-        events_ = events_[
-            (events_["latitude"] > config["minlatitude"])
-            & (events_["latitude"] < config["maxlatitude"])
-            & (events_["longitude"] > config["minlongitude"])
-            & (events_["longitude"] < config["maxlongitude"])
-        ]
+        
+        # the following filter cannot effectively remove the horizontal boundary events
+        # events_ = events_[
+        #     (events_["latitude"] > config["minlatitude"])
+        #     & (events_["latitude"] < config["maxlatitude"])
+        #     & (events_["longitude"] > config["minlongitude"])
+        #     & (events_["longitude"] < config["maxlongitude"])
+        # ]
+        
+        # remove horizontal boundary events
+        # filtering by x_km and y_km is better than by latitude and longitude
+        lon0 = (config["minlongitude"] + config["maxlongitude"]) / 2
+        lat0 = (config["minlatitude"] + config["maxlatitude"]) / 2
+        proj = Proj(f"+proj=aeqd +lon_0={lon0} +lat_0={lat0}  +units=km")
+        events_[["x_km", "y_km"]] = events_.apply(
+            lambda x: pd.Series(proj(longitude=x.longitude, latitude=x.latitude)), axis=1
+        )
+        xmin, ymin = proj(config["minlongitude"], config["minlatitude"])
+        xmax, ymax = proj(config["maxlongitude"], config["maxlatitude"])
+        zmin, zmax = config["mindepth"], config["maxdepth"]
+        idx = (events_["y_km"] > ymin)
+        idx &= (events_["y_km"] < ymax)
+        idx &= (events_["x_km"] > xmin)
+        idx &= (events_["x_km"] < xmax)
+        events_ = events_[idx]
 
         picks_["year"] = year
         picks_["jday"] = jday
@@ -318,6 +337,9 @@ def process_day(year_jday):
         + picks["idx_eve"].astype(str).str.zfill(6)
     )
     events["idx_eve"] = np.arange(len(events))
+    # keep the original event index and new idx_eve
+    # important for tracking the events
+    events[["event_index", "idx_eve"]].to_csv(f"{root_path}/{result_path}/events_index_pair.csv", index=False)
     events["event_index"] = np.arange(len(events))
     print(f"events: {len(events):,} ")
     print(events.iloc[:5])
@@ -438,6 +460,9 @@ def process_day(year_jday):
         fs.put(f"{root_path}/{result_path}/config.json", f"{bucket}/{folder}/config.json")
         print(f"{root_path}/{result_path}/pairs.txt -> {bucket}/{folder}/pairs.txt")
         fs.put(f"{root_path}/{result_path}/pairs.txt", f"{bucket}/{folder}/pairs.txt")
+        # upload the event index pair file
+        print(f"{root_path}/{result_path}/events_index_pair.csv -> {bucket}/{folder}/events_index_pair.csv")
+        fs.put(f"{root_path}/{result_path}/events_index_pair.csv", f"{bucket}/{folder}/events_index_pair.csv")
         # print(f"{root_path}/{result_path}/template.dat -> {bucket}/{folder}/template.dat")
         # fs.put(f"{root_path}/{result_path}/template.dat", f"{bucket}/{folder}/template.dat")
         # print(f"{root_path}/{result_path}/traveltime.dat -> {bucket}/{folder}/traveltime.dat")