From 9075771874853c7c1aa73c072543c8b547c2b29b Mon Sep 17 00:00:00 2001 From: David Hensle Date: Mon, 20 Feb 2023 12:41:16 -0800 Subject: [PATCH 01/58] estimation mode enhancements --- .../joint_tour_frequency_composition.py | 11 +- activitysim/abm/models/school_escorting.py | 110 +++++++++--------- .../abm/models/telecommute_frequency.py | 2 +- .../abm/models/transit_pass_ownership.py | 2 +- activitysim/abm/models/work_from_home.py | 2 +- 5 files changed, 69 insertions(+), 58 deletions(-) diff --git a/activitysim/abm/models/joint_tour_frequency_composition.py b/activitysim/abm/models/joint_tour_frequency_composition.py index 3e52d27fae..9fc8f61b1a 100644 --- a/activitysim/abm/models/joint_tour_frequency_composition.py +++ b/activitysim/abm/models/joint_tour_frequency_composition.py @@ -35,9 +35,13 @@ def joint_tour_frequency_composition( model_settings = config.read_model_settings(model_settings_file_name) + # alt_tdd = simulate.read_model_alts( + # "joint_tour_frequency_composition_alternatives.csv", set_index="alt" + # ) alt_tdd = simulate.read_model_alts( - "joint_tour_frequency_composition_alternatives.csv", set_index="alt" + "joint_tour_frequency_composition_alternatives.csv", set_index=None ) + # alt_tdd.index = alt_tdd['alt'].values # - only interested in households with more than one cdap travel_active person and # - at least one non-preschooler @@ -94,13 +98,15 @@ def joint_tour_frequency_composition( estimator.write_model_settings(model_settings, model_settings_file_name) estimator.write_coefficients(coefficients_df, model_settings) estimator.write_choosers(choosers) - estimator.write_alternatives(alts) assert choosers.index.name == "household_id" assert "household_id" not in choosers.columns choosers["household_id"] = choosers.index estimator.set_chooser_id(choosers.index.name) + + # FIXME set_alt_id - do we need this for interaction_simulate estimation bundle tables? 
+ estimator.set_alt_id("alt_id") # The choice value 'joint_tour_frequency_composition' assigned by interaction_simulate # is the index value of the chosen alternative in the alternatives table. @@ -134,6 +140,7 @@ def joint_tour_frequency_composition( # - but we don't know the tour participants yet # - so we arbitrarily choose the first person in the household # - to be point person for the purpose of generating an index and setting origin + # FIXME: not all models are guaranteed to have PNUM temp_point_persons = persons.loc[persons.PNUM == 1] temp_point_persons["person_id"] = temp_point_persons.index temp_point_persons = temp_point_persons.set_index("household_id") diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py index 1b5a97fc93..52f62f7a01 100644 --- a/activitysim/abm/models/school_escorting.py +++ b/activitysim/abm/models/school_escorting.py @@ -480,62 +480,66 @@ def school_escorting( ) escort_bundles.append(bundles) + pipeline.replace_table("households", households) + escort_bundles = pd.concat(escort_bundles) - escort_bundles["bundle_id"] = ( - escort_bundles["household_id"] * 10 - + escort_bundles.groupby("household_id").cumcount() - + 1 - ) - escort_bundles.sort_values( - by=["household_id", "school_escort_direction"], - ascending=[True, False], - inplace=True, - ) - school_escort_tours = school_escort_tours_trips.create_pure_school_escort_tours( - escort_bundles - ) - chauf_tour_id_map = { - v: k for k, v in school_escort_tours["bundle_id"].to_dict().items() - } - escort_bundles["chauf_tour_id"] = np.where( - escort_bundles["escort_type"] == "ride_share", - escort_bundles["first_mand_tour_id"], - escort_bundles["bundle_id"].map(chauf_tour_id_map), - ) + if len(escort_bundles) > 0: + escort_bundles["bundle_id"] = ( + escort_bundles["household_id"] * 10 + + escort_bundles.groupby("household_id").cumcount() + + 1 + ) + escort_bundles.sort_values( + by=["household_id", "school_escort_direction"], + 
ascending=[True, False], + inplace=True, + ) - tours = school_escort_tours_trips.add_pure_escort_tours(tours, school_escort_tours) - tours = school_escort_tours_trips.process_tours_after_escorting_model( - escort_bundles, tours - ) + school_escort_tours = school_escort_tours_trips.create_pure_school_escort_tours( + escort_bundles + ) + chauf_tour_id_map = { + v: k for k, v in school_escort_tours["bundle_id"].to_dict().items() + } + escort_bundles["chauf_tour_id"] = np.where( + escort_bundles["escort_type"] == "ride_share", + escort_bundles["first_mand_tour_id"], + escort_bundles["bundle_id"].map(chauf_tour_id_map), + ) - school_escort_trips = school_escort_tours_trips.create_school_escort_trips( - escort_bundles - ) + tours = school_escort_tours_trips.add_pure_escort_tours(tours, school_escort_tours) + tours = school_escort_tours_trips.process_tours_after_escorting_model( + escort_bundles, tours + ) - # update pipeline - pipeline.replace_table("households", households) - pipeline.replace_table("tours", tours) - pipeline.get_rn_generator().drop_channel("tours") - pipeline.get_rn_generator().add_channel("tours", tours) - pipeline.replace_table("escort_bundles", escort_bundles) - # save school escorting tours and trips in pipeline so we can overwrite results from downstream models - pipeline.replace_table("school_escort_tours", school_escort_tours) - pipeline.replace_table("school_escort_trips", school_escort_trips) - - # updating timetable object with pure escort tours so joint tours do not schedule ontop - timetable = inject.get_injectable("timetable") - - # Need to do this such that only one person is in nth_tours - # thus, looping through tour_category and tour_num - # including mandatory tours because their start / end times may have - # changed to match the school escort times - for tour_category in tours.tour_category.unique(): - for tour_num, nth_tours in tours[tours.tour_category == tour_category].groupby( - "tour_num", sort=True - ): - timetable.assign( - 
window_row_ids=nth_tours["person_id"], tdds=nth_tours["tdd"] - ) + school_escort_trips = school_escort_tours_trips.create_school_escort_trips( + escort_bundles + ) - timetable.replace_table() + # update pipeline + + pipeline.replace_table("tours", tours) + pipeline.get_rn_generator().drop_channel("tours") + pipeline.get_rn_generator().add_channel("tours", tours) + pipeline.replace_table("escort_bundles", escort_bundles) + # save school escorting tours and trips in pipeline so we can overwrite results from downstream models + pipeline.replace_table("school_escort_tours", school_escort_tours) + pipeline.replace_table("school_escort_trips", school_escort_trips) + + # updating timetable object with pure escort tours so joint tours do not schedule ontop + timetable = inject.get_injectable("timetable") + + # Need to do this such that only one person is in nth_tours + # thus, looping through tour_category and tour_num + # including mandatory tours because their start / end times may have + # changed to match the school escort times + for tour_category in tours.tour_category.unique(): + for tour_num, nth_tours in tours[tours.tour_category == tour_category].groupby( + "tour_num", sort=True + ): + timetable.assign( + window_row_ids=nth_tours["person_id"], tdds=nth_tours["tdd"] + ) + + timetable.replace_table() diff --git a/activitysim/abm/models/telecommute_frequency.py b/activitysim/abm/models/telecommute_frequency.py index cc1eec4893..4596a89115 100755 --- a/activitysim/abm/models/telecommute_frequency.py +++ b/activitysim/abm/models/telecommute_frequency.py @@ -57,7 +57,7 @@ def telecommute_frequency(persons_merged, persons, chunk_size, trace_hh_id): if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) estimator.write_spec(model_settings) - estimator.write_coefficients(coefficients_df) + estimator.write_coefficients(coefficients_df, model_settings) estimator.write_choosers(choosers) choices = simulate.simple_simulate( diff --git 
a/activitysim/abm/models/transit_pass_ownership.py b/activitysim/abm/models/transit_pass_ownership.py index 6507ab8256..92d97080f9 100644 --- a/activitysim/abm/models/transit_pass_ownership.py +++ b/activitysim/abm/models/transit_pass_ownership.py @@ -51,7 +51,7 @@ def transit_pass_ownership(persons_merged, persons, chunk_size, trace_hh_id): if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) estimator.write_spec(model_settings) - estimator.write_coefficients(coefficients_df) + estimator.write_coefficients(coefficients_df, model_settings) estimator.write_choosers(choosers) choices = simulate.simple_simulate( diff --git a/activitysim/abm/models/work_from_home.py b/activitysim/abm/models/work_from_home.py index 97a80d301f..0432640002 100755 --- a/activitysim/abm/models/work_from_home.py +++ b/activitysim/abm/models/work_from_home.py @@ -58,7 +58,7 @@ def work_from_home(persons_merged, persons, chunk_size, trace_hh_id): if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) estimator.write_spec(model_settings) - estimator.write_coefficients(coefficients_df) + estimator.write_coefficients(coefficients_df, model_settings) estimator.write_choosers(choosers) # - iterative single process what-if adjustment if specified From 54cb3d1eda9f8df3b0104c998012010fed106c63 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Tue, 21 Feb 2023 15:39:17 -0800 Subject: [PATCH 02/58] blacken --- .../abm/models/joint_tour_frequency_composition.py | 3 ++- activitysim/abm/models/school_escorting.py | 12 +++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/activitysim/abm/models/joint_tour_frequency_composition.py b/activitysim/abm/models/joint_tour_frequency_composition.py index 9fc8f61b1a..af6d8b62f2 100644 --- a/activitysim/abm/models/joint_tour_frequency_composition.py +++ b/activitysim/abm/models/joint_tour_frequency_composition.py @@ -38,6 +38,7 @@ def joint_tour_frequency_composition( # alt_tdd = 
simulate.read_model_alts( # "joint_tour_frequency_composition_alternatives.csv", set_index="alt" # ) + # FIXME setting index as "alt" causes crash in estimation mode... alt_tdd = simulate.read_model_alts( "joint_tour_frequency_composition_alternatives.csv", set_index=None ) @@ -104,7 +105,7 @@ def joint_tour_frequency_composition( choosers["household_id"] = choosers.index estimator.set_chooser_id(choosers.index.name) - + # FIXME set_alt_id - do we need this for interaction_simulate estimation bundle tables? estimator.set_alt_id("alt_id") diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py index 52f62f7a01..8da0068a7e 100644 --- a/activitysim/abm/models/school_escorting.py +++ b/activitysim/abm/models/school_escorting.py @@ -508,7 +508,9 @@ def school_escorting( escort_bundles["bundle_id"].map(chauf_tour_id_map), ) - tours = school_escort_tours_trips.add_pure_escort_tours(tours, school_escort_tours) + tours = school_escort_tours_trips.add_pure_escort_tours( + tours, school_escort_tours + ) tours = school_escort_tours_trips.process_tours_after_escorting_model( escort_bundles, tours ) @@ -518,7 +520,7 @@ def school_escorting( ) # update pipeline - + pipeline.replace_table("tours", tours) pipeline.get_rn_generator().drop_channel("tours") pipeline.get_rn_generator().add_channel("tours", tours) @@ -535,9 +537,9 @@ def school_escorting( # including mandatory tours because their start / end times may have # changed to match the school escort times for tour_category in tours.tour_category.unique(): - for tour_num, nth_tours in tours[tours.tour_category == tour_category].groupby( - "tour_num", sort=True - ): + for tour_num, nth_tours in tours[ + tours.tour_category == tour_category + ].groupby("tour_num", sort=True): timetable.assign( window_row_ids=nth_tours["person_id"], tdds=nth_tours["tdd"] ) From b461dd29fbd36cf81d7507b4817892f20d21b2ae Mon Sep 17 00:00:00 2001 From: David Hensle Date: Thu, 2 Mar 2023 12:23:16 -0800 
Subject: [PATCH 03/58] ao preprocessor, cdap & jtfc estimation --- activitysim/abm/models/auto_ownership.py | 17 ++++++++++++++++- activitysim/abm/models/cdap.py | 4 ++++ .../models/joint_tour_frequency_composition.py | 4 ++-- .../abm/models/joint_tour_participation.py | 4 ++++ activitysim/abm/models/transit_pass_subsidy.py | 2 +- 5 files changed, 27 insertions(+), 4 deletions(-) diff --git a/activitysim/abm/models/auto_ownership.py b/activitysim/abm/models/auto_ownership.py index 564d6f94b6..3e01513cfe 100644 --- a/activitysim/abm/models/auto_ownership.py +++ b/activitysim/abm/models/auto_ownership.py @@ -2,7 +2,7 @@ # See full license in LICENSE.txt. import logging -from activitysim.core import config, inject, pipeline, simulate, tracing +from activitysim.core import config, expressions, inject, pipeline, simulate, tracing from .util import estimation @@ -32,6 +32,21 @@ def auto_ownership_simulate(households, households_merged, chunk_size, trace_hh_ logger.info("Running %s with %d households", trace_label, len(choosers)) + # - preprocessor + preprocessor_settings = model_settings.get("preprocessor", None) + if preprocessor_settings: + + locals_d = {} + if constants is not None: + locals_d.update(constants) + + expressions.assign_columns( + df=choosers, + model_settings=preprocessor_settings, + locals_dict=locals_d, + trace_label=trace_label, + ) + if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) estimator.write_spec(model_settings) diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index f7da93687b..79c650a7a2 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -151,6 +151,9 @@ def cdap_simulate(persons_merged, persons, households, chunk_size, trace_hh_id): for hhsize in range(2, cdap.MAX_HHSIZE + 1): spec = cdap.get_cached_spec(hhsize) estimator.write_table(spec, "spec_%s" % hhsize, append=False) + if add_joint_tour_utility: + joint_spec = 
cdap.get_cached_joint_spec(hhsize) + estimator.write_table(joint_spec, "joint_spec_%s" % hhsize, append=False) logger.info("Running cdap_simulate with %d persons", len(persons_merged.index)) @@ -184,6 +187,7 @@ def cdap_simulate(persons_merged, persons, households, chunk_size, trace_hh_id): if estimator: estimator.write_choices(choices) choices = estimator.get_survey_values(choices, "persons", "cdap_activity") + hh_joint = estimator.get_survey_values(hh_joint, "households", "has_joint_tour") estimator.write_override_choices(choices) estimator.end_estimation() diff --git a/activitysim/abm/models/joint_tour_frequency_composition.py b/activitysim/abm/models/joint_tour_frequency_composition.py index af6d8b62f2..a13e91a578 100644 --- a/activitysim/abm/models/joint_tour_frequency_composition.py +++ b/activitysim/abm/models/joint_tour_frequency_composition.py @@ -35,14 +35,14 @@ def joint_tour_frequency_composition( model_settings = config.read_model_settings(model_settings_file_name) + # FIXME setting index as "alt" causes crash in estimation mode... # alt_tdd = simulate.read_model_alts( # "joint_tour_frequency_composition_alternatives.csv", set_index="alt" # ) - # FIXME setting index as "alt" causes crash in estimation mode... 
alt_tdd = simulate.read_model_alts( "joint_tour_frequency_composition_alternatives.csv", set_index=None ) - # alt_tdd.index = alt_tdd['alt'].values + alt_tdd.index = alt_tdd['alt'].values # - only interested in households with more than one cdap travel_active person and # - at least one non-preschooler diff --git a/activitysim/abm/models/joint_tour_participation.py b/activitysim/abm/models/joint_tour_participation.py index ee8658ae5f..d079322e6e 100644 --- a/activitysim/abm/models/joint_tour_participation.py +++ b/activitysim/abm/models/joint_tour_participation.py @@ -209,6 +209,10 @@ def participants_chooser(probs, choosers, spec, trace_label): probs[choice_col] = np.where(probs[choice_col] > 0, 1, 0) non_choice_col = [col for col in probs.columns if col != choice_col][0] probs[non_choice_col] = 1 - probs[choice_col] + if iter > MAX_ITERATIONS + 1: + raise RuntimeError( + f"{num_tours_remaining} tours could not be satisfied even with forcing participation" + ) else: raise RuntimeError( f"{num_tours_remaining} tours could not be satisfied after {iter} iterations" diff --git a/activitysim/abm/models/transit_pass_subsidy.py b/activitysim/abm/models/transit_pass_subsidy.py index 4e513a6611..45a118fda8 100644 --- a/activitysim/abm/models/transit_pass_subsidy.py +++ b/activitysim/abm/models/transit_pass_subsidy.py @@ -51,7 +51,7 @@ def transit_pass_subsidy(persons_merged, persons, chunk_size, trace_hh_id): if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) estimator.write_spec(model_settings) - estimator.write_coefficients(coefficients_df) + estimator.write_coefficients(coefficients_df, model_settings) estimator.write_choosers(choosers) choices = simulate.simple_simulate( From 024879353df102ed5674076609ebaef02c6a6c3d Mon Sep 17 00:00:00 2001 From: David Hensle Date: Thu, 2 Mar 2023 12:24:29 -0800 Subject: [PATCH 04/58] blacken --- activitysim/abm/models/cdap.py | 4 +++- activitysim/abm/models/joint_tour_frequency_composition.py | 2 +- 
2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index 79c650a7a2..384eb24b7f 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -153,7 +153,9 @@ def cdap_simulate(persons_merged, persons, households, chunk_size, trace_hh_id): estimator.write_table(spec, "spec_%s" % hhsize, append=False) if add_joint_tour_utility: joint_spec = cdap.get_cached_joint_spec(hhsize) - estimator.write_table(joint_spec, "joint_spec_%s" % hhsize, append=False) + estimator.write_table( + joint_spec, "joint_spec_%s" % hhsize, append=False + ) logger.info("Running cdap_simulate with %d persons", len(persons_merged.index)) diff --git a/activitysim/abm/models/joint_tour_frequency_composition.py b/activitysim/abm/models/joint_tour_frequency_composition.py index a13e91a578..c1f735092b 100644 --- a/activitysim/abm/models/joint_tour_frequency_composition.py +++ b/activitysim/abm/models/joint_tour_frequency_composition.py @@ -42,7 +42,7 @@ def joint_tour_frequency_composition( alt_tdd = simulate.read_model_alts( "joint_tour_frequency_composition_alternatives.csv", set_index=None ) - alt_tdd.index = alt_tdd['alt'].values + alt_tdd.index = alt_tdd["alt"].values # - only interested in households with more than one cdap travel_active person and # - at least one non-preschooler From 9d17c41862956ca678ecb8fd18e9d993c8dace2e Mon Sep 17 00:00:00 2001 From: David Hensle Date: Thu, 2 Mar 2023 14:17:06 -0800 Subject: [PATCH 05/58] hh_joint from survey only if adding joint utility --- activitysim/abm/models/cdap.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index 384eb24b7f..268d95ed14 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -189,7 +189,8 @@ def cdap_simulate(persons_merged, persons, households, chunk_size, trace_hh_id): if estimator: estimator.write_choices(choices) 
choices = estimator.get_survey_values(choices, "persons", "cdap_activity") - hh_joint = estimator.get_survey_values(hh_joint, "households", "has_joint_tour") + if add_joint_tour_utility: + hh_joint = estimator.get_survey_values(hh_joint, "households", "has_joint_tour") estimator.write_override_choices(choices) estimator.end_estimation() From 7ba0ddda7fd69a95755ed492b307315d7d2f08db Mon Sep 17 00:00:00 2001 From: David Hensle Date: Thu, 2 Mar 2023 14:29:44 -0800 Subject: [PATCH 06/58] blacken --- activitysim/abm/models/cdap.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index 268d95ed14..a8566f73be 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -190,7 +190,9 @@ def cdap_simulate(persons_merged, persons, households, chunk_size, trace_hh_id): estimator.write_choices(choices) choices = estimator.get_survey_values(choices, "persons", "cdap_activity") if add_joint_tour_utility: - hh_joint = estimator.get_survey_values(hh_joint, "households", "has_joint_tour") + hh_joint = estimator.get_survey_values( + hh_joint, "households", "has_joint_tour" + ) estimator.write_override_choices(choices) estimator.end_estimation() From da724c35b0c7a1973378c18a9cda6061b0d674ea Mon Sep 17 00:00:00 2001 From: dhensle Date: Thu, 16 Mar 2023 10:29:28 -0700 Subject: [PATCH 07/58] write out mode choice logsum --- activitysim/abm/models/location_choice.py | 40 ++++++++++++++++++----- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py index dd5e279b39..ff2dd4d1ef 100644 --- a/activitysim/abm/models/location_choice.py +++ b/activitysim/abm/models/location_choice.py @@ -808,6 +808,18 @@ def run_location_choice( ) tracing.trace_df(choices_df, estimation_trace_label) + if want_logsums & (not skip_choice): + # grabbing index, could be person_id or proto_person_id + index_name = 
choices_df.index.name + # merging mode choice logsum of chosen alternative to choices + choices_df = pd.merge( + choices_df.reset_index(), + location_sample_df.reset_index()[[index_name, model_settings["ALT_DEST_COL_NAME"], ALT_LOGSUM]], + how='left', + left_on=[index_name, 'choice'], + right_on=[index_name, model_settings["ALT_DEST_COL_NAME"]] + ).drop(columns=model_settings["ALT_DEST_COL_NAME"]).set_index(index_name) + choices_list.append(choices_df) if want_sample_table: @@ -825,7 +837,7 @@ def run_location_choice( else: # this will only happen with small samples (e.g. singleton) with no (e.g.) school segs logger.warning("%s no choices", trace_label) - choices_df = pd.DataFrame(columns=["choice", "logsum"]) + choices_df = pd.DataFrame(columns=["choice", "logsum", ALT_LOGSUM]) if len(sample_list) > 0: save_sample_df = pd.concat(sample_list) @@ -869,7 +881,8 @@ def iterate_location_choice( Returns ------- adds choice column model_settings['DEST_CHOICE_COLUMN_NAME'] - adds logsum column model_settings['DEST_CHOICE_LOGSUM_COLUMN_NAME']- if provided + adds destination choice logsum column model_settings['DEST_CHOICE_LOGSUM_COLUMN_NAME']- if provided + adds mode choice logsum to selected destination column model_settings['MODE_CHOICE_LOGSUM_COLUMN_NAME']- if provided adds annotations to persons table """ @@ -879,7 +892,9 @@ def iterate_location_choice( chooser_filter_column = model_settings["CHOOSER_FILTER_COLUMN_NAME"] dest_choice_column_name = model_settings["DEST_CHOICE_COLUMN_NAME"] - logsum_column_name = model_settings.get("DEST_CHOICE_LOGSUM_COLUMN_NAME") + dc_logsum_column_name = model_settings.get("DEST_CHOICE_LOGSUM_COLUMN_NAME") + mc_logsum_column_name = model_settings.get("MODE_CHOICE_LOGSUM_COLUMN_NAME") + want_logsums = (dc_logsum_column_name is not None) | (mc_logsum_column_name is not None) sample_table_name = model_settings.get("DEST_CHOICE_SAMPLE_TABLE_NAME") want_sample_table = ( @@ -929,7 +944,7 @@ def iterate_location_choice( persons_merged_df_, 
network_los, shadow_price_calculator=spc, - want_logsums=logsum_column_name is not None, + want_logsums=want_logsums, want_sample_table=want_sample_table, estimator=estimator, model_settings=model_settings, @@ -1005,10 +1020,15 @@ def iterate_location_choice( ) # add the dest_choice_logsum column to persons dataframe - if logsum_column_name: - persons_df[logsum_column_name] = ( + if dc_logsum_column_name: + persons_df[dc_logsum_column_name] = ( choices_df["logsum"].reindex(persons_df.index).astype("float") ) + # add the mode choice logsum column to persons dataframe + if mc_logsum_column_name: + persons_df[mc_logsum_column_name] = ( + choices_df[ALT_LOGSUM].reindex(persons_df.index).astype("float") + ) if save_sample_df is not None: # might be None for tiny samples even if sample_table_name was specified @@ -1047,9 +1067,13 @@ def iterate_location_choice( if trace_hh_id: tracing.trace_df(households_df, label=trace_label, warn_if_empty=True) - if logsum_column_name: + if dc_logsum_column_name: + tracing.print_summary( + dc_logsum_column_name, choices_df["logsum"], value_counts=True + ) + if mc_logsum_column_name: tracing.print_summary( - logsum_column_name, choices_df["logsum"], value_counts=True + mc_logsum_column_name, choices_df[ALT_LOGSUM], value_counts=True ) return persons_df From 92acbfa6efcdcf022f0b90c66f058ddf2fdee453 Mon Sep 17 00:00:00 2001 From: dhensle Date: Thu, 16 Mar 2023 10:30:16 -0700 Subject: [PATCH 08/58] tracing for proto population --- activitysim/core/tracing.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/activitysim/core/tracing.py b/activitysim/core/tracing.py index 44707c0aea..f5f3813908 100644 --- a/activitysim/core/tracing.py +++ b/activitysim/core/tracing.py @@ -788,6 +788,15 @@ def interaction_trace_rows(interaction_df, choosers, sample_size=None): elif "person_id" in choosers.columns and persons_table_name in traceable_table_ids: slicer_column_name = "person_id" targets = traceable_table_ids[persons_table_name] + elif 
choosers.index.name == "proto_person_id" and persons_table_name in traceable_table_ids: + slicer_column_name = choosers.index.name + targets = traceable_table_ids[persons_table_name] + elif choosers.index.name == "proto_household_id" and households_table_name in traceable_table_ids: + slicer_column_name = choosers.index.name + targets = traceable_table_ids[households_table_name] + elif choosers.index.name == "proto_tour_id" and 'proto_tours' in traceable_table_ids: + slicer_column_name = choosers.index.name + targets = traceable_table_ids['proto_tours'] else: print(choosers.columns) raise RuntimeError( From 398c5a901dfc664bb52eb2b1aeb881879430d428 Mon Sep 17 00:00:00 2001 From: dhensle Date: Thu, 16 Mar 2023 10:55:27 -0700 Subject: [PATCH 09/58] dropping all traceable tables after disagg --- activitysim/abm/models/disaggregate_accessibility.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/activitysim/abm/models/disaggregate_accessibility.py b/activitysim/abm/models/disaggregate_accessibility.py index fe79d3fcdf..1bc267c856 100644 --- a/activitysim/abm/models/disaggregate_accessibility.py +++ b/activitysim/abm/models/disaggregate_accessibility.py @@ -745,11 +745,9 @@ def compute_disaggregate_accessibility(network_los, chunk_size, trace_hh_id): for ch in list(pipeline.get_rn_generator().channels.keys()): pipeline.get_rn_generator().drop_channel(ch) - # Drop any prematurely added traceables - for trace in [ - x for x in inject.get_injectable("traceable_tables") if "proto_" not in x - ]: - tracing.deregister_traceable_table(trace) + # Dropping all traceable tables + for table in inject.get_injectable("traceable_tables"): + tracing.deregister_traceable_table(table) # need to clear any premature tables that were added during the previous run orca._TABLES.clear() From 049efcfae4a5161eeae32622d501acd134a3d8e2 Mon Sep 17 00:00:00 2001 From: dhensle Date: Tue, 21 Mar 2023 15:33:59 -0700 Subject: [PATCH 10/58] blacken --- 
activitysim/abm/models/location_choice.py | 24 +++++++++++++++-------- activitysim/core/tracing.py | 16 +++++++++++---- 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py index ff2dd4d1ef..8470368708 100644 --- a/activitysim/abm/models/location_choice.py +++ b/activitysim/abm/models/location_choice.py @@ -812,13 +812,19 @@ def run_location_choice( # grabbing index, could be person_id or proto_person_id index_name = choices_df.index.name # merging mode choice logsum of chosen alternative to choices - choices_df = pd.merge( - choices_df.reset_index(), - location_sample_df.reset_index()[[index_name, model_settings["ALT_DEST_COL_NAME"], ALT_LOGSUM]], - how='left', - left_on=[index_name, 'choice'], - right_on=[index_name, model_settings["ALT_DEST_COL_NAME"]] - ).drop(columns=model_settings["ALT_DEST_COL_NAME"]).set_index(index_name) + choices_df = ( + pd.merge( + choices_df.reset_index(), + location_sample_df.reset_index()[ + [index_name, model_settings["ALT_DEST_COL_NAME"], ALT_LOGSUM] + ], + how="left", + left_on=[index_name, "choice"], + right_on=[index_name, model_settings["ALT_DEST_COL_NAME"]], + ) + .drop(columns=model_settings["ALT_DEST_COL_NAME"]) + .set_index(index_name) + ) choices_list.append(choices_df) @@ -894,7 +900,9 @@ def iterate_location_choice( dest_choice_column_name = model_settings["DEST_CHOICE_COLUMN_NAME"] dc_logsum_column_name = model_settings.get("DEST_CHOICE_LOGSUM_COLUMN_NAME") mc_logsum_column_name = model_settings.get("MODE_CHOICE_LOGSUM_COLUMN_NAME") - want_logsums = (dc_logsum_column_name is not None) | (mc_logsum_column_name is not None) + want_logsums = (dc_logsum_column_name is not None) | ( + mc_logsum_column_name is not None + ) sample_table_name = model_settings.get("DEST_CHOICE_SAMPLE_TABLE_NAME") want_sample_table = ( diff --git a/activitysim/core/tracing.py b/activitysim/core/tracing.py index f5f3813908..07ba053266 100644 --- 
a/activitysim/core/tracing.py +++ b/activitysim/core/tracing.py @@ -788,15 +788,23 @@ def interaction_trace_rows(interaction_df, choosers, sample_size=None): elif "person_id" in choosers.columns and persons_table_name in traceable_table_ids: slicer_column_name = "person_id" targets = traceable_table_ids[persons_table_name] - elif choosers.index.name == "proto_person_id" and persons_table_name in traceable_table_ids: + elif ( + choosers.index.name == "proto_person_id" + and persons_table_name in traceable_table_ids + ): slicer_column_name = choosers.index.name targets = traceable_table_ids[persons_table_name] - elif choosers.index.name == "proto_household_id" and households_table_name in traceable_table_ids: + elif ( + choosers.index.name == "proto_household_id" + and households_table_name in traceable_table_ids + ): slicer_column_name = choosers.index.name targets = traceable_table_ids[households_table_name] - elif choosers.index.name == "proto_tour_id" and 'proto_tours' in traceable_table_ids: + elif ( + choosers.index.name == "proto_tour_id" and "proto_tours" in traceable_table_ids + ): slicer_column_name = choosers.index.name - targets = traceable_table_ids['proto_tours'] + targets = traceable_table_ids["proto_tours"] else: print(choosers.columns) raise RuntimeError( From cad3bf799ffa1ba37409dcfd56524dcaed28a731 Mon Sep 17 00:00:00 2001 From: dhensle Date: Thu, 6 Apr 2023 10:19:04 -0700 Subject: [PATCH 11/58] pandas < 2.0 --- conda-environments/activitysim-dev-base.yml | 2 +- conda-environments/activitysim-dev.yml | 2 +- conda-environments/github-actions-tests.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conda-environments/activitysim-dev-base.yml b/conda-environments/activitysim-dev-base.yml index f0bdcb3a36..7b534edf8a 100644 --- a/conda-environments/activitysim-dev-base.yml +++ b/conda-environments/activitysim-dev-base.yml @@ -41,7 +41,7 @@ dependencies: - numpydoc - openmatrix >= 0.3.4.1 - orca >= 1.6 -- pandas >= 1.1.0 +- pandas 
>= 1.1.0,<2.0 - pre-commit - psutil >= 4.1 - pyarrow >= 2.0 diff --git a/conda-environments/activitysim-dev.yml b/conda-environments/activitysim-dev.yml index c46b436dea..519f442db1 100644 --- a/conda-environments/activitysim-dev.yml +++ b/conda-environments/activitysim-dev.yml @@ -37,7 +37,7 @@ dependencies: - numpydoc - openmatrix >= 0.3.4.1 - orca >= 1.6 -- pandas >= 1.1.0 +- pandas >= 1.1.0,<2.0 - pre-commit - psutil >= 4.1 - pyarrow >= 2.0 diff --git a/conda-environments/github-actions-tests.yml b/conda-environments/github-actions-tests.yml index 63d353ea51..9207c8899e 100644 --- a/conda-environments/github-actions-tests.yml +++ b/conda-environments/github-actions-tests.yml @@ -16,7 +16,7 @@ dependencies: - numpy >= 1.16.1 - openmatrix >= 0.3.4.1 - orca >= 1.6 -- pandas >= 1.1.0 +- pandas >= 1.1.0,<2.0 - psutil >= 4.1 - pyarrow >= 2.0 - pypyr >= 5.3 From 879d09833a5e8504e3c24a7577288752a51aae4c Mon Sep 17 00:00:00 2001 From: dhensle Date: Thu, 6 Apr 2023 15:13:42 -0700 Subject: [PATCH 12/58] annotate and col selection in disagg access --- .../abm/models/disaggregate_accessibility.py | 33 ++++++++++++++++--- .../abm/tables/disaggregate_accessibility.py | 19 +++++------ 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/activitysim/abm/models/disaggregate_accessibility.py b/activitysim/abm/models/disaggregate_accessibility.py index 1bc267c856..b8b8bfd3e1 100644 --- a/activitysim/abm/models/disaggregate_accessibility.py +++ b/activitysim/abm/models/disaggregate_accessibility.py @@ -569,14 +569,13 @@ def merge_persons(self): inject.add_table("proto_persons_merged", persons_merged) -def get_disaggregate_logsums(network_los, chunk_size, trace_hh_id): +def get_disaggregate_logsums( + network_los, chunk_size, trace_hh_id, disagg_model_settings +): logsums = {} persons_merged = pipeline.get_table("proto_persons_merged").sort_index( inplace=False ) - disagg_model_settings = read_disaggregate_accessibility_yaml( - "disaggregate_accessibility.yaml" - ) for 
model_name in [ "workplace_location", @@ -696,8 +695,14 @@ def compute_disaggregate_accessibility(network_los, chunk_size, trace_hh_id): tracing.register_traceable_table(tablename, df) del df + disagg_model_settings = read_disaggregate_accessibility_yaml( + "disaggregate_accessibility.yaml" + ) + # Run location choice - logsums = get_disaggregate_logsums(network_los, chunk_size, trace_hh_id) + logsums = get_disaggregate_logsums( + network_los, chunk_size, trace_hh_id, disagg_model_settings + ) logsums = {k + "_accessibility": v for k, v in logsums.items()} # Combined accessibility table @@ -758,4 +763,22 @@ def compute_disaggregate_accessibility(network_los, chunk_size, trace_hh_id): # Inject accessibility results into pipeline [inject.add_table(k, df) for k, df in logsums.items()] + # available post-processing + for annotations in disagg_model_settings.get("postprocess_proto_tables", []): + tablename = annotations["tablename"] + df = pipeline.get_table(tablename) + assert df is not None + assert annotations is not None + assign_columns( + df=df, + model_settings={ + **annotations["annotate"], + **disagg_model_settings["suffixes"], + }, + trace_label=tracing.extend_trace_label( + "disaggregate_accessibility.postprocess", tablename + ), + ) + pipeline.replace_table(tablename, df) + return diff --git a/activitysim/abm/tables/disaggregate_accessibility.py b/activitysim/abm/tables/disaggregate_accessibility.py index 4c4eb9ad40..db65652f5f 100644 --- a/activitysim/abm/tables/disaggregate_accessibility.py +++ b/activitysim/abm/tables/disaggregate_accessibility.py @@ -151,14 +151,13 @@ def disaggregate_accessibility(persons, households, land_use, accessibility): accessibility_cols = [ x for x in proto_accessibility_df.columns if "accessibility" in x ] + keep_cols = model_settings.get("KEEP_COLS", accessibility_cols) # Parse the merging parameters assert merging_params is not None # Check if already assigned! 
- if set(accessibility_cols).intersection(persons_merged_df.columns) == set( - accessibility_cols - ): + if set(keep_cols).intersection(persons_merged_df.columns) == set(keep_cols): return # Find the nearest zone (spatially) with accessibilities calculated @@ -190,7 +189,7 @@ def disaggregate_accessibility(persons, households, land_use, accessibility): # because it will get slightly different logsums for households in the same zone. # This is because different destination zones were selected. To resolve, get mean by cols. right_df = ( - proto_accessibility_df.groupby(merge_cols)[accessibility_cols] + proto_accessibility_df.groupby(merge_cols)[keep_cols] .mean() .sort_values(nearest_cols) .reset_index() @@ -223,9 +222,9 @@ def disaggregate_accessibility(persons, households, land_use, accessibility): ) # Predict the nearest person ID and pull the logsums - matched_logsums_df = right_df.loc[clf.predict(x_pop)][ - accessibility_cols - ].reset_index(drop=True) + matched_logsums_df = right_df.loc[clf.predict(x_pop)][keep_cols].reset_index( + drop=True + ) merge_df = pd.concat( [left_df.reset_index(drop=False), matched_logsums_df], axis=1 ).set_index("person_id") @@ -257,12 +256,12 @@ def disaggregate_accessibility(persons, households, land_use, accessibility): # Check that it was correctly left-joined assert all(persons_merged_df[merge_cols] == merge_df[merge_cols]) - assert any(merge_df[accessibility_cols].isnull()) + assert any(merge_df[keep_cols].isnull()) # Inject merged accessibilities so that it can be included in persons_merged function - inject.add_table("disaggregate_accessibility", merge_df[accessibility_cols]) + inject.add_table("disaggregate_accessibility", merge_df[keep_cols]) - return merge_df[accessibility_cols] + return merge_df[keep_cols] inject.broadcast( From 131832084575f9850156ce40a02fd7fd6adcf55d Mon Sep 17 00:00:00 2001 From: dhensle Date: Thu, 6 Apr 2023 15:14:32 -0700 Subject: [PATCH 13/58] annotate auto ownership for selection of disaggregate 
accessibilities --- activitysim/abm/models/auto_ownership.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/activitysim/abm/models/auto_ownership.py b/activitysim/abm/models/auto_ownership.py index 3e01513cfe..476bd18adf 100644 --- a/activitysim/abm/models/auto_ownership.py +++ b/activitysim/abm/models/auto_ownership.py @@ -4,7 +4,7 @@ from activitysim.core import config, expressions, inject, pipeline, simulate, tracing -from .util import estimation +from .util import estimation, annotate logger = logging.getLogger(__name__) @@ -84,5 +84,8 @@ def auto_ownership_simulate(households, households_merged, chunk_size, trace_hh_ "auto_ownership", households.auto_ownership, value_counts=True ) + if model_settings.get("annotate_households"): + annotate.annotate_households(model_settings, trace_label) + if trace_hh_id: tracing.trace_df(households, label="auto_ownership", warn_if_empty=True) From 67b1b90b3b9f83b3fb8189e23595953403e15b98 Mon Sep 17 00:00:00 2001 From: dhensle Date: Thu, 6 Apr 2023 15:15:09 -0700 Subject: [PATCH 14/58] added to annotate helper --- activitysim/abm/models/util/annotate.py | 38 +++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/activitysim/abm/models/util/annotate.py b/activitysim/abm/models/util/annotate.py index e50519b38e..727ef4292f 100644 --- a/activitysim/abm/models/util/annotate.py +++ b/activitysim/abm/models/util/annotate.py @@ -15,6 +15,44 @@ logger = logging.getLogger(__name__) +def annotate_households(model_settings, trace_label, locals_dict={}): + """ + Add columns to the households table in the pipeline according to spec. 
+ + Parameters + ---------- + model_settings : dict + trace_label : str + """ + households = inject.get_table("households").to_frame() + expressions.assign_columns( + df=households, + model_settings=model_settings.get("annotate_households"), + locals_dict=locals_dict, + trace_label=tracing.extend_trace_label(trace_label, "annotate_households"), + ) + pipeline.replace_table("households", households) + + +def annotate_persons(model_settings, trace_label, locals_dict={}): + """ + Add columns to the persons table in the pipeline according to spec. + + Parameters + ---------- + model_settings : dict + trace_label : str + """ + persons = inject.get_table("persons").to_frame() + expressions.assign_columns( + df=persons, + model_settings=model_settings.get("annotate_persons"), + locals_dict=locals_dict, + trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"), + ) + pipeline.replace_table("persons", persons) + + def annotate_tours(model_settings, trace_label, locals_dict={}): """ Add columns to the tours table in the pipeline according to spec. From d2fdc58bc540bc8f8c2da4544b808b9e6f111a53 Mon Sep 17 00:00:00 2001 From: dhensle Date: Fri, 7 Apr 2023 11:14:15 -0700 Subject: [PATCH 15/58] school escorting estimation enhancements --- .../joint_tour_frequency_composition.py | 1 + activitysim/abm/models/school_escorting.py | 116 ++++++++++++------ .../models/util/school_escort_tours_trips.py | 8 ++ 3 files changed, 90 insertions(+), 35 deletions(-) diff --git a/activitysim/abm/models/joint_tour_frequency_composition.py b/activitysim/abm/models/joint_tour_frequency_composition.py index c1f735092b..ffdb36e2c7 100644 --- a/activitysim/abm/models/joint_tour_frequency_composition.py +++ b/activitysim/abm/models/joint_tour_frequency_composition.py @@ -36,6 +36,7 @@ def joint_tour_frequency_composition( model_settings = config.read_model_settings(model_settings_file_name) # FIXME setting index as "alt" causes crash in estimation mode... 
+ # happens in school escorting too! # alt_tdd = simulate.read_model_alts( # "joint_tour_frequency_composition_alternatives.csv", set_index="alt" # ) diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py index 8da0068a7e..13e09e08b0 100644 --- a/activitysim/abm/models/school_escorting.py +++ b/activitysim/abm/models/school_escorting.py @@ -46,7 +46,7 @@ def determine_escorting_participants(choosers, persons, model_settings): # can specify different weights to determine chaperones persontype_weight = model_settings.get("PERSON_WEIGHT", 100) - gender_weight = model_settings.get("PERSON_WEIGHT", 10) + gender_weight = model_settings.get("GENDER_WEIGHT", 10) age_weight = model_settings.get("AGE_WEIGHT", 1) # can we move all of these to a config file? @@ -122,7 +122,7 @@ def add_prev_choices_to_choosers(choosers, choices, alts, stage): stage_alts, how="left", left_on=escorting_choice, - right_on=stage_alts.index.name, + right_index=True, ) .set_index("household_id") ) @@ -198,8 +198,12 @@ def create_school_escorting_bundles_table(choosers, tours, stage): bundles : pd.DataFrame one school escorting bundle per row """ - # making a table of bundles - choosers = choosers.reset_index() + # want to keep household_id in columns, which is already there if running in estimation mode + if "household_id" in choosers.columns: + choosers = choosers.reset_index(drop=True) + else: + choosers = choosers.reset_index() + # creating a row for every school escorting bundle choosers = choosers.loc[choosers.index.repeat(choosers["nbundles"])] bundles = pd.DataFrame() @@ -362,7 +366,11 @@ def school_escorting( households_merged = households_merged.to_frame() tours = tours.to_frame() - alts = simulate.read_model_alts(model_settings["ALTS"], set_index="Alt") + # FIXME setting index as "Alt" causes crash in estimation mode... + # happens in joint_tour_frequency_composition too! 
+ # alts = simulate.read_model_alts(model_settings["ALTS"], set_index="Alt") + alts = simulate.read_model_alts(model_settings["ALTS"], set_index=None) + alts.index = alts["Alt"].values households_merged, participant_columns = determine_escorting_participants( households_merged, persons, model_settings @@ -379,7 +387,9 @@ def school_escorting( choices = None for stage_num, stage in enumerate(school_escorting_stages): stage_trace_label = trace_label + "_" + stage - estimator = estimation.manager.begin_estimation("school_escorting_" + stage) + estimator = estimation.manager.begin_estimation( + model_name="school_escorting_" + stage, bundle_name="school_escorting" + ) model_spec_raw = simulate.read_model_spec( file_name=model_settings[stage.upper() + "_SPEC"] @@ -434,9 +444,26 @@ def school_escorting( if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) - estimator.write_spec(model_settings) - estimator.write_coefficients(coefficients_df, model_settings) + estimator.write_spec(model_settings, tag=stage.upper() + "_SPEC") + estimator.write_coefficients( + coefficients_df, file_name=stage.upper() + "_COEFFICIENTS" + ) estimator.write_choosers(choosers) + estimator.write_alternatives(alts, bundle_directory=True) + + # FIXME #interaction_simulate_estimation_requires_chooser_id_in_df_column + # shuold we do it here or have interaction_simulate do it? + # chooser index must be duplicated in column or it will be omitted from interaction_dataset + # estimation requires that chooser_id is either in index or a column of interaction_dataset + # so it can be reformatted (melted) and indexed by chooser_id and alt_id + assert choosers.index.name == "household_id" + assert "household_id" not in choosers.columns + choosers["household_id"] = choosers.index + + # FIXME set_alt_id - do we need this for interaction_simulate estimation bundle tables? 
+ estimator.set_alt_id("alt_id") + + estimator.set_chooser_id(choosers.index.name) log_alt_losers = config.setting("log_alt_losers", False) @@ -474,7 +501,7 @@ def school_escorting( if stage_num >= 1: choosers["Alt"] = choices - choosers = choosers.join(alts, how="left", on="Alt") + choosers = choosers.join(alts.set_index("Alt"), how="left", on="Alt") bundles = create_school_escorting_bundles_table( choosers[choosers["Alt"] > 1], tours, stage ) @@ -484,6 +511,7 @@ def school_escorting( escort_bundles = pd.concat(escort_bundles) + # possible to get if len(escort_bundles) > 0: escort_bundles["bundle_id"] = ( escort_bundles["household_id"] * 10 @@ -519,29 +547,47 @@ def school_escorting( escort_bundles ) - # update pipeline - - pipeline.replace_table("tours", tours) - pipeline.get_rn_generator().drop_channel("tours") - pipeline.get_rn_generator().add_channel("tours", tours) - pipeline.replace_table("escort_bundles", escort_bundles) - # save school escorting tours and trips in pipeline so we can overwrite results from downstream models - pipeline.replace_table("school_escort_tours", school_escort_tours) - pipeline.replace_table("school_escort_trips", school_escort_trips) - - # updating timetable object with pure escort tours so joint tours do not schedule ontop - timetable = inject.get_injectable("timetable") - - # Need to do this such that only one person is in nth_tours - # thus, looping through tour_category and tour_num - # including mandatory tours because their start / end times may have - # changed to match the school escort times - for tour_category in tours.tour_category.unique(): - for tour_num, nth_tours in tours[ - tours.tour_category == tour_category - ].groupby("tour_num", sort=True): - timetable.assign( - window_row_ids=nth_tours["person_id"], tdds=nth_tours["tdd"] - ) - - timetable.replace_table() + else: + # create empty school escort tours & trips tables to be used downstream + tours["school_esc_outbound"] = pd.NA + tours["school_esc_inbound"] = pd.NA 
+ tours["school_escort_direction"] = pd.NA + tours["next_pure_escort_start"] = pd.NA + school_escort_tours = pd.DataFrame(columns=tours.columns) + trip_cols = [ + "household_id", + "person_id", + "tour_id", + "trip_id", + "outbound", + "depart", + "purpose", + "destination", + ] + school_escort_trips = pd.DataFrame(columns=trip_cols) + + # update pipeline + pipeline.replace_table("tours", tours) + pipeline.get_rn_generator().drop_channel("tours") + pipeline.get_rn_generator().add_channel("tours", tours) + pipeline.replace_table("escort_bundles", escort_bundles) + # save school escorting tours and trips in pipeline so we can overwrite results from downstream models + pipeline.replace_table("school_escort_tours", school_escort_tours) + pipeline.replace_table("school_escort_trips", school_escort_trips) + + # updating timetable object with pure escort tours so joint tours do not schedule ontop + timetable = inject.get_injectable("timetable") + + # Need to do this such that only one person is in nth_tours + # thus, looping through tour_category and tour_num + # including mandatory tours because their start / end times may have + # changed to match the school escort times + for tour_category in tours.tour_category.unique(): + for tour_num, nth_tours in tours[tours.tour_category == tour_category].groupby( + "tour_num", sort=True + ): + timetable.assign( + window_row_ids=nth_tours["person_id"], tdds=nth_tours["tdd"] + ) + + timetable.replace_table() diff --git a/activitysim/abm/models/util/school_escort_tours_trips.py b/activitysim/abm/models/util/school_escort_tours_trips.py index 778fb86454..691239ee10 100644 --- a/activitysim/abm/models/util/school_escort_tours_trips.py +++ b/activitysim/abm/models/util/school_escort_tours_trips.py @@ -399,6 +399,10 @@ def merge_school_escort_trips_into_pipeline(): tours = pipeline.get_table("tours") trips = pipeline.get_table("trips") + # checking to see if there are school escort trips to merge in + if len(school_escort_trips) == 0: + 
return trips + # want to remove stops if school escorting takes place on that half tour so we can replace them with the actual stops out_se_tours = tours[ tours["school_esc_outbound"].isin(["pure_escort", "ride_share"]) @@ -603,6 +607,10 @@ def force_escortee_tour_modes_to_match_chauffeur(tours): # Does it even matter if trip modes are getting matched later? escort_bundles = inject.get_table("escort_bundles").to_frame() + if len(escort_bundles) == 0: + # do not need to do anything if no escorting + return tours + # grabbing the school tour ids for each school escort bundle se_tours = escort_bundles[["school_tour_ids", "chauf_tour_id"]].copy() # merging in chauffeur tour mode From 03e4b679caadad49a025609b7e07c0aeb6703f51 Mon Sep 17 00:00:00 2001 From: dhensle Date: Fri, 7 Apr 2023 11:15:49 -0700 Subject: [PATCH 16/58] tracing improvements for disaggregate accessibilities --- activitysim/core/tracing.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/activitysim/core/tracing.py b/activitysim/core/tracing.py index 07ba053266..6bbc2854de 100644 --- a/activitysim/core/tracing.py +++ b/activitysim/core/tracing.py @@ -776,30 +776,22 @@ def interaction_trace_rows(interaction_df, choosers, sample_size=None): households_table_name.pop(), ) - if choosers.index.name == "person_id" and persons_table_name in traceable_table_ids: + if ( + choosers.index.name in ["person_id", "proto_person_id"] + ) and persons_table_name in traceable_table_ids: slicer_column_name = choosers.index.name - targets = traceable_table_ids["persons"] - elif choosers.index.name == "household_id" and "households" in traceable_table_ids: + targets = traceable_table_ids[persons_table_name] + elif ( + choosers.index.name in ["household_id", "proto_household_id"] + ) and households_table_name in traceable_table_ids: slicer_column_name = choosers.index.name - targets = traceable_table_ids["households"] + targets = traceable_table_ids[households_table_name] elif 
"household_id" in choosers.columns and "households" in traceable_table_ids: slicer_column_name = "household_id" targets = traceable_table_ids[households_table_name] elif "person_id" in choosers.columns and persons_table_name in traceable_table_ids: slicer_column_name = "person_id" targets = traceable_table_ids[persons_table_name] - elif ( - choosers.index.name == "proto_person_id" - and persons_table_name in traceable_table_ids - ): - slicer_column_name = choosers.index.name - targets = traceable_table_ids[persons_table_name] - elif ( - choosers.index.name == "proto_household_id" - and households_table_name in traceable_table_ids - ): - slicer_column_name = choosers.index.name - targets = traceable_table_ids[households_table_name] elif ( choosers.index.name == "proto_tour_id" and "proto_tours" in traceable_table_ids ): From b4668362019c692bf4256a5f7e79c737a1bd3d5c Mon Sep 17 00:00:00 2001 From: dhensle Date: Fri, 7 Apr 2023 11:16:34 -0700 Subject: [PATCH 17/58] skip data bundle writing functionality --- activitysim/abm/models/util/estimation.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/activitysim/abm/models/util/estimation.py b/activitysim/abm/models/util/estimation.py index 6a5dbadf1f..8501f02552 100644 --- a/activitysim/abm/models/util/estimation.py +++ b/activitysim/abm/models/util/estimation.py @@ -46,11 +46,12 @@ def __init__(self, bundle_name, model_name, estimation_table_recipes): os.makedirs(output_dir) # make directory if needed # delete estimation files - unlink_files(self.output_directory(), file_types=("csv", "yaml")) + unlink_files(self.output_directory(), file_types=("csv", "yaml", "hdf")) if self.bundle_name != self.model_name: # kind of inelegant to always delete these, but ok as they are redundantly recreated for each sub model unlink_files( - self.output_directory(bundle_directory=True), file_types=("csv", "yaml") + self.output_directory(bundle_directory=True), + file_types=("csv", "yaml", "hdf"), ) 
# FIXME - not required? @@ -218,6 +219,13 @@ def write_omnibus_table(self): if len(self.omnibus_tables) == 0: return + settings = config.read_model_settings(ESTIMATION_SETTINGS_FILE_NAME) + + edbs_to_skip = settings.get("SKIP_BUNDLE_WRITE_FOR", []) + if self.bundle_name in edbs_to_skip: + self.debug(f"Skipping write to disk for {self.bundle_name}") + return + for omnibus_table, table_names in self.omnibus_tables.items(): self.debug( @@ -236,12 +244,20 @@ def write_omnibus_table(self): 1 if omnibus_table in self.omnibus_tables_append_columns else 0 ) - df = pd.concat([self.tables[t] for t in table_names], axis=concat_axis) + # df = pd.concat([self.tables[t] for t in table_names], axis=concat_axis) + if len(table_names) == 0: + # empty tables + df = pd.DataFrame() + else: + df = pd.concat([self.tables[t] for t in table_names], axis=concat_axis) + + self.debug(f"sorting tables: {table_names}") df.sort_index(ascending=True, inplace=True, kind="mergesort") file_path = self.output_file_path(omnibus_table, "csv") assert not os.path.isfile(file_path) + self.debug(f"writing table: {file_path}") df.to_csv(file_path, mode="a", index=True, header=True) self.debug("write_omnibus_choosers: %s" % file_path) From a9dda1c7af8bac45b24dfec67272be7126c65d8e Mon Sep 17 00:00:00 2001 From: dhensle Date: Fri, 7 Apr 2023 21:01:42 -0700 Subject: [PATCH 18/58] fixing restart issue with missing destination_size --- activitysim/abm/models/disaggregate_accessibility.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/activitysim/abm/models/disaggregate_accessibility.py b/activitysim/abm/models/disaggregate_accessibility.py index b8b8bfd3e1..2bb78bf60e 100644 --- a/activitysim/abm/models/disaggregate_accessibility.py +++ b/activitysim/abm/models/disaggregate_accessibility.py @@ -741,11 +741,13 @@ def compute_disaggregate_accessibility(network_los, chunk_size, trace_hh_id): logsums["proto_disaggregate_accessibility"] = access_df # Drop any tables prematurely created 
- for tablename in [ - "school_destination_size", - "workplace_destination_size", - ]: - pipeline.drop_table(tablename) + # FIXME: dropping size tables breaks restart functionality for location choice models. + # hopefully this pipeline mess just goes away with move away from orca.... + # for tablename in [ + # "school_destination_size", + # "workplace_destination_size", + # ]: + # pipeline.drop_table(tablename) for ch in list(pipeline.get_rn_generator().channels.keys()): pipeline.get_rn_generator().drop_channel(ch) From 560db34549ba996e3f3eaa95f2acc86839a33287 Mon Sep 17 00:00:00 2001 From: dhensle Date: Fri, 7 Apr 2023 21:02:08 -0700 Subject: [PATCH 19/58] missed hdf cleanup --- activitysim/abm/models/util/estimation.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/activitysim/abm/models/util/estimation.py b/activitysim/abm/models/util/estimation.py index 8501f02552..7c23b97b5e 100644 --- a/activitysim/abm/models/util/estimation.py +++ b/activitysim/abm/models/util/estimation.py @@ -46,12 +46,11 @@ def __init__(self, bundle_name, model_name, estimation_table_recipes): os.makedirs(output_dir) # make directory if needed # delete estimation files - unlink_files(self.output_directory(), file_types=("csv", "yaml", "hdf")) + unlink_files(self.output_directory(), file_types=("csv", "yaml")) if self.bundle_name != self.model_name: # kind of inelegant to always delete these, but ok as they are redundantly recreated for each sub model unlink_files( - self.output_directory(bundle_directory=True), - file_types=("csv", "yaml", "hdf"), + self.output_directory(bundle_directory=True), file_types=("csv", "yaml") ) # FIXME - not required? From 53ecea8ef9a2936c1f33be715e3d8798ea7e64dc Mon Sep 17 00:00:00 2001 From: Joel Freedman Date: Mon, 24 Apr 2023 17:53:14 -0700 Subject: [PATCH 20/58] Added estimation functionality for external worker identification model. 
--- activitysim/estimation/larch/__init__.py | 2 +- .../larch/external_worker_identification.py | 80 +++++++++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 activitysim/estimation/larch/external_worker_identification.py diff --git a/activitysim/estimation/larch/__init__.py b/activitysim/estimation/larch/__init__.py index 1d87e1ce12..d5ff3d0921 100644 --- a/activitysim/estimation/larch/__init__.py +++ b/activitysim/estimation/larch/__init__.py @@ -9,7 +9,7 @@ from .scheduling import * from .simple_simulate import * from .stop_frequency import * - +from .external_worker_identification import * def component_model(name, *args, **kwargs): if isinstance(name, str): diff --git a/activitysim/estimation/larch/external_worker_identification.py b/activitysim/estimation/larch/external_worker_identification.py new file mode 100644 index 0000000000..63d267cf09 --- /dev/null +++ b/activitysim/estimation/larch/external_worker_identification.py @@ -0,0 +1,80 @@ +import os +from typing import Collection + +import numpy as np +import pandas as pd +import yaml +from larch import DataFrames, Model, P, X +from larch.util import Dict + +from .general import ( + apply_coefficients, + dict_of_linear_utility_from_spec, + remove_apostrophes, +) +from .simple_simulate import simple_simulate_data + + +def external_worker_identification_model( + name="external_worker_identification", + edb_directory="output/estimation_data_bundle/{name}/", + return_data=False, +): + data = simple_simulate_data( + name=name, + edb_directory=edb_directory, + values_index_col="person_id", + ) + coefficients = data.coefficients + # coef_template = data.coef_template # not used + spec = data.spec + chooser_data = data.chooser_data + settings = data.settings + + altnames = list(spec.columns[3:]) + altcodes = range(len(altnames)) + + chooser_data = remove_apostrophes(chooser_data) + chooser_data.fillna(0, inplace=True) + + # Remove choosers with invalid observed choice + chooser_data = 
chooser_data[chooser_data["override_choice"] >= 0] + + m = Model() + # One of the alternatives is coded as 0, so + # we need to explicitly initialize the MNL nesting graph + # and set to root_id to a value other than zero. + m.initialize_graph(alternative_codes=altcodes, root_id=99) + + m.utility_co = dict_of_linear_utility_from_spec( + spec, + "Label", + dict(zip(altnames, altcodes)), + ) + + apply_coefficients(coefficients, m) + + d = DataFrames( + co=chooser_data, + av=True, + alt_codes=altcodes, + alt_names=altnames, + ) + + m.dataservice = d + m.choice_co_code = "override_choice" + + if return_data: + return ( + m, + Dict( + edb_directory=data.edb_directory, + chooser_data=chooser_data, + coefficients=coefficients, + spec=spec, + altnames=altnames, + altcodes=altcodes, + ), + ) + + return m From 8205609ba5a68d3544ccdbba0a4e518241f6965a Mon Sep 17 00:00:00 2001 From: Joel Freedman Date: Mon, 24 Apr 2023 18:01:30 -0700 Subject: [PATCH 21/58] Added code to estimate external worker location choice model --- activitysim/estimation/larch/location_choice.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/activitysim/estimation/larch/location_choice.py b/activitysim/estimation/larch/location_choice.py index 74a426e714..64aa471410 100644 --- a/activitysim/estimation/larch/location_choice.py +++ b/activitysim/estimation/larch/location_choice.py @@ -330,6 +330,12 @@ def workplace_location_model(**kwargs): **kwargs, ) +def external_workplace_location_model(**kwargs): + unused = kwargs.pop("name", None) + return location_choice_model( + name="external_workplace_location", + **kwargs, + ) def school_location_model(**kwargs): unused = kwargs.pop("name", None) From 47f22e21b6cb9a1a2a23f54dd0a4ff9269e7c28d Mon Sep 17 00:00:00 2001 From: dhensle Date: Mon, 15 May 2023 11:32:58 -0700 Subject: [PATCH 22/58] miscellaneous est fixes --- activitysim/abm/models/cdap.py | 1 + activitysim/abm/models/joint_tour_destination.py | 2 +- .../abm/models/non_mandatory_tour_frequency.py 
| 15 ++++++++------- activitysim/abm/models/school_escorting.py | 2 +- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index a8566f73be..761c5c970e 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -190,6 +190,7 @@ def cdap_simulate(persons_merged, persons, households, chunk_size, trace_hh_id): estimator.write_choices(choices) choices = estimator.get_survey_values(choices, "persons", "cdap_activity") if add_joint_tour_utility: + hh_joint.index.name = 'household_id' hh_joint = estimator.get_survey_values( hh_joint, "households", "has_joint_tour" ) diff --git a/activitysim/abm/models/joint_tour_destination.py b/activitysim/abm/models/joint_tour_destination.py index 02651d2a44..02f0dbe84e 100644 --- a/activitysim/abm/models/joint_tour_destination.py +++ b/activitysim/abm/models/joint_tour_destination.py @@ -62,7 +62,7 @@ def joint_tour_destination( estimator.write_model_settings(model_settings, model_settings_file_name) choices_df, save_sample_df = tour_destination.run_tour_destination( - tours, + joint_tours, persons_merged, want_logsums, want_sample_table, diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py b/activitysim/abm/models/non_mandatory_tour_frequency.py index 521f49c47c..b9a6db1fee 100644 --- a/activitysim/abm/models/non_mandatory_tour_frequency.py +++ b/activitysim/abm/models/non_mandatory_tour_frequency.py @@ -345,13 +345,14 @@ def non_mandatory_tour_frequency(persons, persons_merged, chunk_size, trace_hh_i # make sure they created the right tours survey_tours = estimation.manager.get_survey_table("tours").sort_index() - non_mandatory_survey_tours = survey_tours[ - survey_tours.tour_category == "non_mandatory" - ] - assert len(non_mandatory_survey_tours) == len(non_mandatory_tours) - assert non_mandatory_survey_tours.index.equals( - non_mandatory_tours.sort_index().index - ) + # FIXME below check needs to remove the pure-escort 
tours from the survey tours table + # non_mandatory_survey_tours = survey_tours[ + # survey_tours.tour_category == "non_mandatory" + # ] + # assert len(non_mandatory_survey_tours) == len(non_mandatory_tours) + # assert non_mandatory_survey_tours.index.equals( + # non_mandatory_tours.sort_index().index + # ) # make sure they created tours with the expected tour_ids columns = ["person_id", "household_id", "tour_type", "tour_category"] diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py index 13e09e08b0..319c0c0e54 100644 --- a/activitysim/abm/models/school_escorting.py +++ b/activitysim/abm/models/school_escorting.py @@ -511,7 +511,7 @@ def school_escorting( escort_bundles = pd.concat(escort_bundles) - # possible to get + # Only want to create bundles and tours and trips if at least one household has school escorting if len(escort_bundles) > 0: escort_bundles["bundle_id"] = ( escort_bundles["household_id"] * 10 From 7354f7a0104a564c8e7f3f06e06be69d810f431b Mon Sep 17 00:00:00 2001 From: Joel Freedman Date: Wed, 17 May 2023 22:06:37 -0700 Subject: [PATCH 23/58] update simple_simulate.py to include work_from_home_model estimation functionality --- activitysim/estimation/larch/simple_simulate.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/activitysim/estimation/larch/simple_simulate.py b/activitysim/estimation/larch/simple_simulate.py index 0ee055fe6c..940447042d 100644 --- a/activitysim/estimation/larch/simple_simulate.py +++ b/activitysim/estimation/larch/simple_simulate.py @@ -230,6 +230,21 @@ def free_parking_model( }, # True is free parking, False is paid parking, names match spec positions ) +def work_from_home_model( + name="work_from_home", + edb_directory="output/estimation_data_bundle/{name}/", + return_data=False, +): + return simple_simulate_model( + name=name, + edb_directory=edb_directory, + return_data=return_data, + choices={ + True: 1, + False: 2, + }, # True is work from home, 
false is does not work from home, names match spec positions + ) + def mandatory_tour_frequency_model( name="mandatory_tour_frequency", From 5847bda77735c70203b78926154c3574dfa92f2c Mon Sep 17 00:00:00 2001 From: Joel Freedman Date: Thu, 18 May 2023 13:39:24 -0700 Subject: [PATCH 24/58] Added code for external_non_mandatory_identification_model estimation to simple_simulate.py --- activitysim/estimation/larch/simple_simulate.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/activitysim/estimation/larch/simple_simulate.py b/activitysim/estimation/larch/simple_simulate.py index 940447042d..f5504f1f47 100644 --- a/activitysim/estimation/larch/simple_simulate.py +++ b/activitysim/estimation/larch/simple_simulate.py @@ -310,3 +310,19 @@ def joint_tour_participation_model( 1: 2, # 1 means not participate, alternative 2 }, ) + +def external_non_mandatory_identification_model( + name="external_non_mandatory_identification", + edb_directory="output/estimation_data_bundle/{name}/", + return_data=False, +): + return simple_simulate_model( + name=name, + edb_directory=edb_directory, + return_data=return_data, + values_index_col="tour_id", + choices={ + 0: 1, # 0 means external, alternative 1 + 1: 2, # 1 means not external, alternative 2 + }, + ) From 88fe4a7373b51825543c9c600bfe2862cc3f243a Mon Sep 17 00:00:00 2001 From: dhensle Date: Wed, 21 Jun 2023 11:31:34 -0700 Subject: [PATCH 25/58] added landuse to trip dest locals_d --- activitysim/abm/models/trip_destination.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/activitysim/abm/models/trip_destination.py b/activitysim/abm/models/trip_destination.py index 545cfee29f..01554d984d 100644 --- a/activitysim/abm/models/trip_destination.py +++ b/activitysim/abm/models/trip_destination.py @@ -134,6 +134,7 @@ def _destination_sample( "size_terms": size_term_matrix, "size_terms_array": size_term_matrix.df.to_numpy(), "timeframe": "trip", + "land_use": inject.get_table("land_use").to_frame(), } ) 
locals_dict.update(skims) @@ -829,6 +830,7 @@ def trip_destination_simulate( "size_terms": size_term_matrix, "size_terms_array": size_term_matrix.df.to_numpy(), "timeframe": "trip", + "land_use": inject.get_table("land_use").to_frame(), } ) locals_dict.update(skims) From 90fce8d8696ce9c0224e19f597c932e241b30c82 Mon Sep 17 00:00:00 2001 From: dhensle Date: Mon, 3 Jul 2023 18:00:26 -0700 Subject: [PATCH 26/58] adding transit pass estimation functions --- .../estimation/larch/simple_simulate.py | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/activitysim/estimation/larch/simple_simulate.py b/activitysim/estimation/larch/simple_simulate.py index f5504f1f47..9681e28875 100644 --- a/activitysim/estimation/larch/simple_simulate.py +++ b/activitysim/estimation/larch/simple_simulate.py @@ -326,3 +326,36 @@ def external_non_mandatory_identification_model( 1: 2, # 1 means not external, alternative 2 }, ) + + +def transit_pass_subsidy_model( + name="transit_pass_subsidy", + edb_directory="output/estimation_data_bundle/{name}/", + return_data=False, +): + print("transit pass subsidy") + return simple_simulate_model( + name=name, + edb_directory=edb_directory, + return_data=return_data, + choices={ + 0: 1, # 0 means no subsidy, alternative 1 + 1: 2, # 1 means subsidy, alternative 2 + }, + ) + + +def transit_pass_ownership_model( + name="transit_pass_ownership", + edb_directory="output/estimation_data_bundle/{name}/", + return_data=False, +): + return simple_simulate_model( + name=name, + edb_directory=edb_directory, + return_data=return_data, + choices={ + 0: 1, # 0 means no pass, alternative 1 + 1: 2, # 1 means pass, alternative 2 + }, + ) From 901e2fcaee6650a2e47e33a1109339c61c523e8a Mon Sep 17 00:00:00 2001 From: dhensle Date: Tue, 4 Jul 2023 11:35:16 -0700 Subject: [PATCH 27/58] transponder ownership estimation call --- activitysim/estimation/larch/simple_simulate.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git 
a/activitysim/estimation/larch/simple_simulate.py b/activitysim/estimation/larch/simple_simulate.py index 9681e28875..87ec5594c8 100644 --- a/activitysim/estimation/larch/simple_simulate.py +++ b/activitysim/estimation/larch/simple_simulate.py @@ -359,3 +359,18 @@ def transit_pass_ownership_model( 1: 2, # 1 means pass, alternative 2 }, ) + +def transponder_ownership_model( + name="transponder_ownership", + edb_directory="output/estimation_data_bundle/{name}/", + return_data=False, +): + return simple_simulate_model( + name=name, + edb_directory=edb_directory, + return_data=return_data, + choices={ + 0: 1, # 0 means no pass, alternative 1 + 1: 2, # 1 means pass, alternative 2 + }, + ) \ No newline at end of file From c5c0d80789520948bbf577a423414eaf5e41a867 Mon Sep 17 00:00:00 2001 From: dhensle Date: Fri, 14 Jul 2023 17:14:40 -0700 Subject: [PATCH 28/58] estimation stop frequency improvements --- activitysim/abm/models/stop_frequency.py | 2 +- activitysim/estimation/larch/stop_frequency.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/activitysim/abm/models/stop_frequency.py b/activitysim/abm/models/stop_frequency.py index 94a208075f..4e05fe8749 100644 --- a/activitysim/abm/models/stop_frequency.py +++ b/activitysim/abm/models/stop_frequency.py @@ -199,7 +199,7 @@ def stop_frequency( print(f"survey_trips_not_in_trips\n{survey_trips_not_in_trips}") different = True trips_not_in_survey_trips = trips[~trips.index.isin(survey_trips.index)] - if len(survey_trips_not_in_trips) > 0: + if len(trips_not_in_survey_trips) > 0: print(f"trips_not_in_survey_trips\n{trips_not_in_survey_trips}") different = True assert not different diff --git a/activitysim/estimation/larch/stop_frequency.py b/activitysim/estimation/larch/stop_frequency.py index c572af5e87..e580491e02 100644 --- a/activitysim/estimation/larch/stop_frequency.py +++ b/activitysim/estimation/larch/stop_frequency.py @@ -42,8 +42,7 @@ def stop_frequency_data( seg_purpose = 
seg_["primary_purpose"] seg_subdir = Path(os.path.join(edb_directory, seg_purpose)) segment_coef[seg_["primary_purpose"]] = pd.read_csv( - seg_subdir / seg_["COEFFICIENTS"], - index_col="coefficient_name", + seg_subdir / seg_["COEFFICIENTS"], index_col="coefficient_name", comment="#" ) for seg in segments: @@ -89,13 +88,13 @@ def stop_frequency_data( seg_purpose = seg["primary_purpose"] seg_subdir = Path(os.path.join(edb_directory, seg_purpose)) coeffs_ = pd.read_csv( - seg_subdir / seg["COEFFICIENTS"], index_col="coefficient_name" + seg_subdir / seg["COEFFICIENTS"], index_col="coefficient_name", comment="#" ) coeffs_.index = pd.Index( [f"{i}_{seg_purpose}" for i in coeffs_.index], name="coefficient_name" ) seg_coefficients.append(coeffs_) - spec = pd.read_csv(seg_subdir / "stop_frequency_SPEC_.csv") + spec = pd.read_csv(seg_subdir / "stop_frequency_SPEC_.csv", comment="#") spec = remove_apostrophes(spec, ["Label"]) # spec.iloc[:, 3:] = spec.iloc[:, 3:].applymap(lambda x: f"{x}_{seg_purpose}" if not pd.isna(x) else x) seg_spec.append(spec) From c0fdfde8eb866d55897f45f3cb8e131fa50f1c78 Mon Sep 17 00:00:00 2001 From: dhensle Date: Wed, 19 Jul 2023 12:31:52 -0700 Subject: [PATCH 29/58] sampling location choice in estimation mode, reindex and landuse available --- activitysim/abm/models/location_choice.py | 39 +++++++++++++++++------ 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py index 8470368708..7027c87dd8 100644 --- a/activitysim/abm/models/location_choice.py +++ b/activitysim/abm/models/location_choice.py @@ -17,6 +17,7 @@ ) from activitysim.core.interaction_sample import interaction_sample from activitysim.core.interaction_sample_simulate import interaction_sample_simulate +from activitysim.core.util import reindex from .util import estimation from .util import logsums as logsum @@ -138,15 +139,8 @@ def _location_sample( logger.info("Running %s with %d persons" % 
(trace_label, len(choosers.index))) sample_size = model_settings["SAMPLE_SIZE"] - if config.setting("disable_destination_sampling", False) or ( - estimator and estimator.want_unsampled_alternatives - ): - # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count - logger.info( - "Estimation mode for %s using unsampled alternatives short_circuit_choices" - % (trace_label,) - ) - sample_size = 0 + if estimator: + sample_size = model_settings.get('ESTIMATION_SAMPLE_SIZE', 0) locals_d = { "skims": skims, @@ -154,6 +148,8 @@ def _location_sample( "orig_col_name": skims.orig_key, # added for sharrow flows "dest_col_name": skims.dest_key, # added for sharrow flows "timeframe": "timeless", + "reindex": reindex, + "land_use": inject.get_table("land_use").to_frame(), } constants = config.get_model_constants(model_settings) locals_d.update(constants) @@ -470,6 +466,29 @@ def run_location_sample( trace_label=trace_label, ) + # FIXME temporary code to ensure sampled alternative is in choices for estimation + # Hack to get shorter run times when you don't care about creating EDB for location choice models + if estimator: + # grabbing survey values + survey_persons = estimation.manager.get_survey_table("persons") + if 'school_location' in trace_label: + survey_choices = survey_persons['school_zone_id'].reset_index() + elif ('workplace_location' in trace_label) and ('external' not in trace_label): + survey_choices = survey_persons['workplace_zone_id'].reset_index() + else: + return choices + survey_choices.columns = ['person_id', 'alt_dest'] + survey_choices = survey_choices[survey_choices['person_id'].isin(choices.index) & (survey_choices.alt_dest > 0)] + # merging survey destination into table if not available + joined_data = survey_choices.merge(choices, on=['person_id', 'alt_dest'], how='left', indicator=True) + missing_rows = joined_data[joined_data['_merge'] == 'left_only'] + missing_rows['pick_count'] = 1 + if len(missing_rows) > 0: + 
new_choices = missing_rows[['person_id', 'alt_dest', 'prob', 'pick_count']].set_index('person_id') + choices = choices.append(new_choices, ignore_index=False).sort_index() + # making probability the mean of all other sampled destinations by person + choices['prob'] = choices['prob'].fillna(choices.groupby('person_id')['prob'].transform('mean')) + return choices @@ -601,6 +620,8 @@ def run_location_simulate( "orig_col_name": skims.orig_key, # added for sharrow flows "dest_col_name": skims.dest_key, # added for sharrow flows "timeframe": "timeless", + "reindex": reindex, + "land_use": inject.get_table("land_use").to_frame(), } constants = config.get_model_constants(model_settings) if constants is not None: From a25c891acff8afe4462d047e1912747321ea1481 Mon Sep 17 00:00:00 2001 From: dhensle Date: Wed, 26 Jul 2023 22:45:50 -0700 Subject: [PATCH 30/58] cdap larch updates --- activitysim/estimation/larch/cdap.py | 143 ++++++++++++++++++++++++--- 1 file changed, 127 insertions(+), 16 deletions(-) diff --git a/activitysim/estimation/larch/cdap.py b/activitysim/estimation/larch/cdap.py index fdb801de03..ee96ee5aab 100644 --- a/activitysim/estimation/larch/cdap.py +++ b/activitysim/estimation/larch/cdap.py @@ -19,8 +19,10 @@ _logger = logging.getLogger(logger_name) +MAX_HHSIZE = 5 -def generate_alternatives(n_persons): + +def generate_alternatives(n_persons, add_joint=False): """ Generate a dictionary of CDAP alternatives. 
@@ -39,8 +41,14 @@ def generate_alternatives(n_persons): alt_names = list( "".join(i) for i in itertools.product(basic_patterns, repeat=n_persons) ) + if add_joint: + pattern = r"[MN]" + joint_alts = [ + alt + "J" for alt in alt_names if len(re.findall(pattern, alt)) >= 2 + ] + alt_names = alt_names + joint_alts alt_codes = np.arange(1, len(alt_names) + 1) - return Dict(zip(alt_names, alt_codes)) + return dict(zip(alt_names, alt_codes)) def apply_replacements(expression, prefix, tokens): @@ -67,7 +75,9 @@ def apply_replacements(expression, prefix, tokens): return expression -def cdap_base_utility_by_person(model, n_persons, spec, alts=None, value_tokens=()): +def cdap_base_utility_by_person( + model, n_persons, spec, alts=None, value_tokens=(), add_joint=False +): """ Build the base utility by person for each pattern. @@ -100,7 +110,7 @@ def cdap_base_utility_by_person(model, n_persons, spec, alts=None, value_tokens= model.utility_co[3] += X(spec.Expression[i]) * P(spec.loc[i, "H"]) else: if alts is None: - alts = generate_alternatives(n_persons) + alts = generate_alternatives(n_persons, add_joint) person_numbers = range(1, n_persons + 1) for pnum in person_numbers: for i in spec.index: @@ -220,13 +230,71 @@ def cdap_interaction_utility(model, n_persons, alts, interaction_coef, coefficie model.utility_co[anum] += linear_component -def cdap_split_data(households, values): +def cdap_joint_tour_utility(model, n_persons, alts, joint_coef, values): + """ + FIXME: Not fully implemented!!!! + + Code is adapted from the cdap model in ActivitySim with the joint tour component + Structure is pretty much in place, but dependencies need to be filtered out. 
+ """ + + for row in joint_coef.itertuples(): + for aname, anum in alts.items(): + # only adding joint tour utility to alternatives with joint tours + if "J" not in aname: + continue + expression = row.Expression + dependency_name = row.dependency + coefficient = row.coefficient + + # dealing with dependencies + if dependency_name in ["M_px", "N_px", "H_px"]: + if "_pxprod" in expression: + prod_conds = row.Expression.split("|") + expanded_expressions = [ + tup + for tup in itertools.product( + range(len(prod_conds)), repeat=n_persons + ) + ] + for expression_tup in expanded_expressions: + expression_list = [] + dependency_list = [] + for counter in range(len(expression_tup)): + expression_list.append( + prod_conds[expression_tup[counter]].replace( + "xprod", str(counter + 1) + ) + ) + if expression_tup[counter] == 0: + dependency_list.append( + dependency_name.replace("x", str(counter + 1)) + ) + + expression_value = "&".join(expression_list) + # FIXME only apply to alternative if dependency satisfied + bug + model.utility_co[anum] += X(expression_value) * P(coefficient) + + elif "_px" in expression: + for pnum in range(1, n_persons + 1): + dependency_name = row.dependency.replace("x", str(pnum)) + expression = row.Expression.replace("x", str(pnum)) + # FIXME only apply to alternative if dependency satisfied + bug + model.utility_co[anum] += X(expression) * P(coefficient) + + else: + model.utility_co[anum] += X(expression) * P(coefficient) + + +def cdap_split_data(households, values, add_joint): if "cdap_rank" not in values: raise ValueError("assign cdap_rank to values first") # only process the first 5 household members - values = values[values.cdap_rank <= 5] + values = values[values.cdap_rank <= MAX_HHSIZE] cdap_data = {} - for hhsize, hhs_part in households.groupby(households.hhsize.clip(1, 5)): + for hhsize, hhs_part in households.groupby(households.hhsize.clip(1, MAX_HHSIZE)): if hhsize == 1: v = pd.merge(values, hhs_part.household_id, 
on="household_id").set_index( "household_id" @@ -239,16 +307,31 @@ def cdap_split_data(households, values): ) v.columns = [f"p{i[1]}_{i[0]}" for i in v.columns] for agglom in ["override_choice", "model_choice"]: - v[agglom] = v[[f"p{p}_{agglom}" for p in range(1, hhsize + 1)]].sum(1) + v[agglom] = ( + v[[f"p{p}_{agglom}" for p in range(1, hhsize + 1)]] + .fillna("H") + .sum(1) + ) + if add_joint: + joint_tour_indicator = ( + hhs_part.set_index("household_id") + .reindex(v.index) + .has_joint_tour + ) + pd.testing.assert_index_equal(v.index, joint_tour_indicator.index) + v[agglom] = np.where( + joint_tour_indicator == 1, v[agglom] + "J", v[agglom] + ) cdap_data[hhsize] = v + return cdap_data -def cdap_dataframes(households, values): - data = cdap_split_data(households, values) +def cdap_dataframes(households, values, add_joint): + data = cdap_split_data(households, values, add_joint) dfs = {} for hhsize in data.keys(): - alts = generate_alternatives(hhsize) + alts = generate_alternatives(hhsize, add_joint) dfs[hhsize] = DataFrames( co=data[hhsize], alt_names=alts.keys(), @@ -296,6 +379,7 @@ def cdap_data( spec1_file="{name}_INDIV_AND_HHSIZE1_SPEC.csv", settings_file="{name}_model_settings.yaml", chooser_data_file="{name}_values_combined.csv", + joint_coeffs_file="{name}_joint_tour_coefficients.csv", ): edb_directory = edb_directory.format(name=name) if not os.path.exists(edb_directory): @@ -326,7 +410,7 @@ def read_yaml(filename, **kwargs): if person_type_map is None: raise KeyError("PERSON_TYPE_MAP missing from cdap_settings.yaml") - person_rank = cdap.assign_cdap_rank(persons, person_type_map) + # person_rank = cdap.assign_cdap_rank(persons, person_type_map) coefficients = read_csv( coefficients_file, @@ -341,9 +425,28 @@ def read_yaml(filename, **kwargs): comment="#", ) + try: + joint_coef = read_csv( + joint_coeffs_file, + # dtype={"interaction_ptypes": str}, + # keep_default_na=False, + comment="#", + ) + add_joint = True + except FileNotFoundError: + 
joint_coef = None + add_joint = False + print("Including joint tour utiltiy?:", add_joint) + spec1 = read_csv(spec1_file, comment="#") values = read_csv(chooser_data_file, comment="#") - values["cdap_rank"] = person_rank + person_rank = cdap.assign_cdap_rank( + persons[persons.household_id.isin(values.household_id)] + .set_index("person_id") + .reindex(values.person_id), + person_type_map, + ) + values["cdap_rank"] = person_rank.values return Dict( edb_directory=Path(edb_directory), @@ -353,6 +456,8 @@ def read_yaml(filename, **kwargs): coefficients=coefficients, households=hhs, settings=settings, + joint_coef=joint_coef, + add_joint=add_joint, ) @@ -365,6 +470,7 @@ def cdap_model( spec1_file="{name}_INDIV_AND_HHSIZE1_SPEC.csv", settings_file="{name}_model_settings.yaml", chooser_data_file="{name}_values_combined.csv", + joint_coeffs_file="{name}_joint_tour_coefficients.csv", return_data=False, ): d = cdap_data( @@ -377,6 +483,7 @@ def cdap_model( spec1_file=spec1_file, settings_file=settings_file, chooser_data_file=chooser_data_file, + joint_coeffs_file=joint_coeffs_file, ) households = d.households @@ -384,8 +491,9 @@ def cdap_model( spec1 = d.spec1 interaction_coef = d.interaction_coef coefficients = d.coefficients + add_joint = d.add_joint - cdap_dfs = cdap_dataframes(households, values) + cdap_dfs = cdap_dataframes(households, values, add_joint) m = {} _logger.info(f"building for model 1") m[1] = Model(dataservice=cdap_dfs[1]) @@ -398,12 +506,15 @@ def cdap_model( interaction_coef["cardinality"] = interaction_coef[ "interaction_ptypes" ].str.len() - for s in [2, 3, 4, 5]: + for s in range(2, MAX_HHSIZE + 1): + # for s in [2, 3, 4, 5]: _logger.info(f"building for model {s}") m[s] = Model(dataservice=cdap_dfs[s]) - alts = generate_alternatives(s) + alts = generate_alternatives(s, add_joint) cdap_base_utility_by_person(m[s], s, spec1, alts, values.columns) cdap_interaction_utility(m[s], s, alts, interaction_coef, coefficients) + # if add_joint: + # 
cdap_joint_tour_utility(m[s], s, alts, d.joint_coef, values) m[s].choice_any = True m[s].availability_any = True From 851ba2189ca034255dc21cde502384de20610568 Mon Sep 17 00:00:00 2001 From: dhensle Date: Wed, 26 Jul 2023 22:49:29 -0700 Subject: [PATCH 31/58] blacken --- activitysim/abm/models/cdap.py | 2 +- activitysim/abm/models/location_choice.py | 33 ++++++++++++------- activitysim/estimation/larch/__init__.py | 1 + .../estimation/larch/location_choice.py | 2 ++ .../estimation/larch/simple_simulate.py | 17 ++++++---- 5 files changed, 35 insertions(+), 20 deletions(-) diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index 761c5c970e..f41bea9eee 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -190,7 +190,7 @@ def cdap_simulate(persons_merged, persons, households, chunk_size, trace_hh_id): estimator.write_choices(choices) choices = estimator.get_survey_values(choices, "persons", "cdap_activity") if add_joint_tour_utility: - hh_joint.index.name = 'household_id' + hh_joint.index.name = "household_id" hh_joint = estimator.get_survey_values( hh_joint, "households", "has_joint_tour" ) diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py index 7027c87dd8..7e9a76d551 100644 --- a/activitysim/abm/models/location_choice.py +++ b/activitysim/abm/models/location_choice.py @@ -140,7 +140,7 @@ def _location_sample( sample_size = model_settings["SAMPLE_SIZE"] if estimator: - sample_size = model_settings.get('ESTIMATION_SAMPLE_SIZE', 0) + sample_size = model_settings.get("ESTIMATION_SAMPLE_SIZE", 0) locals_d = { "skims": skims, @@ -471,23 +471,32 @@ def run_location_sample( if estimator: # grabbing survey values survey_persons = estimation.manager.get_survey_table("persons") - if 'school_location' in trace_label: - survey_choices = survey_persons['school_zone_id'].reset_index() - elif ('workplace_location' in trace_label) and ('external' not in trace_label): - 
survey_choices = survey_persons['workplace_zone_id'].reset_index() + if "school_location" in trace_label: + survey_choices = survey_persons["school_zone_id"].reset_index() + elif ("workplace_location" in trace_label) and ("external" not in trace_label): + survey_choices = survey_persons["workplace_zone_id"].reset_index() else: return choices - survey_choices.columns = ['person_id', 'alt_dest'] - survey_choices = survey_choices[survey_choices['person_id'].isin(choices.index) & (survey_choices.alt_dest > 0)] + survey_choices.columns = ["person_id", "alt_dest"] + survey_choices = survey_choices[ + survey_choices["person_id"].isin(choices.index) + & (survey_choices.alt_dest > 0) + ] # merging survey destination into table if not available - joined_data = survey_choices.merge(choices, on=['person_id', 'alt_dest'], how='left', indicator=True) - missing_rows = joined_data[joined_data['_merge'] == 'left_only'] - missing_rows['pick_count'] = 1 + joined_data = survey_choices.merge( + choices, on=["person_id", "alt_dest"], how="left", indicator=True + ) + missing_rows = joined_data[joined_data["_merge"] == "left_only"] + missing_rows["pick_count"] = 1 if len(missing_rows) > 0: - new_choices = missing_rows[['person_id', 'alt_dest', 'prob', 'pick_count']].set_index('person_id') + new_choices = missing_rows[ + ["person_id", "alt_dest", "prob", "pick_count"] + ].set_index("person_id") choices = choices.append(new_choices, ignore_index=False).sort_index() # making probability the mean of all other sampled destinations by person - choices['prob'] = choices['prob'].fillna(choices.groupby('person_id')['prob'].transform('mean')) + choices["prob"] = choices["prob"].fillna( + choices.groupby("person_id")["prob"].transform("mean") + ) return choices diff --git a/activitysim/estimation/larch/__init__.py b/activitysim/estimation/larch/__init__.py index d5ff3d0921..a175db6e7d 100644 --- a/activitysim/estimation/larch/__init__.py +++ b/activitysim/estimation/larch/__init__.py @@ -11,6 +11,7 
@@ from .stop_frequency import * from .external_worker_identification import * + def component_model(name, *args, **kwargs): if isinstance(name, str): m = globals().get(f"{name}_model") diff --git a/activitysim/estimation/larch/location_choice.py b/activitysim/estimation/larch/location_choice.py index 64aa471410..41e2f78e63 100644 --- a/activitysim/estimation/larch/location_choice.py +++ b/activitysim/estimation/larch/location_choice.py @@ -330,6 +330,7 @@ def workplace_location_model(**kwargs): **kwargs, ) + def external_workplace_location_model(**kwargs): unused = kwargs.pop("name", None) return location_choice_model( @@ -337,6 +338,7 @@ def external_workplace_location_model(**kwargs): **kwargs, ) + def school_location_model(**kwargs): unused = kwargs.pop("name", None) return location_choice_model( diff --git a/activitysim/estimation/larch/simple_simulate.py b/activitysim/estimation/larch/simple_simulate.py index 87ec5594c8..843311da17 100644 --- a/activitysim/estimation/larch/simple_simulate.py +++ b/activitysim/estimation/larch/simple_simulate.py @@ -230,6 +230,7 @@ def free_parking_model( }, # True is free parking, False is paid parking, names match spec positions ) + def work_from_home_model( name="work_from_home", edb_directory="output/estimation_data_bundle/{name}/", @@ -311,6 +312,7 @@ def joint_tour_participation_model( }, ) + def external_non_mandatory_identification_model( name="external_non_mandatory_identification", edb_directory="output/estimation_data_bundle/{name}/", @@ -339,8 +341,8 @@ def transit_pass_subsidy_model( edb_directory=edb_directory, return_data=return_data, choices={ - 0: 1, # 0 means no subsidy, alternative 1 - 1: 2, # 1 means subsidy, alternative 2 + 0: 1, # 0 means no subsidy, alternative 1 + 1: 2, # 1 means subsidy, alternative 2 }, ) @@ -355,11 +357,12 @@ def transit_pass_ownership_model( edb_directory=edb_directory, return_data=return_data, choices={ - 0: 1, # 0 means no pass, alternative 1 - 1: 2, # 1 means pass, alternative 2 
+ 0: 1, # 0 means no pass, alternative 1 + 1: 2, # 1 means pass, alternative 2 }, ) + def transponder_ownership_model( name="transponder_ownership", edb_directory="output/estimation_data_bundle/{name}/", @@ -370,7 +373,7 @@ def transponder_ownership_model( edb_directory=edb_directory, return_data=return_data, choices={ - 0: 1, # 0 means no pass, alternative 1 - 1: 2, # 1 means pass, alternative 2 + 0: 1, # 0 means no pass, alternative 1 + 1: 2, # 1 means pass, alternative 2 }, - ) \ No newline at end of file + ) From cb4a0cbb68a800e5362f0367de570459530dffc6 Mon Sep 17 00:00:00 2001 From: Joel Freedman Date: Mon, 14 Aug 2023 15:53:50 -0700 Subject: [PATCH 32/58] added estimation functionality for external_non_mandatory_destination model --- activitysim/estimation/larch/location_choice.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/activitysim/estimation/larch/location_choice.py b/activitysim/estimation/larch/location_choice.py index 41e2f78e63..7c88531884 100644 --- a/activitysim/estimation/larch/location_choice.py +++ b/activitysim/estimation/larch/location_choice.py @@ -374,6 +374,12 @@ def non_mandatory_tour_destination_model(**kwargs): **kwargs, ) +def external_non_mandatory_destination_model(**kwargs): + unused = kwargs.pop("name", None) + return location_choice_model( + name="external_non_mandatory_destination", + **kwargs, + ) def trip_destination_model(**kwargs): unused = kwargs.pop("name", None) From 7ac64a10e4d2fe07e3f3cc3c4b2bca295945eaf3 Mon Sep 17 00:00:00 2001 From: dhensle Date: Thu, 17 Aug 2023 14:19:52 -0700 Subject: [PATCH 33/58] nmtf estimation & enhancements --- .../models/non_mandatory_tour_frequency.py | 10 +- activitysim/abm/models/util/overlap.py | 93 ++++++++++++++++ activitysim/core/config.py | 2 +- .../estimation/larch/nonmand_tour_freq.py | 102 +++++++++++++++++- 4 files changed, 202 insertions(+), 5 deletions(-) diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py 
b/activitysim/abm/models/non_mandatory_tour_frequency.py index b9a6db1fee..a76a3650bf 100644 --- a/activitysim/abm/models/non_mandatory_tour_frequency.py +++ b/activitysim/abm/models/non_mandatory_tour_frequency.py @@ -20,7 +20,7 @@ from .util import annotate from .util.school_escort_tours_trips import recompute_tour_count_statistics -from .util.overlap import person_max_window +from .util.overlap import person_max_window, person_available_periods from .util.tour_frequency import process_non_mandatory_tours logger = logging.getLogger(__name__) @@ -166,7 +166,10 @@ def non_mandatory_tour_frequency(persons, persons_merged, chunk_size, trace_hh_i preprocessor_settings = model_settings.get("preprocessor", None) if preprocessor_settings: - locals_dict = {"person_max_window": person_max_window} + locals_dict = { + "person_max_window": person_max_window, + "person_available_periods": person_available_periods, + } expressions.assign_columns( df=choosers, @@ -259,6 +262,9 @@ def non_mandatory_tour_frequency(persons, persons_merged, chunk_size, trace_hh_i choices_list.append(choices) + # FIXME only want to keep actual purposes, adding cols in alts will mess this up + # this is complicated by canonical_ids calculated based on alts if not specified explicitly + # thus, adding column to input alts will change IDs and break estimation mode.... 
del alternatives["tot_tours"] # del tot_tours column we added above # The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate diff --git a/activitysim/abm/models/util/overlap.py b/activitysim/abm/models/util/overlap.py index 70fadfbd43..f7773a14a3 100644 --- a/activitysim/abm/models/util/overlap.py +++ b/activitysim/abm/models/util/overlap.py @@ -250,3 +250,96 @@ def person_max_window(persons): max_window.index = persons.index return max_window + + +def calculate_consecutive(array): + # Append zeros columns at either side of counts + append1 = np.zeros((array.shape[0], 1), dtype=int) + array_ext = np.column_stack((append1, array, append1)) + + # Get start and stop indices with 1s as triggers + diffs = np.diff((array_ext == 1).astype(int), axis=1) + starts = np.argwhere(diffs == 1) + stops = np.argwhere(diffs == -1) + + # Get intervals using differences between start and stop indices + intvs = stops[:, 1] - starts[:, 1] + + # Store intervals as a 2D array for further vectorized ops to make. + c = np.bincount(starts[:, 0]) + mask = np.arange(c.max()) < c[:, None] + intvs2D = mask.astype(float) + intvs2D[mask] = intvs + + # Get max along each row as final output + out = intvs2D.max(1).astype(int) + return out + + +def person_available_periods(persons, start_bin=None, end_bin=None, continuous=False): + """ + Returns the number of available time period bins for each person in persons. + Can limit the calculation to include starting and/or ending bins. + Can return either the total number of available time bins with continuous = False, + or only the maximum continuous run of available bins with continuous = True + + This is equivalent to person_max_window if no start/end bins provided and continuous=True + + time bins are inclusive, i.e. [start_bin, end_bin] + + e.g. 
+ available out of timetable has dummy first and last bins + available = [ + [1,1,1,1,1,1,1,1,1,1,1,1], + [1,1,0,1,1,0,0,1,0,1,0,1], + #-,0,1,2,3,4,5,6,7,8,9,- time bins + ] + returns: + for start_bin=None, end_bin=None, continuous=False: (10, 5) + for start_bin=None, end_bin=None, continuous=True: (10, 2) + for start_bin=5, end_bin=9, continuous=False: (5, 2) + for start_bin=5, end_bin=9, continuous=True: (5, 1) + + + Parameters + ---------- + start_bin : (int) starting time bin to include starting from 0 + end_bin : (int) ending time bin to include + continuous : (bool) count all available bins if false or just largest continuous run if True + + Returns + ------- + pd.Series of the number of available time bins indexed by person ID + """ + timetable = inject.get_injectable("timetable") + + # ndarray with one row per person and one column per time period + # array value of 1 where free periods and 0 elsewhere + s = pd.Series(persons.index.values, index=persons.index) + + # first and last bins are dummys in the time table + # so if you have 48 half hour time periods, shape is (len(persons), 50) + available = timetable.individually_available(s) + + # Create a mask to exclude bins before the starting bin and after the ending bin + mask = np.ones(available.shape[1], dtype=bool) + mask[0] = False + mask[len(mask) - 1] = False + if start_bin is not None: + # +1 needed due to dummy first bin + mask[: start_bin + 1] = False + if end_bin is not None: + # +2 for dummy first bin and inclusive end_bin + mask[end_bin + 2 :] = False + + # Apply the mask to the array + masked_array = available[:, mask] + + # Calculate the number of available time periods for each person + availability = np.sum(masked_array, axis=1) + + if continuous: + availability = calculate_consecutive(masked_array) + + availability = pd.Series(availability, index=persons.index) + return availability diff --git a/activitysim/core/config.py b/activitysim/core/config.py index 7024d0512b..a86093f3e6 100644 --- 
a/activitysim/core/config.py +++ b/activitysim/core/config.py @@ -708,7 +708,7 @@ def filter_warnings(): # These warning are left as warnings as an invitation for future enhancement. from pandas.errors import PerformanceWarning - warnings.filterwarnings("default", category=PerformanceWarning) + warnings.filterwarnings("ignore", category=PerformanceWarning) # pandas 1.5 # beginning in pandas version 1.5, a new warning is emitted when a column is set via iloc diff --git a/activitysim/estimation/larch/nonmand_tour_freq.py b/activitysim/estimation/larch/nonmand_tour_freq.py index 9dfdac73a4..ba911dd219 100644 --- a/activitysim/estimation/larch/nonmand_tour_freq.py +++ b/activitysim/estimation/larch/nonmand_tour_freq.py @@ -7,6 +7,8 @@ from larch import DataFrames, Model from larch.log import logger_name from larch.util import Dict +import pickle +from datetime import datetime from .general import ( apply_coefficients, @@ -27,6 +29,7 @@ def interaction_simulate_data( coefficients_files="{segment_name}/{name}_coefficients_{segment_name}.csv", chooser_data_files="{segment_name}/{name}_choosers_combined.csv", alt_values_files="{segment_name}/{name}_interaction_expression_values.csv", + segment_subset=[], ): edb_directory = edb_directory.format(name=name) @@ -46,21 +49,30 @@ def _read_csv(filename, **kwargs): alt_values = {} segment_names = [s["NAME"] for s in settings["SPEC_SEGMENTS"]] + if len(segment_subset) > 0: + assert set(segment_subset).issubset( + set(segment_names) + ), f"{segment_subset} is not a subset of {segment_names}" + segment_names = segment_subset for segment_name in segment_names: + print(f"Loading EDB for {segment_name} segment") coefficients[segment_name] = _read_csv( coefficients_files.format(name=name, segment_name=segment_name), index_col="coefficient_name", + comment="#", ) chooser_data[segment_name] = _read_csv( chooser_data_files.format(name=name, segment_name=segment_name), ) alt_values[segment_name] = _read_csv( 
alt_values_files.format(name=name, segment_name=segment_name), + comment="#", ) spec = _read_csv( spec_file, + comment="#", ) spec = remove_apostrophes(spec, ["Label"]) # alt_names = list(spec.columns[3:]) @@ -118,10 +130,80 @@ def unavail(model, x_ca): return unav +# FIXME move all this to larch/general.py? see ActivitySim issue #686 +def _read_feather(filename, name, edb_directory, **kwargs): + filename = filename.format(name=name) + return pd.read_feather(os.path.join(edb_directory, filename), **kwargs) + + +def _to_feather(df, filename, name, edb_directory, **kwargs): + filename = filename.format(name=name) + return df.to_feather(os.path.join(edb_directory, filename), **kwargs) + + +def _read_pickle(filename, name, edb_directory, **kwargs): + filename = filename.format(name=name) + return pd.read_pickle(os.path.join(edb_directory, filename), **kwargs) + + +def _to_pickle(df, filename, name, edb_directory, **kwargs): + filename = filename.format(name=name) + return df.to_pickle(os.path.join(edb_directory, filename), **kwargs) + + +def _file_exists(filename, name, edb_directory): + filename = filename.format(name=name) + return os.path.exists(os.path.join(edb_directory, filename)) + + +def get_x_ca_df(alt_values, name, edb_directory, num_chunks): + def split(a, n): + k, m = divmod(len(a), n) + return (a[i * k + min(i, m) : (i + 1) * k + min(i + 1, m)] for i in range(n)) + + # process x_ca with cv_to_ca with or without chunking + x_ca_pickle_file = "{name}_x_ca.pkl" + if num_chunks == 1: + x_ca = cv_to_ca(alt_values) + elif _file_exists(x_ca_pickle_file, name, edb_directory): + # if pickle file from previous x_ca processing exists, load it to save time + time_start = datetime.now() + x_ca = _read_pickle(x_ca_pickle_file, name, edb_directory) + print( + f"x_ca data loaded from {name}_x_ca.fea - time elapsed {(datetime.now() - time_start).total_seconds()}" + ) + else: + time_start = datetime.now() + # calculate chunking_size based on num_chunks (or max number of rows
per chunk) + chunking_size = round(len(alt_values) / num_chunks, 3) + print( + f"Using {num_chunks} chunks results in chunk size of {chunking_size} (of {len(alt_values)} total rows)" + ) + all_chunk_ids = list(alt_values.index.get_level_values(0).unique()) + split_ids = list(split(all_chunk_ids, num_chunks)) + x_ca_list = [] + for i, chunk_ids in enumerate(split_ids): + alt_values_i = alt_values.loc[chunk_ids] + x_ca_i = cv_to_ca(alt_values_i) + x_ca_list.append(x_ca_i) + print( + f"\rx_ca_i compute done for chunk {i+1}/{num_chunks} - time elapsed {(datetime.now() - time_start).total_seconds()}" + ) + x_ca = pd.concat(x_ca_list, axis=0) + # save final x_ca result as pickle file to save time for future data loading + _to_pickle(x_ca, x_ca_pickle_file, name, edb_directory) + print( + f"x_ca compute done - time elapsed {(datetime.now() - time_start).total_seconds()}" + ) + return x_ca + + def nonmand_tour_freq_model( edb_directory="output/estimation_data_bundle/{name}/", return_data=False, condense_parameters=False, + segment_subset=[], + num_chunks=1, ): """ Prepare nonmandatory tour frequency models for estimation. 
@@ -141,10 +223,16 @@ def nonmand_tour_freq_model( data = interaction_simulate_data( name="non_mandatory_tour_frequency", edb_directory=edb_directory, + segment_subset=segment_subset, ) settings = data.settings segment_names = [s["NAME"] for s in settings["SPEC_SEGMENTS"]] + if len(segment_subset) > 0: + assert set(segment_subset).issubset( + set(segment_names) + ), f"{segment_subset} is not a subset of {segment_names}" + segment_names = segment_subset if condense_parameters: data.relabel_coef = link_same_value_coefficients( segment_names, data.coefficients, data.spec @@ -157,6 +245,7 @@ def nonmand_tour_freq_model( m = {} for segment_name in segment_names: + print(f"Creating larch model for {segment_name}") segment_model = m[segment_name] = Model() # One of the alternatives is coded as 0, so # we need to explicitly initialize the MNL nesting graph @@ -178,11 +267,20 @@ def nonmand_tour_freq_model( .set_index("person_id") .rename(columns={"TAZ": "HOMETAZ"}) ) - x_ca = cv_to_ca(alt_values[segment_name].set_index(["person_id", "variable"])) + print("\t performing cv to ca step") + # x_ca = cv_to_ca(alt_values[segment_name].set_index(["person_id", "variable"])) + x_ca = get_x_ca_df( + alt_values=alt_values[segment_name].set_index(["person_id", "variable"]), + name=segment_name, + edb_directory=edb_directory.format(name="non_mandatory_tour_frequency"), + num_chunks=num_chunks, + ) + d = DataFrames( co=x_co, ca=x_ca, - av=~unavail(segment_model, x_ca), + av=True, + # av=~unavail(segment_model, x_ca), ) m[segment_name].dataservice = d From 56ec71b69bdbebc6fed12a9ce15cd7b6e8cc4aea Mon Sep 17 00:00:00 2001 From: dhensle Date: Thu, 17 Aug 2023 14:22:33 -0700 Subject: [PATCH 34/58] assertions in location choice estimation --- activitysim/estimation/larch/location_choice.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/activitysim/estimation/larch/location_choice.py b/activitysim/estimation/larch/location_choice.py index 41e2f78e63..12bae4e4ee 100644 --- 
a/activitysim/estimation/larch/location_choice.py +++ b/activitysim/estimation/larch/location_choice.py @@ -106,6 +106,9 @@ def _read_csv(filename, **kwargs): .set_index("segment") ) size_spec = size_spec.loc[:, size_spec.max() > 0] + assert ( + len(size_spec) > 0 + ), f"Empty size_spec, is model_selector {SIZE_TERM_SELECTOR} in your size term file?" size_coef = size_coefficients_from_spec(size_spec) @@ -214,6 +217,9 @@ def _read_csv(filename, **kwargs): else: av = 1 + assert len(x_co) > 0, "Empty chooser dataframe" + assert len(x_ca_1) > 0, "Empty alternatives dataframe" + d = DataFrames(co=x_co, ca=x_ca_1, av=av) m = Model(dataservice=d) From 49a20fb1419b75e4f7c4e35d66a7718084d90577 Mon Sep 17 00:00:00 2001 From: dhensle Date: Thu, 17 Aug 2023 14:24:48 -0700 Subject: [PATCH 35/58] blacken --- activitysim/estimation/larch/location_choice.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/activitysim/estimation/larch/location_choice.py b/activitysim/estimation/larch/location_choice.py index 0035cb53e5..870343f893 100644 --- a/activitysim/estimation/larch/location_choice.py +++ b/activitysim/estimation/larch/location_choice.py @@ -380,6 +380,7 @@ def non_mandatory_tour_destination_model(**kwargs): **kwargs, ) + def external_non_mandatory_destination_model(**kwargs): unused = kwargs.pop("name", None) return location_choice_model( @@ -387,6 +388,7 @@ def external_non_mandatory_destination_model(**kwargs): **kwargs, ) + def trip_destination_model(**kwargs): unused = kwargs.pop("name", None) return location_choice_model( From 0950b8093196d96032273c7abd68a1599ce770d9 Mon Sep 17 00:00:00 2001 From: Joel Freedman Date: Tue, 12 Sep 2023 14:21:37 -0700 Subject: [PATCH 36/58] Added estimation functionality for telecommute_frequency_model --- .../estimation/larch/simple_simulate.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/activitysim/estimation/larch/simple_simulate.py b/activitysim/estimation/larch/simple_simulate.py index 
843311da17..6538466188 100644 --- a/activitysim/estimation/larch/simple_simulate.py +++ b/activitysim/estimation/larch/simple_simulate.py @@ -377,3 +377,21 @@ def transponder_ownership_model( 1: 2, # 1 means pass, alternative 2 }, ) + +def telecommute_frequency_model( + name="telecommute_frequency", + edb_directory="output/estimation_data_bundle/{name}/", + return_data=False, +): + return simple_simulate_model( + name=name, + edb_directory=edb_directory, + return_data=return_data, + choices={ + "No_Telecommute" : 1, + "1_day_week" : 2, + "2_3_days_week" : 3, + "4_days_week" : 4, + }, + ) + From fad6808a3804c68eb3d9a0a69ac310d8fcef5a5b Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Mon, 25 Sep 2023 15:59:54 -0700 Subject: [PATCH 37/58] Updated trip_matrices.py to not overwrite trip destination with parking location and have trip tables read from parking location for trips originating at a parking location --- activitysim/abm/models/trip_matrices.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/activitysim/abm/models/trip_matrices.py b/activitysim/abm/models/trip_matrices.py index 0c9e1f447f..2db8f8c5a4 100644 --- a/activitysim/abm/models/trip_matrices.py +++ b/activitysim/abm/models/trip_matrices.py @@ -52,11 +52,23 @@ def write_trip_matrices(network_los): parking_settings = config.read_model_settings("parking_location_choice.yaml") parking_taz_col_name = parking_settings["ALT_DEST_COL_NAME"] if parking_taz_col_name in trips_df: - # TODO make parking zone negative, not zero, if not used + + trips_df["true_origin"] = trips_df["origin"] + trips_df["true_destination"] = trips_df["destination"] + + # Get origin parking zone if vehicle not parked at origin + trips_df["origin_parking_zone"] = np.where( + trips_df["tour_id"] == trips_df["tour_id"].shift(1), + trips_df[parking_taz_col_name].shift(1), + -1 + ) + trips_df.loc[trips_df[parking_taz_col_name] > 0, "destination"] = trips_df[ parking_taz_col_name ] - # Also need 
address the return trip + trips_df.loc[trips_df["origin_parking_zone"] > 0, "origin"] = trips_df[ + "origin_parking_zone" + ] # write matrices by zone system type if network_los.zone_system == los.ONE_ZONE: # taz trips written to taz matrices @@ -213,6 +225,12 @@ def write_trip_matrices(network_los): write_matrices( aggregate_trips, zone_index, orig_index, dest_index, model_settings, True ) + + if "parking_location" in config.setting("models"): + # Set trip origin and destination to be the actual location the person is and not where their vehicle is parked + trips_df["origin"] = trips_df["true_origin"] + trips_df["destination"] = trips_df["true_destination"] + del trips_df["true_origin"], trips_df["true_destination"] def annotate_trips(trips, network_los, model_settings): From 5adfc7beb473457dc1978a71747a82a34b668586 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Thu, 1 Feb 2024 20:29:53 -0800 Subject: [PATCH 38/58] remove alts from alts_long in veh type choice --- activitysim/abm/models/vehicle_type_choice.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index 324e505dda..20d1d58208 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -222,6 +222,11 @@ def construct_model_alternatives(model_settings, alts_cats_dict, vehicle_type_da else: # eliminate alternatives if no vehicle type data alts_wide = alts_wide[alts_wide._merge != "left_only"] + # need to also remove any alts from alts_long + alts_long.set_index(['body_type', 'fuel_type', 'age'], inplace=True) + alts_long = alts_long[alts_long.index.isin( + alts_wide.set_index(['body_type', 'fuel_type', 'age']).index) + ].reset_index() alts_wide.drop(columns="_merge", inplace=True) # converting age to integer to allow interactions in utilities From 15b7f31a3e07d41f2a394651c85ed2a7e46d26fb Mon Sep 17 00:00:00 2001 From: David Hensle Date: Fri, 2 Feb 2024 
17:18:42 -0800 Subject: [PATCH 39/58] logging and correct order for veh_type name --- activitysim/abm/models/vehicle_type_choice.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index 20d1d58208..e6ebf5b200 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -221,11 +221,17 @@ def construct_model_alternatives(model_settings, alts_cats_dict, vehicle_type_da ), f"missing vehicle data for alternatives:\n {missing_alts}" else: # eliminate alternatives if no vehicle type data + num_alts_before_filer = len(alts_wide) alts_wide = alts_wide[alts_wide._merge != "left_only"] + logger.warning( + f"Removed {num_alts_before_filer - len(alts_wide)} alternatives not included in input vehicle type data." + ) # need to also remove any alts from alts_long - alts_long.set_index(['body_type', 'fuel_type', 'age'], inplace=True) - alts_long = alts_long[alts_long.index.isin( - alts_wide.set_index(['body_type', 'fuel_type', 'age']).index) + alts_long.set_index(["body_type", "age", "fuel_type"], inplace=True) + alts_long = alts_long[ + alts_long.index.isin( + alts_wide.set_index(["body_type", "age", "fuel_type"]).index + ) ].reset_index() alts_wide.drop(columns="_merge", inplace=True) From 2a99bed359ee8307c301da4eb14375644bfb95b3 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Mon, 5 Feb 2024 16:56:05 -0800 Subject: [PATCH 40/58] correct veh type alts for choices --- activitysim/abm/models/vehicle_type_choice.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index e6ebf5b200..95d4087aa4 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -233,6 +233,7 @@ def construct_model_alternatives(model_settings, 
alts_cats_dict, vehicle_type_da alts_wide.set_index(["body_type", "age", "fuel_type"]).index ) ].reset_index() + alts_long.index = alts_wide.index alts_wide.drop(columns="_merge", inplace=True) # converting age to integer to allow interactions in utilities @@ -427,14 +428,12 @@ def iterate_vehicle_type_choice( choices.rename(columns={"choice": "vehicle_type"}, inplace=True) if alts_cats_dict: - alts = ( - alts_long[alts_long.columns] - .apply(lambda row: "_".join(row.values.astype(str)), axis=1) - .values - ) + alts = alts_long[alts_long.columns].apply( + lambda row: "_".join(row.values.astype(str)), axis=1 + ).to_dict() else: - alts = model_spec.columns - choices["vehicle_type"] = choices["vehicle_type"].map(dict(enumerate(alts))) + alts = enumerate(dict(model_spec.columns)) + choices["vehicle_type"] = choices["vehicle_type"].map(alts) # STEP II: append probabilistic vehicle type attributes if probs_spec_file is not None: From 8cccce30d298d77f7699f40f387ebe4f7f2a475d Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Tue, 20 Feb 2024 16:41:34 -0800 Subject: [PATCH 41/58] Updated origin adjustment to parking zone to only happen if the trip is an auto mode --- activitysim/abm/models/trip_matrices.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/activitysim/abm/models/trip_matrices.py b/activitysim/abm/models/trip_matrices.py index 2db8f8c5a4..97a6361fc6 100644 --- a/activitysim/abm/models/trip_matrices.py +++ b/activitysim/abm/models/trip_matrices.py @@ -51,6 +51,16 @@ def write_trip_matrices(network_los): if "parking_location" in config.setting("models"): parking_settings = config.read_model_settings("parking_location_choice.yaml") parking_taz_col_name = parking_settings["ALT_DEST_COL_NAME"] + auto_nest_name = parking_settings["AUTO_MODE_NEST"] + + # Read trip mode choice settings to get auto modes + trip_mode_choice_settings = config.read_model_settings("trip_mode_choice.yaml") + trip_mode_choice_nest = 
config.get_logit_mocel_settings(trip_mode_choice_settings) + for alternative in trip_mode_choice_nest["alternatives"]: + if alternative["name"] == auto_nest_name: + auto_modes = alternative["alternatives"] + break + if parking_taz_col_name in trips_df: trips_df["true_origin"] = trips_df["origin"] @@ -59,7 +69,11 @@ def write_trip_matrices(network_los): # Get origin parking zone if vehicle not parked at origin trips_df["origin_parking_zone"] = np.where( trips_df["tour_id"] == trips_df["tour_id"].shift(1), - trips_df[parking_taz_col_name].shift(1), + np.where( + trip_df["trip_mode"].apply(lambda x: x in auto_modes), + trips_df[parking_taz_col_name].shift(1), + -1 + ) -1 ) From 851319dc1019fe3d489bbc062d7fbc165c0fb6b2 Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Tue, 20 Feb 2024 16:45:21 -0800 Subject: [PATCH 42/58] Final trips table now reports origin and destination TAZs regardless of parking TAZ --- activitysim/abm/models/trip_matrices.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/activitysim/abm/models/trip_matrices.py b/activitysim/abm/models/trip_matrices.py index 97a6361fc6..7d592c4e4a 100644 --- a/activitysim/abm/models/trip_matrices.py +++ b/activitysim/abm/models/trip_matrices.py @@ -246,6 +246,13 @@ def write_trip_matrices(network_los): trips_df["destination"] = trips_df["true_destination"] del trips_df["true_origin"], trips_df["true_destination"] + trips_df["otaz"] = ( + pipeline.get_table("land_use").reindex(trips_df["origin"]).TAZ.tolist() + ) + trips_df["dtaz"] = ( + pipeline.get_table("land_use").reindex(trips_df["destination"]).TAZ.tolist() + ) + def annotate_trips(trips, network_los, model_settings): """ From 5fb2b839c5712d17ed5220c7a65267dd3b073445 Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Tue, 20 Feb 2024 16:47:37 -0800 Subject: [PATCH 43/58] Previous commit is only necessary for 2- and 3-zone systems --- activitysim/abm/models/trip_matrices.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git 
a/activitysim/abm/models/trip_matrices.py b/activitysim/abm/models/trip_matrices.py index 7d592c4e4a..3e14ef61b9 100644 --- a/activitysim/abm/models/trip_matrices.py +++ b/activitysim/abm/models/trip_matrices.py @@ -246,12 +246,13 @@ def write_trip_matrices(network_los): trips_df["destination"] = trips_df["true_destination"] del trips_df["true_origin"], trips_df["true_destination"] - trips_df["otaz"] = ( - pipeline.get_table("land_use").reindex(trips_df["origin"]).TAZ.tolist() - ) - trips_df["dtaz"] = ( - pipeline.get_table("land_use").reindex(trips_df["destination"]).TAZ.tolist() - ) + if network_los.zone_system == los.TWO_ZONE or network_los.zone_system == los.THREE_ZONE: + trips_df["otaz"] = ( + pipeline.get_table("land_use").reindex(trips_df["origin"]).TAZ.tolist() + ) + trips_df["dtaz"] = ( + pipeline.get_table("land_use").reindex(trips_df["destination"]).TAZ.tolist() + ) def annotate_trips(trips, network_los, model_settings): From 11a3d332120f29776f8c2f7327023086118852cc Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Tue, 20 Feb 2024 17:24:37 -0800 Subject: [PATCH 44/58] Removed lookup of trip mode choice configs so user specifies auto modes in parking location settings --- activitysim/abm/models/trip_matrices.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/activitysim/abm/models/trip_matrices.py b/activitysim/abm/models/trip_matrices.py index 3e14ef61b9..37692ed439 100644 --- a/activitysim/abm/models/trip_matrices.py +++ b/activitysim/abm/models/trip_matrices.py @@ -51,15 +51,8 @@ def write_trip_matrices(network_los): if "parking_location" in config.setting("models"): parking_settings = config.read_model_settings("parking_location_choice.yaml") parking_taz_col_name = parking_settings["ALT_DEST_COL_NAME"] - auto_nest_name = parking_settings["AUTO_MODE_NEST"] - - # Read trip mode choice settings to get auto modes - trip_mode_choice_settings = config.read_model_settings("trip_mode_choice.yaml") - trip_mode_choice_nest = 
config.get_logit_mocel_settings(trip_mode_choice_settings) - for alternative in trip_mode_choice_nest["alternatives"]: - if alternative["name"] == auto_nest_name: - auto_modes = alternative["alternatives"] - break + assert "AUTO_MODES" in parking_settings, "AUTO_MODES must be specified in parking location settings to properly adjust trip tables for assignment" + auto_modes = parking_settings["AUTO_MODES"] if parking_taz_col_name in trips_df: From 389131e2b399c67052b971275a8c3518df0707ec Mon Sep 17 00:00:00 2001 From: Joe Flood Date: Thu, 14 Mar 2024 08:46:54 -0700 Subject: [PATCH 45/58] Fixed bugs found by Ali --- activitysim/abm/models/trip_matrices.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/activitysim/abm/models/trip_matrices.py b/activitysim/abm/models/trip_matrices.py index 37692ed439..cfa65f4182 100644 --- a/activitysim/abm/models/trip_matrices.py +++ b/activitysim/abm/models/trip_matrices.py @@ -63,10 +63,10 @@ def write_trip_matrices(network_los): trips_df["origin_parking_zone"] = np.where( trips_df["tour_id"] == trips_df["tour_id"].shift(1), np.where( - trip_df["trip_mode"].apply(lambda x: x in auto_modes), + trips_df["trip_mode"].apply(lambda x: x in auto_modes), trips_df[parking_taz_col_name].shift(1), -1 - ) + ), -1 ) From f9695426cf6cb947fd9495f9e0726cb65cbca73b Mon Sep 17 00:00:00 2001 From: David Hensle Date: Mon, 18 Mar 2024 11:09:55 -0700 Subject: [PATCH 46/58] adding school escort columns that were missed if no escorting --- activitysim/abm/models/school_escorting.py | 2 ++ activitysim/abm/models/util/school_escort_tours_trips.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py index 319c0c0e54..2ae72a968f 100644 --- a/activitysim/abm/models/school_escorting.py +++ b/activitysim/abm/models/school_escorting.py @@ -563,6 +563,8 @@ def school_escorting( "depart", "purpose", "destination", + "escort_participants", + 
"chauf_tour_id", ] school_escort_trips = pd.DataFrame(columns=trip_cols) diff --git a/activitysim/abm/models/util/school_escort_tours_trips.py b/activitysim/abm/models/util/school_escort_tours_trips.py index 691239ee10..0a7796dea7 100644 --- a/activitysim/abm/models/util/school_escort_tours_trips.py +++ b/activitysim/abm/models/util/school_escort_tours_trips.py @@ -401,6 +401,9 @@ def merge_school_escort_trips_into_pipeline(): # checking to see if there are school escort trips to merge in if len(school_escort_trips) == 0: + # if no trips, fill escorting columns with NA + trips[["escort_participants", "school_escort_direction", "school_escort_trip_id",]] = pd.NA + pipeline.replace_table("trips", trips) return trips # want to remove stops if school escorting takes place on that half tour so we can replace them with the actual stops From 627e2867b904633c47007e912f69b65c84c7f549 Mon Sep 17 00:00:00 2001 From: Ali Etezady <58451076+aletzdy@users.noreply.github.com> Date: Tue, 19 Mar 2024 10:50:58 -0700 Subject: [PATCH 47/58] fixing disaggregate accessibility bug in zone sampler --- activitysim/abm/models/disaggregate_accessibility.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/activitysim/abm/models/disaggregate_accessibility.py b/activitysim/abm/models/disaggregate_accessibility.py index 2bb78bf60e..94bf963c54 100644 --- a/activitysim/abm/models/disaggregate_accessibility.py +++ b/activitysim/abm/models/disaggregate_accessibility.py @@ -275,6 +275,10 @@ def zone_sampler(self): maz_candidates = maz_candidates[ ~maz_candidates.MAZ.isin(maz_sample_idx) ] + + # Need to make sure we sample from TAZs that still exist in the maz_candidates + taz_candidates = taz_candidates[taz_candidates.index.isin(maz_candidates.TAZ)] + # Calculate the remaining samples to collect n_samples_remaining = n_samples - len(maz_sample_idx) n_samples_remaining = ( From 92bf60da0069e4bc9f39cbad1a583d44a2bed16d Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Thu, 28 Mar 2024 09:49:03 -0700 
Subject: [PATCH 48/58] Made code changes to get categorical vehicle types to work --- activitysim/abm/models/vehicle_type_choice.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index 95d4087aa4..de723602ff 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -347,6 +347,17 @@ def iterate_vehicle_type_choice( model_settings, alts_cats_dict, vehicle_type_data ) + # alts preprocessor + alts_preprocessor_settings = model_settings.alts_preprocessor + if alts_preprocessor_settings: + expressions.assign_columns( + state, + df=alts_wide, + model_settings=alts_preprocessor_settings, + locals_dict=locals_dict, + trace_label=trace_label, + ) + # - preparing choosers for iterating vehicles_merged = vehicles_merged.to_frame() vehicles_merged["already_owned_veh"] = "" @@ -380,6 +391,12 @@ def iterate_vehicle_type_choice( len(choosers), ) + # filter columns of alts and choosers + if len(model_settings.COLS_TO_INCLUDE_IN_CHOOSER_TABLE) > 0: + choosers = choosers[model_settings.COLS_TO_INCLUDE_IN_CHOOSER_TABLE] + if len(model_settings.COLS_TO_INCLUDE_IN_ALTS_TABLE) > 0: + alts_wide = alts_wide[model_settings.COLS_TO_INCLUDE_IN_ALTS_TABLE] + # if there were so many alts that they had to be created programmatically, # by combining categorical variables, then the utility expressions should make # use of interaction terms to accommodate alt-specific coefficients and constants From f1bdd88ecb95794576445e238579e89c717db565 Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Thu, 28 Mar 2024 09:52:12 -0700 Subject: [PATCH 49/58] Changed settings call to old format --- activitysim/abm/models/vehicle_type_choice.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index de723602ff..faef05e652 100644 --- 
a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -393,9 +393,9 @@ def iterate_vehicle_type_choice( # filter columns of alts and choosers if len(model_settings.COLS_TO_INCLUDE_IN_CHOOSER_TABLE) > 0: - choosers = choosers[model_settings.COLS_TO_INCLUDE_IN_CHOOSER_TABLE] + choosers = choosers[model_settings.get("COLS_TO_INCLUDE_IN_CHOOSER_TABLE")] if len(model_settings.COLS_TO_INCLUDE_IN_ALTS_TABLE) > 0: - alts_wide = alts_wide[model_settings.COLS_TO_INCLUDE_IN_ALTS_TABLE] + alts_wide = alts_wide[model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE")] # if there were so many alts that they had to be created programmatically, # by combining categorical variables, then the utility expressions should make From b0061bc4fec193836d56d6b43c42427fdfc7e4e0 Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Thu, 28 Mar 2024 09:55:59 -0700 Subject: [PATCH 50/58] Fixed a couple more model settings calls --- activitysim/abm/models/vehicle_type_choice.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index faef05e652..ae7a679a9d 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -392,9 +392,9 @@ def iterate_vehicle_type_choice( ) # filter columns of alts and choosers - if len(model_settings.COLS_TO_INCLUDE_IN_CHOOSER_TABLE) > 0: + if len(model_settings.get("COLS_TO_INCLUDE_IN_CHOOSER_TABLE")) > 0: choosers = choosers[model_settings.get("COLS_TO_INCLUDE_IN_CHOOSER_TABLE")] - if len(model_settings.COLS_TO_INCLUDE_IN_ALTS_TABLE) > 0: + if len(model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE")) > 0: alts_wide = alts_wide[model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE")] # if there were so many alts that they had to be created programmatically, From a3f6056c065ffaecb95e8683ff270decd28cfbf7 Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Thu, 28 Mar 2024 
10:01:49 -0700 Subject: [PATCH 51/58] Removed state variable from function call and added default empty arrays for model settings --- activitysim/abm/models/vehicle_type_choice.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index ae7a679a9d..c853f173ab 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -351,7 +351,6 @@ def iterate_vehicle_type_choice( alts_preprocessor_settings = model_settings.alts_preprocessor if alts_preprocessor_settings: expressions.assign_columns( - state, df=alts_wide, model_settings=alts_preprocessor_settings, locals_dict=locals_dict, @@ -392,10 +391,10 @@ def iterate_vehicle_type_choice( ) # filter columns of alts and choosers - if len(model_settings.get("COLS_TO_INCLUDE_IN_CHOOSER_TABLE")) > 0: - choosers = choosers[model_settings.get("COLS_TO_INCLUDE_IN_CHOOSER_TABLE")] - if len(model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE")) > 0: - alts_wide = alts_wide[model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE")] + if len(model_settings.get("COLS_TO_INCLUDE_IN_CHOOSER_TABLE", [])) > 0: + choosers = choosers[model_settings.get("COLS_TO_INCLUDE_IN_CHOOSER_TABLE", [])] + if len(model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE", [])) > 0: + alts_wide = alts_wide[model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE"l [])] # if there were so many alts that they had to be created programmatically, # by combining categorical variables, then the utility expressions should make From 7449923c2ffc23d06a48ef3958bb7bd69d1de5cc Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Thu, 28 Mar 2024 10:57:24 -0700 Subject: [PATCH 52/58] Fixed call of reading in alts preprocessor --- activitysim/abm/models/vehicle_type_choice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activitysim/abm/models/vehicle_type_choice.py 
b/activitysim/abm/models/vehicle_type_choice.py index c853f173ab..13b6ca5f70 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -348,7 +348,7 @@ def iterate_vehicle_type_choice( ) # alts preprocessor - alts_preprocessor_settings = model_settings.alts_preprocessor + alts_preprocessor_settings = model_settings.get("alts_preprocessor", None) if alts_preprocessor_settings: expressions.assign_columns( df=alts_wide, From 74833cbf1ac935520b34da963d2104a16ef222cd Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Thu, 28 Mar 2024 16:38:48 -0700 Subject: [PATCH 53/58] Replaced l with , in vehicle_type_choice.py --- activitysim/abm/models/vehicle_type_choice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index 13b6ca5f70..efc4ef19b6 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -394,7 +394,7 @@ def iterate_vehicle_type_choice( if len(model_settings.get("COLS_TO_INCLUDE_IN_CHOOSER_TABLE", [])) > 0: choosers = choosers[model_settings.get("COLS_TO_INCLUDE_IN_CHOOSER_TABLE", [])] if len(model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE", [])) > 0: - alts_wide = alts_wide[model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE"l [])] + alts_wide = alts_wide[model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE", [])] # if there were so many alts that they had to be created programmatically, # by combining categorical variables, then the utility expressions should make From 422ca6cca078ed3c66d1489f2903f4b24c47a383 Mon Sep 17 00:00:00 2001 From: aber-sandag Date: Mon, 8 Apr 2024 14:25:56 -0700 Subject: [PATCH 54/58] Load skims into shared memory to be accessed by later models --- activitysim/core/mem.py | 3 ++ activitysim/core/skim_dict_factory.py | 41 +++++++++++++++++---------- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git 
a/activitysim/core/mem.py b/activitysim/core/mem.py index ae832f250c..de558b926d 100644 --- a/activitysim/core/mem.py +++ b/activitysim/core/mem.py @@ -296,6 +296,9 @@ def shared_memory_size(data_buffers=None): shared_size += Dataset.shm.preload_shared_memory_size(data_buffer[11:]) continue + if isinstance(data_buffer, multiprocessing.shared_memory.SharedMemory): + shared_size += data_buffer.size + continue try: obj = data_buffer.get_obj() except Exception: diff --git a/activitysim/core/skim_dict_factory.py b/activitysim/core/skim_dict_factory.py index 450b98d25c..5755964d53 100644 --- a/activitysim/core/skim_dict_factory.py +++ b/activitysim/core/skim_dict_factory.py @@ -409,19 +409,26 @@ def allocate_skim_buffer(self, skim_info, shared=False): f"total size: {util.INT(csz)} ({util.GB(csz)})" ) - if shared: - if dtype_name == "float64": - typecode = "d" - elif dtype_name == "float32": - typecode = "f" - else: - raise RuntimeError( - "allocate_skim_buffer unrecognized dtype %s" % dtype_name - ) - - buffer = multiprocessing.RawArray(typecode, buffer_size) - else: - buffer = np.zeros(buffer_size, dtype=dtype) + # if shared: + # if dtype_name == "float64": + # typecode = "d" + # elif dtype_name == "float32": + # typecode = "f" + # else: + # raise RuntimeError( + # "allocate_skim_buffer unrecognized dtype %s" % dtype_name + # ) + + # buffer = multiprocessing.RawArray(typecode, buffer_size) + shared_mem_name = f"skim_shared_memory__{skim_info.skim_tag}" + try: + buffer = multiprocessing.shared_memory.SharedMemory(name=shared_mem_name) + logger.info(f"skim buffer already allocated in shared memory: {shared_mem_name}, size: {buffer.size}") + except FileNotFoundError: + buffer = multiprocessing.shared_memory.SharedMemory(create=True, size=csz, name=shared_mem_name) + logger.info(f"allocating skim buffer in shared memory: {shared_mem_name}, size: {buffer.size}") + # else: + # buffer = np.zeros(buffer_size, dtype=dtype) return buffer @@ -440,8 +447,9 @@ def 
_skim_data_from_buffer(self, skim_info, skim_buffer): """ dtype = np.dtype(skim_info.dtype_name) - assert len(skim_buffer) == util.iprod(skim_info.skim_data_shape) - skim_data = np.frombuffer(skim_buffer, dtype=dtype).reshape( + # assert len(skim_buffer) == util.iprod(skim_info.skim_data_shape) + assert skim_buffer.size >= util.iprod(skim_info.skim_data_shape) * dtype.itemsize + skim_data = np.frombuffer(skim_buffer.buf, dtype=dtype, count=util.iprod(skim_info.skim_data_shape)).reshape( skim_info.skim_data_shape ) return skim_data @@ -462,6 +470,9 @@ def load_skims_to_buffer(self, skim_info, skim_buffer): skim_data = self._skim_data_from_buffer(skim_info, skim_buffer) assert skim_data.shape == skim_info.skim_data_shape + if skim_data.any(): + return + if read_cache: # returns None if cache file not found cache_data = self._open_existing_readonly_memmap_skim_cache(skim_info) From 31c3c353a3b069df7b710f52595d1b8310e615df Mon Sep 17 00:00:00 2001 From: aber-sandag Date: Wed, 10 Apr 2024 10:24:36 -0700 Subject: [PATCH 55/58] Don't use shared memory when running singleprocessed (temp fix) --- activitysim/core/skim_dict_factory.py | 37 +++++++++++++++------------ 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/activitysim/core/skim_dict_factory.py b/activitysim/core/skim_dict_factory.py index 5755964d53..a3b37aea2c 100644 --- a/activitysim/core/skim_dict_factory.py +++ b/activitysim/core/skim_dict_factory.py @@ -409,7 +409,7 @@ def allocate_skim_buffer(self, skim_info, shared=False): f"total size: {util.INT(csz)} ({util.GB(csz)})" ) - # if shared: + if shared: # if dtype_name == "float64": # typecode = "d" # elif dtype_name == "float32": @@ -420,15 +420,15 @@ def allocate_skim_buffer(self, skim_info, shared=False): # ) # buffer = multiprocessing.RawArray(typecode, buffer_size) - shared_mem_name = f"skim_shared_memory__{skim_info.skim_tag}" - try: - buffer = multiprocessing.shared_memory.SharedMemory(name=shared_mem_name) - logger.info(f"skim buffer already 
allocated in shared memory: {shared_mem_name}, size: {buffer.size}") - except FileNotFoundError: - buffer = multiprocessing.shared_memory.SharedMemory(create=True, size=csz, name=shared_mem_name) - logger.info(f"allocating skim buffer in shared memory: {shared_mem_name}, size: {buffer.size}") - # else: - # buffer = np.zeros(buffer_size, dtype=dtype) + shared_mem_name = f"skim_shared_memory__{skim_info.skim_tag}" + try: + buffer = multiprocessing.shared_memory.SharedMemory(name=shared_mem_name) + logger.info(f"skim buffer already allocated in shared memory: {shared_mem_name}, size: {buffer.size}") + except FileNotFoundError: + buffer = multiprocessing.shared_memory.SharedMemory(create=True, size=csz, name=shared_mem_name) + logger.info(f"allocating skim buffer in shared memory: {shared_mem_name}, size: {buffer.size}") + else: + buffer = np.zeros(buffer_size, dtype=dtype) return buffer @@ -447,11 +447,16 @@ def _skim_data_from_buffer(self, skim_info, skim_buffer): """ dtype = np.dtype(skim_info.dtype_name) - # assert len(skim_buffer) == util.iprod(skim_info.skim_data_shape) - assert skim_buffer.size >= util.iprod(skim_info.skim_data_shape) * dtype.itemsize - skim_data = np.frombuffer(skim_buffer.buf, dtype=dtype, count=util.iprod(skim_info.skim_data_shape)).reshape( - skim_info.skim_data_shape - ) + if isinstance(skim_buffer, multiprocessing.shared_memory.SharedMemory): + assert skim_buffer.size >= util.iprod(skim_info.skim_data_shape) * dtype.itemsize + skim_data = np.frombuffer(skim_buffer.buf, dtype=dtype, count=util.iprod(skim_info.skim_data_shape)).reshape( + skim_info.skim_data_shape + ) + else: + assert len(skim_buffer) == util.iprod(skim_info.skim_data_shape) + skim_data = np.frombuffer(skim_buffer, dtype=dtype).reshape( + skim_info.skim_data_shape + ) return skim_data def load_skims_to_buffer(self, skim_info, skim_buffer): @@ -470,7 +475,7 @@ def load_skims_to_buffer(self, skim_info, skim_buffer): skim_data = self._skim_data_from_buffer(skim_info, 
skim_buffer) assert skim_data.shape == skim_info.skim_data_shape - if skim_data.any(): + if isinstance(skim_buffer, multiprocessing.shared_memory.SharedMemory) and skim_data.any(): return if read_cache: From 777503395d1804f5864f1d52bac8851097985c97 Mon Sep 17 00:00:00 2001 From: aber-sandag Date: Mon, 15 Apr 2024 12:14:53 -0700 Subject: [PATCH 56/58] Fix skims in trip_mode_choice annotate_trips --- activitysim/abm/models/trip_mode_choice.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/activitysim/abm/models/trip_mode_choice.py b/activitysim/abm/models/trip_mode_choice.py index e7bb200e45..b165e24b73 100644 --- a/activitysim/abm/models/trip_mode_choice.py +++ b/activitysim/abm/models/trip_mode_choice.py @@ -317,6 +317,11 @@ def trip_mode_choice(trips, network_los, chunk_size, trace_hh_id): pipeline.replace_table("trips", trips_df) if model_settings.get("annotate_trips"): + locals_dict = {} + locals_dict.update(constants) + simulate.set_skim_wrapper_targets(trips_merged, skims) + locals_dict.update(skims) + locals_dict["timeframe"] = "trip" annotate.annotate_trips(model_settings, trace_label, locals_dict) if trace_hh_id: From 98713f304470b3be1a5dcd9060fe812bb08ac7ec Mon Sep 17 00:00:00 2001 From: David Hensle Date: Fri, 19 Apr 2024 13:42:35 -0700 Subject: [PATCH 57/58] fix to #841 --- activitysim/abm/models/util/overlap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activitysim/abm/models/util/overlap.py b/activitysim/abm/models/util/overlap.py index f7773a14a3..8665c8a513 100644 --- a/activitysim/abm/models/util/overlap.py +++ b/activitysim/abm/models/util/overlap.py @@ -266,7 +266,7 @@ def calculate_consecutive(array): intvs = stops[:, 1] - starts[:, 1] # Store intervals as a 2D array for further vectorized ops to make. 
- c = np.bincount(starts[:, 0]) + c = np.bincount(starts[:, 0], minlength=array.shape[0]) mask = np.arange(c.max()) < c[:, None] intvs2D = mask.astype(float) intvs2D[mask] = intvs From e4dc5ac747f09121e617e314a08be3cf70a102bf Mon Sep 17 00:00:00 2001 From: David Hensle Date: Wed, 1 May 2024 11:41:46 -0700 Subject: [PATCH 58/58] reducing memory needs in parking_location_choice --- .../abm/models/parking_location_choice.py | 22 ++++-- activitysim/core/util.py | 75 +++++++++++++++++++ 2 files changed, 91 insertions(+), 6 deletions(-) diff --git a/activitysim/abm/models/parking_location_choice.py b/activitysim/abm/models/parking_location_choice.py index a87703b8b9..8eaf1bc6f3 100644 --- a/activitysim/abm/models/parking_location_choice.py +++ b/activitysim/abm/models/parking_location_choice.py @@ -16,7 +16,7 @@ ) from activitysim.core.interaction_sample_simulate import interaction_sample_simulate from activitysim.core.tracing import print_elapsed_time -from activitysim.core.util import assign_in_place +from activitysim.core.util import assign_in_place, drop_unused_chooser_columns from .util import estimation @@ -99,6 +99,7 @@ def parking_destination_simulate( destination_sample, model_settings, skims, + locals_dict, chunk_size, trace_hh_id, trace_label, @@ -123,11 +124,6 @@ def parking_destination_simulate( logger.info("Running trip_destination_simulate with %d trips", len(trips)) - locals_dict = config.get_model_constants(model_settings).copy() - locals_dict.update(skims) - locals_dict["timeframe"] = "trip" - locals_dict["PARKING"] = skims["op_skims"].dest_key - parking_locations = interaction_sample_simulate( choosers=trips, alternatives=destination_sample, @@ -171,6 +167,19 @@ def choose_parking_location( t0 = print_elapsed_time() alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"] + + # remove trips and alts columns that are not used in spec + locals_dict = config.get_model_constants(model_settings).copy() + locals_dict.update(skims) + locals_dict["timeframe"] = 
"trip" + locals_dict["PARKING"] = skims["op_skims"].dest_key + + spec = get_spec_for_segment(model_settings, "SPECIFICATION", segment_name) + trips = drop_unused_chooser_columns(trips, spec, locals_dict, custom_chooser=None) + alternatives = drop_unused_chooser_columns( + alternatives, spec, locals_dict, custom_chooser=None + ) + destination_sample = logit.interaction_dataset( trips, alternatives, alt_index_id=alt_dest_col_name ) @@ -184,6 +193,7 @@ def choose_parking_location( destination_sample=destination_sample, model_settings=model_settings, skims=skims, + locals_dict=locals_dict, chunk_size=chunk_size, trace_hh_id=trace_hh_id, trace_label=trace_label, diff --git a/activitysim/core/util.py b/activitysim/core/util.py index 217cdb8377..421eb35a06 100644 --- a/activitysim/core/util.py +++ b/activitysim/core/util.py @@ -468,3 +468,78 @@ def nearest_node_index(node, nodes): deltas = nodes - node dist_2 = np.einsum("ij,ij->i", deltas, deltas) return np.argmin(dist_2) + + +def drop_unused_chooser_columns( + choosers, spec, locals_d, custom_chooser, sharrow_enabled=False +): + """ + Drop unused columns from the chooser table, based on the spec and custom_chooser function. 
+ """ + # keep only variables needed for spec + import re + + # define a regular expression to find variables in spec + pattern = r"[a-zA-Z_][a-zA-Z0-9_]*" + + unique_variables_in_spec = set( + spec.reset_index()["Expression"].apply(lambda x: re.findall(pattern, x)).sum() + ) + + if locals_d: + unique_variables_in_spec.add(locals_d.get("orig_col_name", None)) + unique_variables_in_spec.add(locals_d.get("dest_col_name", None)) + if locals_d.get("timeframe") == "trip": + orig_col_name = locals_d.get("ORIGIN", None) + dest_col_name = locals_d.get("DESTINATION", None) + stop_col_name = None + parking_col_name = locals_d.get("PARKING", None) + primary_origin_col_name = None + if orig_col_name is None and "od_skims" in locals_d: + orig_col_name = locals_d["od_skims"].orig_key + if dest_col_name is None and "od_skims" in locals_d: + dest_col_name = locals_d["od_skims"].dest_key + if stop_col_name is None and "dp_skims" in locals_d: + stop_col_name = locals_d["dp_skims"].dest_key + if primary_origin_col_name is None and "dnt_skims" in locals_d: + primary_origin_col_name = locals_d["dnt_skims"].dest_key + unique_variables_in_spec.add(orig_col_name) + unique_variables_in_spec.add(dest_col_name) + unique_variables_in_spec.add(parking_col_name) + unique_variables_in_spec.add(primary_origin_col_name) + unique_variables_in_spec.add(stop_col_name) + unique_variables_in_spec.add("trip_period") + # when using trip_scheduling_choice for trup scheduling + unique_variables_in_spec.add("last_outbound_stop") + unique_variables_in_spec.add("last_inbound_stop") + + # when sharrow mode, need to keep the following columns in the choosers table + if sharrow_enabled: + unique_variables_in_spec.add("out_period") + unique_variables_in_spec.add("in_period") + unique_variables_in_spec.add("purpose_index_num") + + if custom_chooser: + import inspect + + custom_chooser_lines = inspect.getsource(custom_chooser) + unique_variables_in_spec.update(re.findall(pattern, custom_chooser_lines)) + + 
logger.info("Dropping unused variables in chooser table") + + logger.info( + "before dropping, the choosers table has {} columns: {}".format( + len(choosers.columns), choosers.columns + ) + ) + + # keep only variables needed for spec + choosers = choosers[[c for c in choosers.columns if c in unique_variables_in_spec]] + + logger.info( + "after dropping, the choosers table has {} columns: {}".format( + len(choosers.columns), choosers.columns + ) + ) + + return choosers \ No newline at end of file