From 9075771874853c7c1aa73c072543c8b547c2b29b Mon Sep 17 00:00:00 2001 From: David Hensle Date: Mon, 20 Feb 2023 12:41:16 -0800 Subject: [PATCH 01/58] estimation mode enhancements --- .../joint_tour_frequency_composition.py | 11 +- activitysim/abm/models/school_escorting.py | 110 +++++++++--------- .../abm/models/telecommute_frequency.py | 2 +- .../abm/models/transit_pass_ownership.py | 2 +- activitysim/abm/models/work_from_home.py | 2 +- 5 files changed, 69 insertions(+), 58 deletions(-) diff --git a/activitysim/abm/models/joint_tour_frequency_composition.py b/activitysim/abm/models/joint_tour_frequency_composition.py index 3e52d27fae..9fc8f61b1a 100644 --- a/activitysim/abm/models/joint_tour_frequency_composition.py +++ b/activitysim/abm/models/joint_tour_frequency_composition.py @@ -35,9 +35,13 @@ def joint_tour_frequency_composition( model_settings = config.read_model_settings(model_settings_file_name) + # alt_tdd = simulate.read_model_alts( + # "joint_tour_frequency_composition_alternatives.csv", set_index="alt" + # ) alt_tdd = simulate.read_model_alts( - "joint_tour_frequency_composition_alternatives.csv", set_index="alt" + "joint_tour_frequency_composition_alternatives.csv", set_index=None ) + # alt_tdd.index = alt_tdd['alt'].values # - only interested in households with more than one cdap travel_active person and # - at least one non-preschooler @@ -94,13 +98,15 @@ def joint_tour_frequency_composition( estimator.write_model_settings(model_settings, model_settings_file_name) estimator.write_coefficients(coefficients_df, model_settings) estimator.write_choosers(choosers) - estimator.write_alternatives(alts) assert choosers.index.name == "household_id" assert "household_id" not in choosers.columns choosers["household_id"] = choosers.index estimator.set_chooser_id(choosers.index.name) + + # FIXME set_alt_id - do we need this for interaction_simulate estimation bundle tables? 
+ estimator.set_alt_id("alt_id") # The choice value 'joint_tour_frequency_composition' assigned by interaction_simulate # is the index value of the chosen alternative in the alternatives table. @@ -134,6 +140,7 @@ def joint_tour_frequency_composition( # - but we don't know the tour participants yet # - so we arbitrarily choose the first person in the household # - to be point person for the purpose of generating an index and setting origin + # FIXME: not all models are guaranteed to have PNUM temp_point_persons = persons.loc[persons.PNUM == 1] temp_point_persons["person_id"] = temp_point_persons.index temp_point_persons = temp_point_persons.set_index("household_id") diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py index 1b5a97fc93..52f62f7a01 100644 --- a/activitysim/abm/models/school_escorting.py +++ b/activitysim/abm/models/school_escorting.py @@ -480,62 +480,66 @@ def school_escorting( ) escort_bundles.append(bundles) + pipeline.replace_table("households", households) + escort_bundles = pd.concat(escort_bundles) - escort_bundles["bundle_id"] = ( - escort_bundles["household_id"] * 10 - + escort_bundles.groupby("household_id").cumcount() - + 1 - ) - escort_bundles.sort_values( - by=["household_id", "school_escort_direction"], - ascending=[True, False], - inplace=True, - ) - school_escort_tours = school_escort_tours_trips.create_pure_school_escort_tours( - escort_bundles - ) - chauf_tour_id_map = { - v: k for k, v in school_escort_tours["bundle_id"].to_dict().items() - } - escort_bundles["chauf_tour_id"] = np.where( - escort_bundles["escort_type"] == "ride_share", - escort_bundles["first_mand_tour_id"], - escort_bundles["bundle_id"].map(chauf_tour_id_map), - ) + if len(escort_bundles) > 0: + escort_bundles["bundle_id"] = ( + escort_bundles["household_id"] * 10 + + escort_bundles.groupby("household_id").cumcount() + + 1 + ) + escort_bundles.sort_values( + by=["household_id", "school_escort_direction"], + 
ascending=[True, False], + inplace=True, + ) - tours = school_escort_tours_trips.add_pure_escort_tours(tours, school_escort_tours) - tours = school_escort_tours_trips.process_tours_after_escorting_model( - escort_bundles, tours - ) + school_escort_tours = school_escort_tours_trips.create_pure_school_escort_tours( + escort_bundles + ) + chauf_tour_id_map = { + v: k for k, v in school_escort_tours["bundle_id"].to_dict().items() + } + escort_bundles["chauf_tour_id"] = np.where( + escort_bundles["escort_type"] == "ride_share", + escort_bundles["first_mand_tour_id"], + escort_bundles["bundle_id"].map(chauf_tour_id_map), + ) - school_escort_trips = school_escort_tours_trips.create_school_escort_trips( - escort_bundles - ) + tours = school_escort_tours_trips.add_pure_escort_tours(tours, school_escort_tours) + tours = school_escort_tours_trips.process_tours_after_escorting_model( + escort_bundles, tours + ) - # update pipeline - pipeline.replace_table("households", households) - pipeline.replace_table("tours", tours) - pipeline.get_rn_generator().drop_channel("tours") - pipeline.get_rn_generator().add_channel("tours", tours) - pipeline.replace_table("escort_bundles", escort_bundles) - # save school escorting tours and trips in pipeline so we can overwrite results from downstream models - pipeline.replace_table("school_escort_tours", school_escort_tours) - pipeline.replace_table("school_escort_trips", school_escort_trips) - - # updating timetable object with pure escort tours so joint tours do not schedule ontop - timetable = inject.get_injectable("timetable") - - # Need to do this such that only one person is in nth_tours - # thus, looping through tour_category and tour_num - # including mandatory tours because their start / end times may have - # changed to match the school escort times - for tour_category in tours.tour_category.unique(): - for tour_num, nth_tours in tours[tours.tour_category == tour_category].groupby( - "tour_num", sort=True - ): - timetable.assign( - 
window_row_ids=nth_tours["person_id"], tdds=nth_tours["tdd"] - ) + school_escort_trips = school_escort_tours_trips.create_school_escort_trips( + escort_bundles + ) - timetable.replace_table() + # update pipeline + + pipeline.replace_table("tours", tours) + pipeline.get_rn_generator().drop_channel("tours") + pipeline.get_rn_generator().add_channel("tours", tours) + pipeline.replace_table("escort_bundles", escort_bundles) + # save school escorting tours and trips in pipeline so we can overwrite results from downstream models + pipeline.replace_table("school_escort_tours", school_escort_tours) + pipeline.replace_table("school_escort_trips", school_escort_trips) + + # updating timetable object with pure escort tours so joint tours do not schedule ontop + timetable = inject.get_injectable("timetable") + + # Need to do this such that only one person is in nth_tours + # thus, looping through tour_category and tour_num + # including mandatory tours because their start / end times may have + # changed to match the school escort times + for tour_category in tours.tour_category.unique(): + for tour_num, nth_tours in tours[tours.tour_category == tour_category].groupby( + "tour_num", sort=True + ): + timetable.assign( + window_row_ids=nth_tours["person_id"], tdds=nth_tours["tdd"] + ) + + timetable.replace_table() diff --git a/activitysim/abm/models/telecommute_frequency.py b/activitysim/abm/models/telecommute_frequency.py index cc1eec4893..4596a89115 100755 --- a/activitysim/abm/models/telecommute_frequency.py +++ b/activitysim/abm/models/telecommute_frequency.py @@ -57,7 +57,7 @@ def telecommute_frequency(persons_merged, persons, chunk_size, trace_hh_id): if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) estimator.write_spec(model_settings) - estimator.write_coefficients(coefficients_df) + estimator.write_coefficients(coefficients_df, model_settings) estimator.write_choosers(choosers) choices = simulate.simple_simulate( diff --git 
a/activitysim/abm/models/transit_pass_ownership.py b/activitysim/abm/models/transit_pass_ownership.py index 6507ab8256..92d97080f9 100644 --- a/activitysim/abm/models/transit_pass_ownership.py +++ b/activitysim/abm/models/transit_pass_ownership.py @@ -51,7 +51,7 @@ def transit_pass_ownership(persons_merged, persons, chunk_size, trace_hh_id): if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) estimator.write_spec(model_settings) - estimator.write_coefficients(coefficients_df) + estimator.write_coefficients(coefficients_df, model_settings) estimator.write_choosers(choosers) choices = simulate.simple_simulate( diff --git a/activitysim/abm/models/work_from_home.py b/activitysim/abm/models/work_from_home.py index 97a80d301f..0432640002 100755 --- a/activitysim/abm/models/work_from_home.py +++ b/activitysim/abm/models/work_from_home.py @@ -58,7 +58,7 @@ def work_from_home(persons_merged, persons, chunk_size, trace_hh_id): if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) estimator.write_spec(model_settings) - estimator.write_coefficients(coefficients_df) + estimator.write_coefficients(coefficients_df, model_settings) estimator.write_choosers(choosers) # - iterative single process what-if adjustment if specified From 54cb3d1eda9f8df3b0104c998012010fed106c63 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Tue, 21 Feb 2023 15:39:17 -0800 Subject: [PATCH 02/58] blacken --- .../abm/models/joint_tour_frequency_composition.py | 3 ++- activitysim/abm/models/school_escorting.py | 12 +++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/activitysim/abm/models/joint_tour_frequency_composition.py b/activitysim/abm/models/joint_tour_frequency_composition.py index 9fc8f61b1a..af6d8b62f2 100644 --- a/activitysim/abm/models/joint_tour_frequency_composition.py +++ b/activitysim/abm/models/joint_tour_frequency_composition.py @@ -38,6 +38,7 @@ def joint_tour_frequency_composition( # alt_tdd = 
simulate.read_model_alts( # "joint_tour_frequency_composition_alternatives.csv", set_index="alt" # ) + # FIXME setting index as "alt" causes crash in estimation mode... alt_tdd = simulate.read_model_alts( "joint_tour_frequency_composition_alternatives.csv", set_index=None ) @@ -104,7 +105,7 @@ def joint_tour_frequency_composition( choosers["household_id"] = choosers.index estimator.set_chooser_id(choosers.index.name) - + # FIXME set_alt_id - do we need this for interaction_simulate estimation bundle tables? estimator.set_alt_id("alt_id") diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py index 52f62f7a01..8da0068a7e 100644 --- a/activitysim/abm/models/school_escorting.py +++ b/activitysim/abm/models/school_escorting.py @@ -508,7 +508,9 @@ def school_escorting( escort_bundles["bundle_id"].map(chauf_tour_id_map), ) - tours = school_escort_tours_trips.add_pure_escort_tours(tours, school_escort_tours) + tours = school_escort_tours_trips.add_pure_escort_tours( + tours, school_escort_tours + ) tours = school_escort_tours_trips.process_tours_after_escorting_model( escort_bundles, tours ) @@ -518,7 +520,7 @@ def school_escorting( ) # update pipeline - + pipeline.replace_table("tours", tours) pipeline.get_rn_generator().drop_channel("tours") pipeline.get_rn_generator().add_channel("tours", tours) @@ -535,9 +537,9 @@ def school_escorting( # including mandatory tours because their start / end times may have # changed to match the school escort times for tour_category in tours.tour_category.unique(): - for tour_num, nth_tours in tours[tours.tour_category == tour_category].groupby( - "tour_num", sort=True - ): + for tour_num, nth_tours in tours[ + tours.tour_category == tour_category + ].groupby("tour_num", sort=True): timetable.assign( window_row_ids=nth_tours["person_id"], tdds=nth_tours["tdd"] ) From b461dd29fbd36cf81d7507b4817892f20d21b2ae Mon Sep 17 00:00:00 2001 From: David Hensle Date: Thu, 2 Mar 2023 12:23:16 -0800 
Subject: [PATCH 03/58] ao preprocessor, cdap & jtfc estimation --- activitysim/abm/models/auto_ownership.py | 17 ++++++++++++++++- activitysim/abm/models/cdap.py | 4 ++++ .../models/joint_tour_frequency_composition.py | 4 ++-- .../abm/models/joint_tour_participation.py | 4 ++++ activitysim/abm/models/transit_pass_subsidy.py | 2 +- 5 files changed, 27 insertions(+), 4 deletions(-) diff --git a/activitysim/abm/models/auto_ownership.py b/activitysim/abm/models/auto_ownership.py index 564d6f94b6..3e01513cfe 100644 --- a/activitysim/abm/models/auto_ownership.py +++ b/activitysim/abm/models/auto_ownership.py @@ -2,7 +2,7 @@ # See full license in LICENSE.txt. import logging -from activitysim.core import config, inject, pipeline, simulate, tracing +from activitysim.core import config, expressions, inject, pipeline, simulate, tracing from .util import estimation @@ -32,6 +32,21 @@ def auto_ownership_simulate(households, households_merged, chunk_size, trace_hh_ logger.info("Running %s with %d households", trace_label, len(choosers)) + # - preprocessor + preprocessor_settings = model_settings.get("preprocessor", None) + if preprocessor_settings: + + locals_d = {} + if constants is not None: + locals_d.update(constants) + + expressions.assign_columns( + df=choosers, + model_settings=preprocessor_settings, + locals_dict=locals_d, + trace_label=trace_label, + ) + if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) estimator.write_spec(model_settings) diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index f7da93687b..79c650a7a2 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -151,6 +151,9 @@ def cdap_simulate(persons_merged, persons, households, chunk_size, trace_hh_id): for hhsize in range(2, cdap.MAX_HHSIZE + 1): spec = cdap.get_cached_spec(hhsize) estimator.write_table(spec, "spec_%s" % hhsize, append=False) + if add_joint_tour_utility: + joint_spec = 
cdap.get_cached_joint_spec(hhsize) + estimator.write_table(joint_spec, "joint_spec_%s" % hhsize, append=False) logger.info("Running cdap_simulate with %d persons", len(persons_merged.index)) @@ -184,6 +187,7 @@ def cdap_simulate(persons_merged, persons, households, chunk_size, trace_hh_id): if estimator: estimator.write_choices(choices) choices = estimator.get_survey_values(choices, "persons", "cdap_activity") + hh_joint = estimator.get_survey_values(hh_joint, "households", "has_joint_tour") estimator.write_override_choices(choices) estimator.end_estimation() diff --git a/activitysim/abm/models/joint_tour_frequency_composition.py b/activitysim/abm/models/joint_tour_frequency_composition.py index af6d8b62f2..a13e91a578 100644 --- a/activitysim/abm/models/joint_tour_frequency_composition.py +++ b/activitysim/abm/models/joint_tour_frequency_composition.py @@ -35,14 +35,14 @@ def joint_tour_frequency_composition( model_settings = config.read_model_settings(model_settings_file_name) + # FIXME setting index as "alt" causes crash in estimation mode... # alt_tdd = simulate.read_model_alts( # "joint_tour_frequency_composition_alternatives.csv", set_index="alt" # ) - # FIXME setting index as "alt" causes crash in estimation mode... 
alt_tdd = simulate.read_model_alts( "joint_tour_frequency_composition_alternatives.csv", set_index=None ) - # alt_tdd.index = alt_tdd['alt'].values + alt_tdd.index = alt_tdd['alt'].values # - only interested in households with more than one cdap travel_active person and # - at least one non-preschooler diff --git a/activitysim/abm/models/joint_tour_participation.py b/activitysim/abm/models/joint_tour_participation.py index ee8658ae5f..d079322e6e 100644 --- a/activitysim/abm/models/joint_tour_participation.py +++ b/activitysim/abm/models/joint_tour_participation.py @@ -209,6 +209,10 @@ def participants_chooser(probs, choosers, spec, trace_label): probs[choice_col] = np.where(probs[choice_col] > 0, 1, 0) non_choice_col = [col for col in probs.columns if col != choice_col][0] probs[non_choice_col] = 1 - probs[choice_col] + if iter > MAX_ITERATIONS + 1: + raise RuntimeError( + f"{num_tours_remaining} tours could not be satisfied even with forcing participation" + ) else: raise RuntimeError( f"{num_tours_remaining} tours could not be satisfied after {iter} iterations" diff --git a/activitysim/abm/models/transit_pass_subsidy.py b/activitysim/abm/models/transit_pass_subsidy.py index 4e513a6611..45a118fda8 100644 --- a/activitysim/abm/models/transit_pass_subsidy.py +++ b/activitysim/abm/models/transit_pass_subsidy.py @@ -51,7 +51,7 @@ def transit_pass_subsidy(persons_merged, persons, chunk_size, trace_hh_id): if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) estimator.write_spec(model_settings) - estimator.write_coefficients(coefficients_df) + estimator.write_coefficients(coefficients_df, model_settings) estimator.write_choosers(choosers) choices = simulate.simple_simulate( From 024879353df102ed5674076609ebaef02c6a6c3d Mon Sep 17 00:00:00 2001 From: David Hensle Date: Thu, 2 Mar 2023 12:24:29 -0800 Subject: [PATCH 04/58] blacken --- activitysim/abm/models/cdap.py | 4 +++- activitysim/abm/models/joint_tour_frequency_composition.py | 2 +- 
2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index 79c650a7a2..384eb24b7f 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -153,7 +153,9 @@ def cdap_simulate(persons_merged, persons, households, chunk_size, trace_hh_id): estimator.write_table(spec, "spec_%s" % hhsize, append=False) if add_joint_tour_utility: joint_spec = cdap.get_cached_joint_spec(hhsize) - estimator.write_table(joint_spec, "joint_spec_%s" % hhsize, append=False) + estimator.write_table( + joint_spec, "joint_spec_%s" % hhsize, append=False + ) logger.info("Running cdap_simulate with %d persons", len(persons_merged.index)) diff --git a/activitysim/abm/models/joint_tour_frequency_composition.py b/activitysim/abm/models/joint_tour_frequency_composition.py index a13e91a578..c1f735092b 100644 --- a/activitysim/abm/models/joint_tour_frequency_composition.py +++ b/activitysim/abm/models/joint_tour_frequency_composition.py @@ -42,7 +42,7 @@ def joint_tour_frequency_composition( alt_tdd = simulate.read_model_alts( "joint_tour_frequency_composition_alternatives.csv", set_index=None ) - alt_tdd.index = alt_tdd['alt'].values + alt_tdd.index = alt_tdd["alt"].values # - only interested in households with more than one cdap travel_active person and # - at least one non-preschooler From 9d17c41862956ca678ecb8fd18e9d993c8dace2e Mon Sep 17 00:00:00 2001 From: David Hensle Date: Thu, 2 Mar 2023 14:17:06 -0800 Subject: [PATCH 05/58] hh_joint from survey only if adding joint utility --- activitysim/abm/models/cdap.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index 384eb24b7f..268d95ed14 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -189,7 +189,8 @@ def cdap_simulate(persons_merged, persons, households, chunk_size, trace_hh_id): if estimator: estimator.write_choices(choices) 
choices = estimator.get_survey_values(choices, "persons", "cdap_activity") - hh_joint = estimator.get_survey_values(hh_joint, "households", "has_joint_tour") + if add_joint_tour_utility: + hh_joint = estimator.get_survey_values(hh_joint, "households", "has_joint_tour") estimator.write_override_choices(choices) estimator.end_estimation() From 7ba0ddda7fd69a95755ed492b307315d7d2f08db Mon Sep 17 00:00:00 2001 From: David Hensle Date: Thu, 2 Mar 2023 14:29:44 -0800 Subject: [PATCH 06/58] blacken --- activitysim/abm/models/cdap.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index 268d95ed14..a8566f73be 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -190,7 +190,9 @@ def cdap_simulate(persons_merged, persons, households, chunk_size, trace_hh_id): estimator.write_choices(choices) choices = estimator.get_survey_values(choices, "persons", "cdap_activity") if add_joint_tour_utility: - hh_joint = estimator.get_survey_values(hh_joint, "households", "has_joint_tour") + hh_joint = estimator.get_survey_values( + hh_joint, "households", "has_joint_tour" + ) estimator.write_override_choices(choices) estimator.end_estimation() From da724c35b0c7a1973378c18a9cda6061b0d674ea Mon Sep 17 00:00:00 2001 From: dhensle Date: Thu, 16 Mar 2023 10:29:28 -0700 Subject: [PATCH 07/58] write out mode choice logsum --- activitysim/abm/models/location_choice.py | 40 ++++++++++++++++++----- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py index dd5e279b39..ff2dd4d1ef 100644 --- a/activitysim/abm/models/location_choice.py +++ b/activitysim/abm/models/location_choice.py @@ -808,6 +808,18 @@ def run_location_choice( ) tracing.trace_df(choices_df, estimation_trace_label) + if want_logsums & (not skip_choice): + # grabbing index, could be person_id or proto_person_id + index_name = 
choices_df.index.name + # merging mode choice logsum of chosen alternative to choices + choices_df = pd.merge( + choices_df.reset_index(), + location_sample_df.reset_index()[[index_name, model_settings["ALT_DEST_COL_NAME"], ALT_LOGSUM]], + how='left', + left_on=[index_name, 'choice'], + right_on=[index_name, model_settings["ALT_DEST_COL_NAME"]] + ).drop(columns=model_settings["ALT_DEST_COL_NAME"]).set_index(index_name) + choices_list.append(choices_df) if want_sample_table: @@ -825,7 +837,7 @@ def run_location_choice( else: # this will only happen with small samples (e.g. singleton) with no (e.g.) school segs logger.warning("%s no choices", trace_label) - choices_df = pd.DataFrame(columns=["choice", "logsum"]) + choices_df = pd.DataFrame(columns=["choice", "logsum", ALT_LOGSUM]) if len(sample_list) > 0: save_sample_df = pd.concat(sample_list) @@ -869,7 +881,8 @@ def iterate_location_choice( Returns ------- adds choice column model_settings['DEST_CHOICE_COLUMN_NAME'] - adds logsum column model_settings['DEST_CHOICE_LOGSUM_COLUMN_NAME']- if provided + adds destination choice logsum column model_settings['DEST_CHOICE_LOGSUM_COLUMN_NAME']- if provided + adds mode choice logsum to selected destination column model_settings['MODE_CHOICE_LOGSUM_COLUMN_NAME']- if provided adds annotations to persons table """ @@ -879,7 +892,9 @@ def iterate_location_choice( chooser_filter_column = model_settings["CHOOSER_FILTER_COLUMN_NAME"] dest_choice_column_name = model_settings["DEST_CHOICE_COLUMN_NAME"] - logsum_column_name = model_settings.get("DEST_CHOICE_LOGSUM_COLUMN_NAME") + dc_logsum_column_name = model_settings.get("DEST_CHOICE_LOGSUM_COLUMN_NAME") + mc_logsum_column_name = model_settings.get("MODE_CHOICE_LOGSUM_COLUMN_NAME") + want_logsums = (dc_logsum_column_name is not None) | (mc_logsum_column_name is not None) sample_table_name = model_settings.get("DEST_CHOICE_SAMPLE_TABLE_NAME") want_sample_table = ( @@ -929,7 +944,7 @@ def iterate_location_choice( persons_merged_df_, 
network_los, shadow_price_calculator=spc, - want_logsums=logsum_column_name is not None, + want_logsums=want_logsums, want_sample_table=want_sample_table, estimator=estimator, model_settings=model_settings, @@ -1005,10 +1020,15 @@ def iterate_location_choice( ) # add the dest_choice_logsum column to persons dataframe - if logsum_column_name: - persons_df[logsum_column_name] = ( + if dc_logsum_column_name: + persons_df[dc_logsum_column_name] = ( choices_df["logsum"].reindex(persons_df.index).astype("float") ) + # add the mode choice logsum column to persons dataframe + if mc_logsum_column_name: + persons_df[mc_logsum_column_name] = ( + choices_df[ALT_LOGSUM].reindex(persons_df.index).astype("float") + ) if save_sample_df is not None: # might be None for tiny samples even if sample_table_name was specified @@ -1047,9 +1067,13 @@ def iterate_location_choice( if trace_hh_id: tracing.trace_df(households_df, label=trace_label, warn_if_empty=True) - if logsum_column_name: + if dc_logsum_column_name: + tracing.print_summary( + dc_logsum_column_name, choices_df["logsum"], value_counts=True + ) + if mc_logsum_column_name: tracing.print_summary( - logsum_column_name, choices_df["logsum"], value_counts=True + mc_logsum_column_name, choices_df[ALT_LOGSUM], value_counts=True ) return persons_df From 92acbfa6efcdcf022f0b90c66f058ddf2fdee453 Mon Sep 17 00:00:00 2001 From: dhensle Date: Thu, 16 Mar 2023 10:30:16 -0700 Subject: [PATCH 08/58] tracing for proto population --- activitysim/core/tracing.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/activitysim/core/tracing.py b/activitysim/core/tracing.py index 44707c0aea..f5f3813908 100644 --- a/activitysim/core/tracing.py +++ b/activitysim/core/tracing.py @@ -788,6 +788,15 @@ def interaction_trace_rows(interaction_df, choosers, sample_size=None): elif "person_id" in choosers.columns and persons_table_name in traceable_table_ids: slicer_column_name = "person_id" targets = traceable_table_ids[persons_table_name] + elif 
choosers.index.name == "proto_person_id" and persons_table_name in traceable_table_ids: + slicer_column_name = choosers.index.name + targets = traceable_table_ids[persons_table_name] + elif choosers.index.name == "proto_household_id" and households_table_name in traceable_table_ids: + slicer_column_name = choosers.index.name + targets = traceable_table_ids[households_table_name] + elif choosers.index.name == "proto_tour_id" and 'proto_tours' in traceable_table_ids: + slicer_column_name = choosers.index.name + targets = traceable_table_ids['proto_tours'] else: print(choosers.columns) raise RuntimeError( From 398c5a901dfc664bb52eb2b1aeb881879430d428 Mon Sep 17 00:00:00 2001 From: dhensle Date: Thu, 16 Mar 2023 10:55:27 -0700 Subject: [PATCH 09/58] dropping all traceable tables after disagg --- activitysim/abm/models/disaggregate_accessibility.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/activitysim/abm/models/disaggregate_accessibility.py b/activitysim/abm/models/disaggregate_accessibility.py index fe79d3fcdf..1bc267c856 100644 --- a/activitysim/abm/models/disaggregate_accessibility.py +++ b/activitysim/abm/models/disaggregate_accessibility.py @@ -745,11 +745,9 @@ def compute_disaggregate_accessibility(network_los, chunk_size, trace_hh_id): for ch in list(pipeline.get_rn_generator().channels.keys()): pipeline.get_rn_generator().drop_channel(ch) - # Drop any prematurely added traceables - for trace in [ - x for x in inject.get_injectable("traceable_tables") if "proto_" not in x - ]: - tracing.deregister_traceable_table(trace) + # Dropping all traceable tables + for table in inject.get_injectable("traceable_tables"): + tracing.deregister_traceable_table(table) # need to clear any premature tables that were added during the previous run orca._TABLES.clear() From 049efcfae4a5161eeae32622d501acd134a3d8e2 Mon Sep 17 00:00:00 2001 From: dhensle Date: Tue, 21 Mar 2023 15:33:59 -0700 Subject: [PATCH 10/58] blacken --- 
activitysim/abm/models/location_choice.py | 24 +++++++++++++++-------- activitysim/core/tracing.py | 16 +++++++++++---- 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py index ff2dd4d1ef..8470368708 100644 --- a/activitysim/abm/models/location_choice.py +++ b/activitysim/abm/models/location_choice.py @@ -812,13 +812,19 @@ def run_location_choice( # grabbing index, could be person_id or proto_person_id index_name = choices_df.index.name # merging mode choice logsum of chosen alternative to choices - choices_df = pd.merge( - choices_df.reset_index(), - location_sample_df.reset_index()[[index_name, model_settings["ALT_DEST_COL_NAME"], ALT_LOGSUM]], - how='left', - left_on=[index_name, 'choice'], - right_on=[index_name, model_settings["ALT_DEST_COL_NAME"]] - ).drop(columns=model_settings["ALT_DEST_COL_NAME"]).set_index(index_name) + choices_df = ( + pd.merge( + choices_df.reset_index(), + location_sample_df.reset_index()[ + [index_name, model_settings["ALT_DEST_COL_NAME"], ALT_LOGSUM] + ], + how="left", + left_on=[index_name, "choice"], + right_on=[index_name, model_settings["ALT_DEST_COL_NAME"]], + ) + .drop(columns=model_settings["ALT_DEST_COL_NAME"]) + .set_index(index_name) + ) choices_list.append(choices_df) @@ -894,7 +900,9 @@ def iterate_location_choice( dest_choice_column_name = model_settings["DEST_CHOICE_COLUMN_NAME"] dc_logsum_column_name = model_settings.get("DEST_CHOICE_LOGSUM_COLUMN_NAME") mc_logsum_column_name = model_settings.get("MODE_CHOICE_LOGSUM_COLUMN_NAME") - want_logsums = (dc_logsum_column_name is not None) | (mc_logsum_column_name is not None) + want_logsums = (dc_logsum_column_name is not None) | ( + mc_logsum_column_name is not None + ) sample_table_name = model_settings.get("DEST_CHOICE_SAMPLE_TABLE_NAME") want_sample_table = ( diff --git a/activitysim/core/tracing.py b/activitysim/core/tracing.py index f5f3813908..07ba053266 100644 --- 
a/activitysim/core/tracing.py +++ b/activitysim/core/tracing.py @@ -788,15 +788,23 @@ def interaction_trace_rows(interaction_df, choosers, sample_size=None): elif "person_id" in choosers.columns and persons_table_name in traceable_table_ids: slicer_column_name = "person_id" targets = traceable_table_ids[persons_table_name] - elif choosers.index.name == "proto_person_id" and persons_table_name in traceable_table_ids: + elif ( + choosers.index.name == "proto_person_id" + and persons_table_name in traceable_table_ids + ): slicer_column_name = choosers.index.name targets = traceable_table_ids[persons_table_name] - elif choosers.index.name == "proto_household_id" and households_table_name in traceable_table_ids: + elif ( + choosers.index.name == "proto_household_id" + and households_table_name in traceable_table_ids + ): slicer_column_name = choosers.index.name targets = traceable_table_ids[households_table_name] - elif choosers.index.name == "proto_tour_id" and 'proto_tours' in traceable_table_ids: + elif ( + choosers.index.name == "proto_tour_id" and "proto_tours" in traceable_table_ids + ): slicer_column_name = choosers.index.name - targets = traceable_table_ids['proto_tours'] + targets = traceable_table_ids["proto_tours"] else: print(choosers.columns) raise RuntimeError( From cad3bf799ffa1ba37409dcfd56524dcaed28a731 Mon Sep 17 00:00:00 2001 From: dhensle Date: Thu, 6 Apr 2023 10:19:04 -0700 Subject: [PATCH 11/58] pandas < 2.0 --- conda-environments/activitysim-dev-base.yml | 2 +- conda-environments/activitysim-dev.yml | 2 +- conda-environments/github-actions-tests.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conda-environments/activitysim-dev-base.yml b/conda-environments/activitysim-dev-base.yml index f0bdcb3a36..7b534edf8a 100644 --- a/conda-environments/activitysim-dev-base.yml +++ b/conda-environments/activitysim-dev-base.yml @@ -41,7 +41,7 @@ dependencies: - numpydoc - openmatrix >= 0.3.4.1 - orca >= 1.6 -- pandas >= 1.1.0 +- pandas 
>= 1.1.0,<2.0 - pre-commit - psutil >= 4.1 - pyarrow >= 2.0 diff --git a/conda-environments/activitysim-dev.yml b/conda-environments/activitysim-dev.yml index c46b436dea..519f442db1 100644 --- a/conda-environments/activitysim-dev.yml +++ b/conda-environments/activitysim-dev.yml @@ -37,7 +37,7 @@ dependencies: - numpydoc - openmatrix >= 0.3.4.1 - orca >= 1.6 -- pandas >= 1.1.0 +- pandas >= 1.1.0,<2.0 - pre-commit - psutil >= 4.1 - pyarrow >= 2.0 diff --git a/conda-environments/github-actions-tests.yml b/conda-environments/github-actions-tests.yml index 63d353ea51..9207c8899e 100644 --- a/conda-environments/github-actions-tests.yml +++ b/conda-environments/github-actions-tests.yml @@ -16,7 +16,7 @@ dependencies: - numpy >= 1.16.1 - openmatrix >= 0.3.4.1 - orca >= 1.6 -- pandas >= 1.1.0 +- pandas >= 1.1.0,<2.0 - psutil >= 4.1 - pyarrow >= 2.0 - pypyr >= 5.3 From 879d09833a5e8504e3c24a7577288752a51aae4c Mon Sep 17 00:00:00 2001 From: dhensle Date: Thu, 6 Apr 2023 15:13:42 -0700 Subject: [PATCH 12/58] annotate and col selection in disagg access --- .../abm/models/disaggregate_accessibility.py | 33 ++++++++++++++++--- .../abm/tables/disaggregate_accessibility.py | 19 +++++------ 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/activitysim/abm/models/disaggregate_accessibility.py b/activitysim/abm/models/disaggregate_accessibility.py index 1bc267c856..b8b8bfd3e1 100644 --- a/activitysim/abm/models/disaggregate_accessibility.py +++ b/activitysim/abm/models/disaggregate_accessibility.py @@ -569,14 +569,13 @@ def merge_persons(self): inject.add_table("proto_persons_merged", persons_merged) -def get_disaggregate_logsums(network_los, chunk_size, trace_hh_id): +def get_disaggregate_logsums( + network_los, chunk_size, trace_hh_id, disagg_model_settings +): logsums = {} persons_merged = pipeline.get_table("proto_persons_merged").sort_index( inplace=False ) - disagg_model_settings = read_disaggregate_accessibility_yaml( - "disaggregate_accessibility.yaml" - ) for 
model_name in [ "workplace_location", @@ -696,8 +695,14 @@ def compute_disaggregate_accessibility(network_los, chunk_size, trace_hh_id): tracing.register_traceable_table(tablename, df) del df + disagg_model_settings = read_disaggregate_accessibility_yaml( + "disaggregate_accessibility.yaml" + ) + # Run location choice - logsums = get_disaggregate_logsums(network_los, chunk_size, trace_hh_id) + logsums = get_disaggregate_logsums( + network_los, chunk_size, trace_hh_id, disagg_model_settings + ) logsums = {k + "_accessibility": v for k, v in logsums.items()} # Combined accessibility table @@ -758,4 +763,22 @@ def compute_disaggregate_accessibility(network_los, chunk_size, trace_hh_id): # Inject accessibility results into pipeline [inject.add_table(k, df) for k, df in logsums.items()] + # available post-processing + for annotations in disagg_model_settings.get("postprocess_proto_tables", []): + tablename = annotations["tablename"] + df = pipeline.get_table(tablename) + assert df is not None + assert annotations is not None + assign_columns( + df=df, + model_settings={ + **annotations["annotate"], + **disagg_model_settings["suffixes"], + }, + trace_label=tracing.extend_trace_label( + "disaggregate_accessibility.postprocess", tablename + ), + ) + pipeline.replace_table(tablename, df) + return diff --git a/activitysim/abm/tables/disaggregate_accessibility.py b/activitysim/abm/tables/disaggregate_accessibility.py index 4c4eb9ad40..db65652f5f 100644 --- a/activitysim/abm/tables/disaggregate_accessibility.py +++ b/activitysim/abm/tables/disaggregate_accessibility.py @@ -151,14 +151,13 @@ def disaggregate_accessibility(persons, households, land_use, accessibility): accessibility_cols = [ x for x in proto_accessibility_df.columns if "accessibility" in x ] + keep_cols = model_settings.get("KEEP_COLS", accessibility_cols) # Parse the merging parameters assert merging_params is not None # Check if already assigned! 
- if set(accessibility_cols).intersection(persons_merged_df.columns) == set( - accessibility_cols - ): + if set(keep_cols).intersection(persons_merged_df.columns) == set(keep_cols): return # Find the nearest zone (spatially) with accessibilities calculated @@ -190,7 +189,7 @@ def disaggregate_accessibility(persons, households, land_use, accessibility): # because it will get slightly different logsums for households in the same zone. # This is because different destination zones were selected. To resolve, get mean by cols. right_df = ( - proto_accessibility_df.groupby(merge_cols)[accessibility_cols] + proto_accessibility_df.groupby(merge_cols)[keep_cols] .mean() .sort_values(nearest_cols) .reset_index() @@ -223,9 +222,9 @@ def disaggregate_accessibility(persons, households, land_use, accessibility): ) # Predict the nearest person ID and pull the logsums - matched_logsums_df = right_df.loc[clf.predict(x_pop)][ - accessibility_cols - ].reset_index(drop=True) + matched_logsums_df = right_df.loc[clf.predict(x_pop)][keep_cols].reset_index( + drop=True + ) merge_df = pd.concat( [left_df.reset_index(drop=False), matched_logsums_df], axis=1 ).set_index("person_id") @@ -257,12 +256,12 @@ def disaggregate_accessibility(persons, households, land_use, accessibility): # Check that it was correctly left-joined assert all(persons_merged_df[merge_cols] == merge_df[merge_cols]) - assert any(merge_df[accessibility_cols].isnull()) + assert any(merge_df[keep_cols].isnull()) # Inject merged accessibilities so that it can be included in persons_merged function - inject.add_table("disaggregate_accessibility", merge_df[accessibility_cols]) + inject.add_table("disaggregate_accessibility", merge_df[keep_cols]) - return merge_df[accessibility_cols] + return merge_df[keep_cols] inject.broadcast( From 131832084575f9850156ce40a02fd7fd6adcf55d Mon Sep 17 00:00:00 2001 From: dhensle Date: Thu, 6 Apr 2023 15:14:32 -0700 Subject: [PATCH 13/58] annotate auto ownership for selection of disaggregate 
accessibilities --- activitysim/abm/models/auto_ownership.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/activitysim/abm/models/auto_ownership.py b/activitysim/abm/models/auto_ownership.py index 3e01513cfe..476bd18adf 100644 --- a/activitysim/abm/models/auto_ownership.py +++ b/activitysim/abm/models/auto_ownership.py @@ -4,7 +4,7 @@ from activitysim.core import config, expressions, inject, pipeline, simulate, tracing -from .util import estimation +from .util import estimation, annotate logger = logging.getLogger(__name__) @@ -84,5 +84,8 @@ def auto_ownership_simulate(households, households_merged, chunk_size, trace_hh_ "auto_ownership", households.auto_ownership, value_counts=True ) + if model_settings.get("annotate_households"): + annotate.annotate_households(model_settings, trace_label) + if trace_hh_id: tracing.trace_df(households, label="auto_ownership", warn_if_empty=True) From 67b1b90b3b9f83b3fb8189e23595953403e15b98 Mon Sep 17 00:00:00 2001 From: dhensle Date: Thu, 6 Apr 2023 15:15:09 -0700 Subject: [PATCH 14/58] added to annotate helper --- activitysim/abm/models/util/annotate.py | 38 +++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/activitysim/abm/models/util/annotate.py b/activitysim/abm/models/util/annotate.py index e50519b38e..727ef4292f 100644 --- a/activitysim/abm/models/util/annotate.py +++ b/activitysim/abm/models/util/annotate.py @@ -15,6 +15,44 @@ logger = logging.getLogger(__name__) +def annotate_households(model_settings, trace_label, locals_dict={}): + """ + Add columns to the households table in the pipeline according to spec. 
+ + Parameters + ---------- + model_settings : dict + trace_label : str + """ + households = inject.get_table("households").to_frame() + expressions.assign_columns( + df=households, + model_settings=model_settings.get("annotate_households"), + locals_dict=locals_dict, + trace_label=tracing.extend_trace_label(trace_label, "annotate_households"), + ) + pipeline.replace_table("households", households) + + +def annotate_persons(model_settings, trace_label, locals_dict={}): + """ + Add columns to the persons table in the pipeline according to spec. + + Parameters + ---------- + model_settings : dict + trace_label : str + """ + persons = inject.get_table("persons").to_frame() + expressions.assign_columns( + df=persons, + model_settings=model_settings.get("annotate_persons"), + locals_dict=locals_dict, + trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"), + ) + pipeline.replace_table("persons", persons) + + def annotate_tours(model_settings, trace_label, locals_dict={}): """ Add columns to the tours table in the pipeline according to spec. From d2fdc58bc540bc8f8c2da4544b808b9e6f111a53 Mon Sep 17 00:00:00 2001 From: dhensle Date: Fri, 7 Apr 2023 11:14:15 -0700 Subject: [PATCH 15/58] school escorting estimation enhancements --- .../joint_tour_frequency_composition.py | 1 + activitysim/abm/models/school_escorting.py | 116 ++++++++++++------ .../models/util/school_escort_tours_trips.py | 8 ++ 3 files changed, 90 insertions(+), 35 deletions(-) diff --git a/activitysim/abm/models/joint_tour_frequency_composition.py b/activitysim/abm/models/joint_tour_frequency_composition.py index c1f735092b..ffdb36e2c7 100644 --- a/activitysim/abm/models/joint_tour_frequency_composition.py +++ b/activitysim/abm/models/joint_tour_frequency_composition.py @@ -36,6 +36,7 @@ def joint_tour_frequency_composition( model_settings = config.read_model_settings(model_settings_file_name) # FIXME setting index as "alt" causes crash in estimation mode... 
+ # happens in school escorting too! # alt_tdd = simulate.read_model_alts( # "joint_tour_frequency_composition_alternatives.csv", set_index="alt" # ) diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py index 8da0068a7e..13e09e08b0 100644 --- a/activitysim/abm/models/school_escorting.py +++ b/activitysim/abm/models/school_escorting.py @@ -46,7 +46,7 @@ def determine_escorting_participants(choosers, persons, model_settings): # can specify different weights to determine chaperones persontype_weight = model_settings.get("PERSON_WEIGHT", 100) - gender_weight = model_settings.get("PERSON_WEIGHT", 10) + gender_weight = model_settings.get("GENDER_WEIGHT", 10) age_weight = model_settings.get("AGE_WEIGHT", 1) # can we move all of these to a config file? @@ -122,7 +122,7 @@ def add_prev_choices_to_choosers(choosers, choices, alts, stage): stage_alts, how="left", left_on=escorting_choice, - right_on=stage_alts.index.name, + right_index=True, ) .set_index("household_id") ) @@ -198,8 +198,12 @@ def create_school_escorting_bundles_table(choosers, tours, stage): bundles : pd.DataFrame one school escorting bundle per row """ - # making a table of bundles - choosers = choosers.reset_index() + # want to keep household_id in columns, which is already there if running in estimation mode + if "household_id" in choosers.columns: + choosers = choosers.reset_index(drop=True) + else: + choosers = choosers.reset_index() + # creating a row for every school escorting bundle choosers = choosers.loc[choosers.index.repeat(choosers["nbundles"])] bundles = pd.DataFrame() @@ -362,7 +366,11 @@ def school_escorting( households_merged = households_merged.to_frame() tours = tours.to_frame() - alts = simulate.read_model_alts(model_settings["ALTS"], set_index="Alt") + # FIXME setting index as "Alt" causes crash in estimation mode... + # happens in joint_tour_frequency_composition too! 
+ # alts = simulate.read_model_alts(model_settings["ALTS"], set_index="Alt") + alts = simulate.read_model_alts(model_settings["ALTS"], set_index=None) + alts.index = alts["Alt"].values households_merged, participant_columns = determine_escorting_participants( households_merged, persons, model_settings @@ -379,7 +387,9 @@ def school_escorting( choices = None for stage_num, stage in enumerate(school_escorting_stages): stage_trace_label = trace_label + "_" + stage - estimator = estimation.manager.begin_estimation("school_escorting_" + stage) + estimator = estimation.manager.begin_estimation( + model_name="school_escorting_" + stage, bundle_name="school_escorting" + ) model_spec_raw = simulate.read_model_spec( file_name=model_settings[stage.upper() + "_SPEC"] @@ -434,9 +444,26 @@ def school_escorting( if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) - estimator.write_spec(model_settings) - estimator.write_coefficients(coefficients_df, model_settings) + estimator.write_spec(model_settings, tag=stage.upper() + "_SPEC") + estimator.write_coefficients( + coefficients_df, file_name=stage.upper() + "_COEFFICIENTS" + ) estimator.write_choosers(choosers) + estimator.write_alternatives(alts, bundle_directory=True) + + # FIXME #interaction_simulate_estimation_requires_chooser_id_in_df_column + # shuold we do it here or have interaction_simulate do it? + # chooser index must be duplicated in column or it will be omitted from interaction_dataset + # estimation requires that chooser_id is either in index or a column of interaction_dataset + # so it can be reformatted (melted) and indexed by chooser_id and alt_id + assert choosers.index.name == "household_id" + assert "household_id" not in choosers.columns + choosers["household_id"] = choosers.index + + # FIXME set_alt_id - do we need this for interaction_simulate estimation bundle tables? 
+ estimator.set_alt_id("alt_id") + + estimator.set_chooser_id(choosers.index.name) log_alt_losers = config.setting("log_alt_losers", False) @@ -474,7 +501,7 @@ def school_escorting( if stage_num >= 1: choosers["Alt"] = choices - choosers = choosers.join(alts, how="left", on="Alt") + choosers = choosers.join(alts.set_index("Alt"), how="left", on="Alt") bundles = create_school_escorting_bundles_table( choosers[choosers["Alt"] > 1], tours, stage ) @@ -484,6 +511,7 @@ def school_escorting( escort_bundles = pd.concat(escort_bundles) + # possible to get if len(escort_bundles) > 0: escort_bundles["bundle_id"] = ( escort_bundles["household_id"] * 10 @@ -519,29 +547,47 @@ def school_escorting( escort_bundles ) - # update pipeline - - pipeline.replace_table("tours", tours) - pipeline.get_rn_generator().drop_channel("tours") - pipeline.get_rn_generator().add_channel("tours", tours) - pipeline.replace_table("escort_bundles", escort_bundles) - # save school escorting tours and trips in pipeline so we can overwrite results from downstream models - pipeline.replace_table("school_escort_tours", school_escort_tours) - pipeline.replace_table("school_escort_trips", school_escort_trips) - - # updating timetable object with pure escort tours so joint tours do not schedule ontop - timetable = inject.get_injectable("timetable") - - # Need to do this such that only one person is in nth_tours - # thus, looping through tour_category and tour_num - # including mandatory tours because their start / end times may have - # changed to match the school escort times - for tour_category in tours.tour_category.unique(): - for tour_num, nth_tours in tours[ - tours.tour_category == tour_category - ].groupby("tour_num", sort=True): - timetable.assign( - window_row_ids=nth_tours["person_id"], tdds=nth_tours["tdd"] - ) - - timetable.replace_table() + else: + # create empty school escort tours & trips tables to be used downstream + tours["school_esc_outbound"] = pd.NA + tours["school_esc_inbound"] = pd.NA 
+ tours["school_escort_direction"] = pd.NA + tours["next_pure_escort_start"] = pd.NA + school_escort_tours = pd.DataFrame(columns=tours.columns) + trip_cols = [ + "household_id", + "person_id", + "tour_id", + "trip_id", + "outbound", + "depart", + "purpose", + "destination", + ] + school_escort_trips = pd.DataFrame(columns=trip_cols) + + # update pipeline + pipeline.replace_table("tours", tours) + pipeline.get_rn_generator().drop_channel("tours") + pipeline.get_rn_generator().add_channel("tours", tours) + pipeline.replace_table("escort_bundles", escort_bundles) + # save school escorting tours and trips in pipeline so we can overwrite results from downstream models + pipeline.replace_table("school_escort_tours", school_escort_tours) + pipeline.replace_table("school_escort_trips", school_escort_trips) + + # updating timetable object with pure escort tours so joint tours do not schedule ontop + timetable = inject.get_injectable("timetable") + + # Need to do this such that only one person is in nth_tours + # thus, looping through tour_category and tour_num + # including mandatory tours because their start / end times may have + # changed to match the school escort times + for tour_category in tours.tour_category.unique(): + for tour_num, nth_tours in tours[tours.tour_category == tour_category].groupby( + "tour_num", sort=True + ): + timetable.assign( + window_row_ids=nth_tours["person_id"], tdds=nth_tours["tdd"] + ) + + timetable.replace_table() diff --git a/activitysim/abm/models/util/school_escort_tours_trips.py b/activitysim/abm/models/util/school_escort_tours_trips.py index 778fb86454..691239ee10 100644 --- a/activitysim/abm/models/util/school_escort_tours_trips.py +++ b/activitysim/abm/models/util/school_escort_tours_trips.py @@ -399,6 +399,10 @@ def merge_school_escort_trips_into_pipeline(): tours = pipeline.get_table("tours") trips = pipeline.get_table("trips") + # checking to see if there are school escort trips to merge in + if len(school_escort_trips) == 0: + 
return trips + # want to remove stops if school escorting takes place on that half tour so we can replace them with the actual stops out_se_tours = tours[ tours["school_esc_outbound"].isin(["pure_escort", "ride_share"]) @@ -603,6 +607,10 @@ def force_escortee_tour_modes_to_match_chauffeur(tours): # Does it even matter if trip modes are getting matched later? escort_bundles = inject.get_table("escort_bundles").to_frame() + if len(escort_bundles) == 0: + # do not need to do anything if no escorting + return tours + # grabbing the school tour ids for each school escort bundle se_tours = escort_bundles[["school_tour_ids", "chauf_tour_id"]].copy() # merging in chauffeur tour mode From 03e4b679caadad49a025609b7e07c0aeb6703f51 Mon Sep 17 00:00:00 2001 From: dhensle Date: Fri, 7 Apr 2023 11:15:49 -0700 Subject: [PATCH 16/58] tracing improvements for disaggregate accessibilities --- activitysim/core/tracing.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/activitysim/core/tracing.py b/activitysim/core/tracing.py index 07ba053266..6bbc2854de 100644 --- a/activitysim/core/tracing.py +++ b/activitysim/core/tracing.py @@ -776,30 +776,22 @@ def interaction_trace_rows(interaction_df, choosers, sample_size=None): households_table_name.pop(), ) - if choosers.index.name == "person_id" and persons_table_name in traceable_table_ids: + if ( + choosers.index.name in ["person_id", "proto_person_id"] + ) and persons_table_name in traceable_table_ids: slicer_column_name = choosers.index.name - targets = traceable_table_ids["persons"] - elif choosers.index.name == "household_id" and "households" in traceable_table_ids: + targets = traceable_table_ids[persons_table_name] + elif ( + choosers.index.name in ["household_id", "proto_household_id"] + ) and households_table_name in traceable_table_ids: slicer_column_name = choosers.index.name - targets = traceable_table_ids["households"] + targets = traceable_table_ids[households_table_name] elif 
"household_id" in choosers.columns and "households" in traceable_table_ids: slicer_column_name = "household_id" targets = traceable_table_ids[households_table_name] elif "person_id" in choosers.columns and persons_table_name in traceable_table_ids: slicer_column_name = "person_id" targets = traceable_table_ids[persons_table_name] - elif ( - choosers.index.name == "proto_person_id" - and persons_table_name in traceable_table_ids - ): - slicer_column_name = choosers.index.name - targets = traceable_table_ids[persons_table_name] - elif ( - choosers.index.name == "proto_household_id" - and households_table_name in traceable_table_ids - ): - slicer_column_name = choosers.index.name - targets = traceable_table_ids[households_table_name] elif ( choosers.index.name == "proto_tour_id" and "proto_tours" in traceable_table_ids ): From b4668362019c692bf4256a5f7e79c737a1bd3d5c Mon Sep 17 00:00:00 2001 From: dhensle Date: Fri, 7 Apr 2023 11:16:34 -0700 Subject: [PATCH 17/58] skip data bundle writing functionality --- activitysim/abm/models/util/estimation.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/activitysim/abm/models/util/estimation.py b/activitysim/abm/models/util/estimation.py index 6a5dbadf1f..8501f02552 100644 --- a/activitysim/abm/models/util/estimation.py +++ b/activitysim/abm/models/util/estimation.py @@ -46,11 +46,12 @@ def __init__(self, bundle_name, model_name, estimation_table_recipes): os.makedirs(output_dir) # make directory if needed # delete estimation files - unlink_files(self.output_directory(), file_types=("csv", "yaml")) + unlink_files(self.output_directory(), file_types=("csv", "yaml", "hdf")) if self.bundle_name != self.model_name: # kind of inelegant to always delete these, but ok as they are redundantly recreated for each sub model unlink_files( - self.output_directory(bundle_directory=True), file_types=("csv", "yaml") + self.output_directory(bundle_directory=True), + file_types=("csv", "yaml", "hdf"), ) 
# FIXME - not required? @@ -218,6 +219,13 @@ def write_omnibus_table(self): if len(self.omnibus_tables) == 0: return + settings = config.read_model_settings(ESTIMATION_SETTINGS_FILE_NAME) + + edbs_to_skip = settings.get("SKIP_BUNDLE_WRITE_FOR", []) + if self.bundle_name in edbs_to_skip: + self.debug(f"Skipping write to disk for {self.bundle_name}") + return + for omnibus_table, table_names in self.omnibus_tables.items(): self.debug( @@ -236,12 +244,20 @@ def write_omnibus_table(self): 1 if omnibus_table in self.omnibus_tables_append_columns else 0 ) - df = pd.concat([self.tables[t] for t in table_names], axis=concat_axis) + # df = pd.concat([self.tables[t] for t in table_names], axis=concat_axis) + if len(table_names) == 0: + # empty tables + df = pd.DataFrame() + else: + df = pd.concat([self.tables[t] for t in table_names], axis=concat_axis) + + self.debug(f"sorting tables: {table_names}") df.sort_index(ascending=True, inplace=True, kind="mergesort") file_path = self.output_file_path(omnibus_table, "csv") assert not os.path.isfile(file_path) + self.debug(f"writing table: {file_path}") df.to_csv(file_path, mode="a", index=True, header=True) self.debug("write_omnibus_choosers: %s" % file_path) From a9dda1c7af8bac45b24dfec67272be7126c65d8e Mon Sep 17 00:00:00 2001 From: dhensle Date: Fri, 7 Apr 2023 21:01:42 -0700 Subject: [PATCH 18/58] fixing restart issue with missing destination_size --- activitysim/abm/models/disaggregate_accessibility.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/activitysim/abm/models/disaggregate_accessibility.py b/activitysim/abm/models/disaggregate_accessibility.py index b8b8bfd3e1..2bb78bf60e 100644 --- a/activitysim/abm/models/disaggregate_accessibility.py +++ b/activitysim/abm/models/disaggregate_accessibility.py @@ -741,11 +741,13 @@ def compute_disaggregate_accessibility(network_los, chunk_size, trace_hh_id): logsums["proto_disaggregate_accessibility"] = access_df # Drop any tables prematurely created 
- for tablename in [ - "school_destination_size", - "workplace_destination_size", - ]: - pipeline.drop_table(tablename) + # FIXME: dropping size tables breaks restart functionality for location choice models. + # hopefully this pipeline mess just goes away with move away from orca.... + # for tablename in [ + # "school_destination_size", + # "workplace_destination_size", + # ]: + # pipeline.drop_table(tablename) for ch in list(pipeline.get_rn_generator().channels.keys()): pipeline.get_rn_generator().drop_channel(ch) From 560db34549ba996e3f3eaa95f2acc86839a33287 Mon Sep 17 00:00:00 2001 From: dhensle Date: Fri, 7 Apr 2023 21:02:08 -0700 Subject: [PATCH 19/58] missed hdf cleanup --- activitysim/abm/models/util/estimation.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/activitysim/abm/models/util/estimation.py b/activitysim/abm/models/util/estimation.py index 8501f02552..7c23b97b5e 100644 --- a/activitysim/abm/models/util/estimation.py +++ b/activitysim/abm/models/util/estimation.py @@ -46,12 +46,11 @@ def __init__(self, bundle_name, model_name, estimation_table_recipes): os.makedirs(output_dir) # make directory if needed # delete estimation files - unlink_files(self.output_directory(), file_types=("csv", "yaml", "hdf")) + unlink_files(self.output_directory(), file_types=("csv", "yaml")) if self.bundle_name != self.model_name: # kind of inelegant to always delete these, but ok as they are redundantly recreated for each sub model unlink_files( - self.output_directory(bundle_directory=True), - file_types=("csv", "yaml", "hdf"), + self.output_directory(bundle_directory=True), file_types=("csv", "yaml") ) # FIXME - not required? From 53ecea8ef9a2936c1f33be715e3d8798ea7e64dc Mon Sep 17 00:00:00 2001 From: Joel Freedman Date: Mon, 24 Apr 2023 17:53:14 -0700 Subject: [PATCH 20/58] Added estimation functionality for external worker identification model. 
--- activitysim/estimation/larch/__init__.py | 2 +- .../larch/external_worker_identification.py | 80 +++++++++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 activitysim/estimation/larch/external_worker_identification.py diff --git a/activitysim/estimation/larch/__init__.py b/activitysim/estimation/larch/__init__.py index 1d87e1ce12..d5ff3d0921 100644 --- a/activitysim/estimation/larch/__init__.py +++ b/activitysim/estimation/larch/__init__.py @@ -9,7 +9,7 @@ from .scheduling import * from .simple_simulate import * from .stop_frequency import * - +from .external_worker_identification import * def component_model(name, *args, **kwargs): if isinstance(name, str): diff --git a/activitysim/estimation/larch/external_worker_identification.py b/activitysim/estimation/larch/external_worker_identification.py new file mode 100644 index 0000000000..63d267cf09 --- /dev/null +++ b/activitysim/estimation/larch/external_worker_identification.py @@ -0,0 +1,80 @@ +import os +from typing import Collection + +import numpy as np +import pandas as pd +import yaml +from larch import DataFrames, Model, P, X +from larch.util import Dict + +from .general import ( + apply_coefficients, + dict_of_linear_utility_from_spec, + remove_apostrophes, +) +from .simple_simulate import simple_simulate_data + + +def external_worker_identification_model( + name="external_worker_identification", + edb_directory="output/estimation_data_bundle/{name}/", + return_data=False, +): + data = simple_simulate_data( + name=name, + edb_directory=edb_directory, + values_index_col="person_id", + ) + coefficients = data.coefficients + # coef_template = data.coef_template # not used + spec = data.spec + chooser_data = data.chooser_data + settings = data.settings + + altnames = list(spec.columns[3:]) + altcodes = range(len(altnames)) + + chooser_data = remove_apostrophes(chooser_data) + chooser_data.fillna(0, inplace=True) + + # Remove choosers with invalid observed choice + chooser_data = 
chooser_data[chooser_data["override_choice"] >= 0] + + m = Model() + # One of the alternatives is coded as 0, so + # we need to explicitly initialize the MNL nesting graph + # and set to root_id to a value other than zero. + m.initialize_graph(alternative_codes=altcodes, root_id=99) + + m.utility_co = dict_of_linear_utility_from_spec( + spec, + "Label", + dict(zip(altnames, altcodes)), + ) + + apply_coefficients(coefficients, m) + + d = DataFrames( + co=chooser_data, + av=True, + alt_codes=altcodes, + alt_names=altnames, + ) + + m.dataservice = d + m.choice_co_code = "override_choice" + + if return_data: + return ( + m, + Dict( + edb_directory=data.edb_directory, + chooser_data=chooser_data, + coefficients=coefficients, + spec=spec, + altnames=altnames, + altcodes=altcodes, + ), + ) + + return m From 8205609ba5a68d3544ccdbba0a4e518241f6965a Mon Sep 17 00:00:00 2001 From: Joel Freedman Date: Mon, 24 Apr 2023 18:01:30 -0700 Subject: [PATCH 21/58] Added code to estimate external worker location choice model --- activitysim/estimation/larch/location_choice.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/activitysim/estimation/larch/location_choice.py b/activitysim/estimation/larch/location_choice.py index 74a426e714..64aa471410 100644 --- a/activitysim/estimation/larch/location_choice.py +++ b/activitysim/estimation/larch/location_choice.py @@ -330,6 +330,12 @@ def workplace_location_model(**kwargs): **kwargs, ) +def external_workplace_location_model(**kwargs): + unused = kwargs.pop("name", None) + return location_choice_model( + name="external_workplace_location", + **kwargs, + ) def school_location_model(**kwargs): unused = kwargs.pop("name", None) From 47f22e21b6cb9a1a2a23f54dd0a4ff9269e7c28d Mon Sep 17 00:00:00 2001 From: dhensle Date: Mon, 15 May 2023 11:32:58 -0700 Subject: [PATCH 22/58] miscellaneous est fixes --- activitysim/abm/models/cdap.py | 1 + activitysim/abm/models/joint_tour_destination.py | 2 +- .../abm/models/non_mandatory_tour_frequency.py 
| 15 ++++++++------- activitysim/abm/models/school_escorting.py | 2 +- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index a8566f73be..761c5c970e 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -190,6 +190,7 @@ def cdap_simulate(persons_merged, persons, households, chunk_size, trace_hh_id): estimator.write_choices(choices) choices = estimator.get_survey_values(choices, "persons", "cdap_activity") if add_joint_tour_utility: + hh_joint.index.name = 'household_id' hh_joint = estimator.get_survey_values( hh_joint, "households", "has_joint_tour" ) diff --git a/activitysim/abm/models/joint_tour_destination.py b/activitysim/abm/models/joint_tour_destination.py index 02651d2a44..02f0dbe84e 100644 --- a/activitysim/abm/models/joint_tour_destination.py +++ b/activitysim/abm/models/joint_tour_destination.py @@ -62,7 +62,7 @@ def joint_tour_destination( estimator.write_model_settings(model_settings, model_settings_file_name) choices_df, save_sample_df = tour_destination.run_tour_destination( - tours, + joint_tours, persons_merged, want_logsums, want_sample_table, diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py b/activitysim/abm/models/non_mandatory_tour_frequency.py index 521f49c47c..b9a6db1fee 100644 --- a/activitysim/abm/models/non_mandatory_tour_frequency.py +++ b/activitysim/abm/models/non_mandatory_tour_frequency.py @@ -345,13 +345,14 @@ def non_mandatory_tour_frequency(persons, persons_merged, chunk_size, trace_hh_i # make sure they created the right tours survey_tours = estimation.manager.get_survey_table("tours").sort_index() - non_mandatory_survey_tours = survey_tours[ - survey_tours.tour_category == "non_mandatory" - ] - assert len(non_mandatory_survey_tours) == len(non_mandatory_tours) - assert non_mandatory_survey_tours.index.equals( - non_mandatory_tours.sort_index().index - ) + # FIXME below check needs to remove the pure-escort 
tours from the survey tours table + # non_mandatory_survey_tours = survey_tours[ + # survey_tours.tour_category == "non_mandatory" + # ] + # assert len(non_mandatory_survey_tours) == len(non_mandatory_tours) + # assert non_mandatory_survey_tours.index.equals( + # non_mandatory_tours.sort_index().index + # ) # make sure they created tours with the expected tour_ids columns = ["person_id", "household_id", "tour_type", "tour_category"] diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py index 13e09e08b0..319c0c0e54 100644 --- a/activitysim/abm/models/school_escorting.py +++ b/activitysim/abm/models/school_escorting.py @@ -511,7 +511,7 @@ def school_escorting( escort_bundles = pd.concat(escort_bundles) - # possible to get + # Only want to create bundles and tours and trips if at least one household has school escorting if len(escort_bundles) > 0: escort_bundles["bundle_id"] = ( escort_bundles["household_id"] * 10 From 7354f7a0104a564c8e7f3f06e06be69d810f431b Mon Sep 17 00:00:00 2001 From: Joel Freedman Date: Wed, 17 May 2023 22:06:37 -0700 Subject: [PATCH 23/58] update simple_simulate.py to include work_from_home_model estimation functionality --- activitysim/estimation/larch/simple_simulate.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/activitysim/estimation/larch/simple_simulate.py b/activitysim/estimation/larch/simple_simulate.py index 0ee055fe6c..940447042d 100644 --- a/activitysim/estimation/larch/simple_simulate.py +++ b/activitysim/estimation/larch/simple_simulate.py @@ -230,6 +230,21 @@ def free_parking_model( }, # True is free parking, False is paid parking, names match spec positions ) +def work_from_home_model( + name="work_from_home", + edb_directory="output/estimation_data_bundle/{name}/", + return_data=False, +): + return simple_simulate_model( + name=name, + edb_directory=edb_directory, + return_data=return_data, + choices={ + True: 1, + False: 2, + }, # True is work from home, 
false is does not work from home, names match spec positions + ) + def mandatory_tour_frequency_model( name="mandatory_tour_frequency", From 5847bda77735c70203b78926154c3574dfa92f2c Mon Sep 17 00:00:00 2001 From: Joel Freedman Date: Thu, 18 May 2023 13:39:24 -0700 Subject: [PATCH 24/58] Added code for external_non_mandatory_identification_model estimation to simple_simulate.py --- activitysim/estimation/larch/simple_simulate.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/activitysim/estimation/larch/simple_simulate.py b/activitysim/estimation/larch/simple_simulate.py index 940447042d..f5504f1f47 100644 --- a/activitysim/estimation/larch/simple_simulate.py +++ b/activitysim/estimation/larch/simple_simulate.py @@ -310,3 +310,19 @@ def joint_tour_participation_model( 1: 2, # 1 means not participate, alternative 2 }, ) + +def external_non_mandatory_identification_model( + name="external_non_mandatory_identification", + edb_directory="output/estimation_data_bundle/{name}/", + return_data=False, +): + return simple_simulate_model( + name=name, + edb_directory=edb_directory, + return_data=return_data, + values_index_col="tour_id", + choices={ + 0: 1, # 0 means external, alternative 1 + 1: 2, # 1 means not external, alternative 2 + }, + ) From 88fe4a7373b51825543c9c600bfe2862cc3f243a Mon Sep 17 00:00:00 2001 From: dhensle Date: Wed, 21 Jun 2023 11:31:34 -0700 Subject: [PATCH 25/58] added landuse to trip dest locals_d --- activitysim/abm/models/trip_destination.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/activitysim/abm/models/trip_destination.py b/activitysim/abm/models/trip_destination.py index 545cfee29f..01554d984d 100644 --- a/activitysim/abm/models/trip_destination.py +++ b/activitysim/abm/models/trip_destination.py @@ -134,6 +134,7 @@ def _destination_sample( "size_terms": size_term_matrix, "size_terms_array": size_term_matrix.df.to_numpy(), "timeframe": "trip", + "land_use": inject.get_table("land_use").to_frame(), } ) 
locals_dict.update(skims) @@ -829,6 +830,7 @@ def trip_destination_simulate( "size_terms": size_term_matrix, "size_terms_array": size_term_matrix.df.to_numpy(), "timeframe": "trip", + "land_use": inject.get_table("land_use").to_frame(), } ) locals_dict.update(skims) From 90fce8d8696ce9c0224e19f597c932e241b30c82 Mon Sep 17 00:00:00 2001 From: dhensle Date: Mon, 3 Jul 2023 18:00:26 -0700 Subject: [PATCH 26/58] adding transit pass estimation functions --- .../estimation/larch/simple_simulate.py | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/activitysim/estimation/larch/simple_simulate.py b/activitysim/estimation/larch/simple_simulate.py index f5504f1f47..9681e28875 100644 --- a/activitysim/estimation/larch/simple_simulate.py +++ b/activitysim/estimation/larch/simple_simulate.py @@ -326,3 +326,36 @@ def external_non_mandatory_identification_model( 1: 2, # 1 means not external, alternative 2 }, ) + + +def transit_pass_subsidy_model( + name="transit_pass_subsidy", + edb_directory="output/estimation_data_bundle/{name}/", + return_data=False, +): + print("transit pass subsidy") + return simple_simulate_model( + name=name, + edb_directory=edb_directory, + return_data=return_data, + choices={ + 0: 1, # 0 means no subsidy, alternative 1 + 1: 2, # 1 means subsidy, alternative 2 + }, + ) + + +def transit_pass_ownership_model( + name="transit_pass_ownership", + edb_directory="output/estimation_data_bundle/{name}/", + return_data=False, +): + return simple_simulate_model( + name=name, + edb_directory=edb_directory, + return_data=return_data, + choices={ + 0: 1, # 0 means no pass, alternative 1 + 1: 2, # 1 means pass, alternative 2 + }, + ) From 901e2fcaee6650a2e47e33a1109339c61c523e8a Mon Sep 17 00:00:00 2001 From: dhensle Date: Tue, 4 Jul 2023 11:35:16 -0700 Subject: [PATCH 27/58] transponder ownership estimation call --- activitysim/estimation/larch/simple_simulate.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git 
a/activitysim/estimation/larch/simple_simulate.py b/activitysim/estimation/larch/simple_simulate.py index 9681e28875..87ec5594c8 100644 --- a/activitysim/estimation/larch/simple_simulate.py +++ b/activitysim/estimation/larch/simple_simulate.py @@ -359,3 +359,18 @@ def transit_pass_ownership_model( 1: 2, # 1 means pass, alternative 2 }, ) + +def transponder_ownership_model( + name="transponder_ownership", + edb_directory="output/estimation_data_bundle/{name}/", + return_data=False, +): + return simple_simulate_model( + name=name, + edb_directory=edb_directory, + return_data=return_data, + choices={ + 0: 1, # 0 means no pass, alternative 1 + 1: 2, # 1 means pass, alternative 2 + }, + ) \ No newline at end of file From c5c0d80789520948bbf577a423414eaf5e41a867 Mon Sep 17 00:00:00 2001 From: dhensle Date: Fri, 14 Jul 2023 17:14:40 -0700 Subject: [PATCH 28/58] estimation stop frequency improvements --- activitysim/abm/models/stop_frequency.py | 2 +- activitysim/estimation/larch/stop_frequency.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/activitysim/abm/models/stop_frequency.py b/activitysim/abm/models/stop_frequency.py index 94a208075f..4e05fe8749 100644 --- a/activitysim/abm/models/stop_frequency.py +++ b/activitysim/abm/models/stop_frequency.py @@ -199,7 +199,7 @@ def stop_frequency( print(f"survey_trips_not_in_trips\n{survey_trips_not_in_trips}") different = True trips_not_in_survey_trips = trips[~trips.index.isin(survey_trips.index)] - if len(survey_trips_not_in_trips) > 0: + if len(trips_not_in_survey_trips) > 0: print(f"trips_not_in_survey_trips\n{trips_not_in_survey_trips}") different = True assert not different diff --git a/activitysim/estimation/larch/stop_frequency.py b/activitysim/estimation/larch/stop_frequency.py index c572af5e87..e580491e02 100644 --- a/activitysim/estimation/larch/stop_frequency.py +++ b/activitysim/estimation/larch/stop_frequency.py @@ -42,8 +42,7 @@ def stop_frequency_data( seg_purpose = 
seg_["primary_purpose"] seg_subdir = Path(os.path.join(edb_directory, seg_purpose)) segment_coef[seg_["primary_purpose"]] = pd.read_csv( - seg_subdir / seg_["COEFFICIENTS"], - index_col="coefficient_name", + seg_subdir / seg_["COEFFICIENTS"], index_col="coefficient_name", comment="#" ) for seg in segments: @@ -89,13 +88,13 @@ def stop_frequency_data( seg_purpose = seg["primary_purpose"] seg_subdir = Path(os.path.join(edb_directory, seg_purpose)) coeffs_ = pd.read_csv( - seg_subdir / seg["COEFFICIENTS"], index_col="coefficient_name" + seg_subdir / seg["COEFFICIENTS"], index_col="coefficient_name", comment="#" ) coeffs_.index = pd.Index( [f"{i}_{seg_purpose}" for i in coeffs_.index], name="coefficient_name" ) seg_coefficients.append(coeffs_) - spec = pd.read_csv(seg_subdir / "stop_frequency_SPEC_.csv") + spec = pd.read_csv(seg_subdir / "stop_frequency_SPEC_.csv", comment="#") spec = remove_apostrophes(spec, ["Label"]) # spec.iloc[:, 3:] = spec.iloc[:, 3:].applymap(lambda x: f"{x}_{seg_purpose}" if not pd.isna(x) else x) seg_spec.append(spec) From c0fdfde8eb866d55897f45f3cb8e131fa50f1c78 Mon Sep 17 00:00:00 2001 From: dhensle Date: Wed, 19 Jul 2023 12:31:52 -0700 Subject: [PATCH 29/58] sampling location choice in estimation mode, reindex and landuse available --- activitysim/abm/models/location_choice.py | 39 +++++++++++++++++------ 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py index 8470368708..7027c87dd8 100644 --- a/activitysim/abm/models/location_choice.py +++ b/activitysim/abm/models/location_choice.py @@ -17,6 +17,7 @@ ) from activitysim.core.interaction_sample import interaction_sample from activitysim.core.interaction_sample_simulate import interaction_sample_simulate +from activitysim.core.util import reindex from .util import estimation from .util import logsums as logsum @@ -138,15 +139,8 @@ def _location_sample( logger.info("Running %s with %d persons" % 
(trace_label, len(choosers.index))) sample_size = model_settings["SAMPLE_SIZE"] - if config.setting("disable_destination_sampling", False) or ( - estimator and estimator.want_unsampled_alternatives - ): - # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count - logger.info( - "Estimation mode for %s using unsampled alternatives short_circuit_choices" - % (trace_label,) - ) - sample_size = 0 + if estimator: + sample_size = model_settings.get('ESTIMATION_SAMPLE_SIZE', 0) locals_d = { "skims": skims, @@ -154,6 +148,8 @@ def _location_sample( "orig_col_name": skims.orig_key, # added for sharrow flows "dest_col_name": skims.dest_key, # added for sharrow flows "timeframe": "timeless", + "reindex": reindex, + "land_use": inject.get_table("land_use").to_frame(), } constants = config.get_model_constants(model_settings) locals_d.update(constants) @@ -470,6 +466,29 @@ def run_location_sample( trace_label=trace_label, ) + # FIXME temporary code to ensure sampled alternative is in choices for estimation + # Hack to get shorter run times when you don't care about creating EDB for location choice models + if estimator: + # grabbing survey values + survey_persons = estimation.manager.get_survey_table("persons") + if 'school_location' in trace_label: + survey_choices = survey_persons['school_zone_id'].reset_index() + elif ('workplace_location' in trace_label) and ('external' not in trace_label): + survey_choices = survey_persons['workplace_zone_id'].reset_index() + else: + return choices + survey_choices.columns = ['person_id', 'alt_dest'] + survey_choices = survey_choices[survey_choices['person_id'].isin(choices.index) & (survey_choices.alt_dest > 0)] + # merging survey destination into table if not available + joined_data = survey_choices.merge(choices, on=['person_id', 'alt_dest'], how='left', indicator=True) + missing_rows = joined_data[joined_data['_merge'] == 'left_only'] + missing_rows['pick_count'] = 1 + if len(missing_rows) > 0: + 
new_choices = missing_rows[['person_id', 'alt_dest', 'prob', 'pick_count']].set_index('person_id') + choices = choices.append(new_choices, ignore_index=False).sort_index() + # making probability the mean of all other sampled destinations by person + choices['prob'] = choices['prob'].fillna(choices.groupby('person_id')['prob'].transform('mean')) + return choices @@ -601,6 +620,8 @@ def run_location_simulate( "orig_col_name": skims.orig_key, # added for sharrow flows "dest_col_name": skims.dest_key, # added for sharrow flows "timeframe": "timeless", + "reindex": reindex, + "land_use": inject.get_table("land_use").to_frame(), } constants = config.get_model_constants(model_settings) if constants is not None: From a25c891acff8afe4462d047e1912747321ea1481 Mon Sep 17 00:00:00 2001 From: dhensle Date: Wed, 26 Jul 2023 22:45:50 -0700 Subject: [PATCH 30/58] cdap larch updates --- activitysim/estimation/larch/cdap.py | 143 ++++++++++++++++++++++++--- 1 file changed, 127 insertions(+), 16 deletions(-) diff --git a/activitysim/estimation/larch/cdap.py b/activitysim/estimation/larch/cdap.py index fdb801de03..ee96ee5aab 100644 --- a/activitysim/estimation/larch/cdap.py +++ b/activitysim/estimation/larch/cdap.py @@ -19,8 +19,10 @@ _logger = logging.getLogger(logger_name) +MAX_HHSIZE = 5 -def generate_alternatives(n_persons): + +def generate_alternatives(n_persons, add_joint=False): """ Generate a dictionary of CDAP alternatives. 
@@ -39,8 +41,14 @@ def generate_alternatives(n_persons): alt_names = list( "".join(i) for i in itertools.product(basic_patterns, repeat=n_persons) ) + if add_joint: + pattern = r"[MN]" + joint_alts = [ + alt + "J" for alt in alt_names if len(re.findall(pattern, alt)) >= 2 + ] + alt_names = alt_names + joint_alts alt_codes = np.arange(1, len(alt_names) + 1) - return Dict(zip(alt_names, alt_codes)) + return dict(zip(alt_names, alt_codes)) def apply_replacements(expression, prefix, tokens): @@ -67,7 +75,9 @@ def apply_replacements(expression, prefix, tokens): return expression -def cdap_base_utility_by_person(model, n_persons, spec, alts=None, value_tokens=()): +def cdap_base_utility_by_person( + model, n_persons, spec, alts=None, value_tokens=(), add_joint=False +): """ Build the base utility by person for each pattern. @@ -100,7 +110,7 @@ def cdap_base_utility_by_person(model, n_persons, spec, alts=None, value_tokens= model.utility_co[3] += X(spec.Expression[i]) * P(spec.loc[i, "H"]) else: if alts is None: - alts = generate_alternatives(n_persons) + alts = generate_alternatives(n_persons, add_joint) person_numbers = range(1, n_persons + 1) for pnum in person_numbers: for i in spec.index: @@ -220,13 +230,71 @@ def cdap_interaction_utility(model, n_persons, alts, interaction_coef, coefficie model.utility_co[anum] += linear_component -def cdap_split_data(households, values): +def cdap_joint_tour_utility(model, n_persons, alts, joint_coef, values): + """ + FIXME: Not fully implemented!!!! + + Code is adapted from the cdap model in ActivitySim with the joint tour component + Structure is pretty much in place, but dependencies need to be filtered out. 
+ """ + + for row in joint_coef.itertuples(): + for aname, anum in alts.items(): + # only adding joint tour utility to alternatives with joint tours + if "J" not in aname: + continue + expression = row.Expression + dependency_name = row.dependency + coefficient = row.coefficient + + # dealing with dependencies + if dependency_name in ["M_px", "N_px", "H_px"]: + if "_pxprod" in expression: + prod_conds = row.Expression.split("|") + expanded_expressions = [ + tup + for tup in itertools.product( + range(len(prod_conds)), repeat=n_persons + ) + ] + for expression_tup in expanded_expressions: + expression_list = [] + dependency_list = [] + for counter in range(len(expression_tup)): + expression_list.append( + prod_conds[expression_tup[counter]].replace( + "xprod", str(counter + 1) + ) + ) + if expression_tup[counter] == 0: + dependency_list.append( + dependency_name.replace("x", str(counter + 1)) + ) + + expression_value = "&".join(expression_list) + # FIXME only apply to alternative if dependency satisfied + bug + model.utility_co[anum] += X(expression_value) * P(coefficient) + + elif "_px" in expression: + for pnum in range(1, n_persons + 1): + dependency_name = row.dependency.replace("x", str(pnum)) + expression = row.Expression.replace("x", str(pnum)) + # FIXME only apply to alternative if dependency satisfied + bug + model.utility_co[anum] += X(expression) * P(coefficient) + + else: + model.utility_co[anum] += X(expression) * P(coefficient) + + +def cdap_split_data(households, values, add_joint): if "cdap_rank" not in values: raise ValueError("assign cdap_rank to values first") # only process the first 5 household members - values = values[values.cdap_rank <= 5] + values = values[values.cdap_rank <= MAX_HHSIZE] cdap_data = {} - for hhsize, hhs_part in households.groupby(households.hhsize.clip(1, 5)): + for hhsize, hhs_part in households.groupby(households.hhsize.clip(1, MAX_HHSIZE)): if hhsize == 1: v = pd.merge(values, hhs_part.household_id, 
on="household_id").set_index( "household_id" @@ -239,16 +307,31 @@ def cdap_split_data(households, values): ) v.columns = [f"p{i[1]}_{i[0]}" for i in v.columns] for agglom in ["override_choice", "model_choice"]: - v[agglom] = v[[f"p{p}_{agglom}" for p in range(1, hhsize + 1)]].sum(1) + v[agglom] = ( + v[[f"p{p}_{agglom}" for p in range(1, hhsize + 1)]] + .fillna("H") + .sum(1) + ) + if add_joint: + joint_tour_indicator = ( + hhs_part.set_index("household_id") + .reindex(v.index) + .has_joint_tour + ) + pd.testing.assert_index_equal(v.index, joint_tour_indicator.index) + v[agglom] = np.where( + joint_tour_indicator == 1, v[agglom] + "J", v[agglom] + ) cdap_data[hhsize] = v + return cdap_data -def cdap_dataframes(households, values): - data = cdap_split_data(households, values) +def cdap_dataframes(households, values, add_joint): + data = cdap_split_data(households, values, add_joint) dfs = {} for hhsize in data.keys(): - alts = generate_alternatives(hhsize) + alts = generate_alternatives(hhsize, add_joint) dfs[hhsize] = DataFrames( co=data[hhsize], alt_names=alts.keys(), @@ -296,6 +379,7 @@ def cdap_data( spec1_file="{name}_INDIV_AND_HHSIZE1_SPEC.csv", settings_file="{name}_model_settings.yaml", chooser_data_file="{name}_values_combined.csv", + joint_coeffs_file="{name}_joint_tour_coefficients.csv", ): edb_directory = edb_directory.format(name=name) if not os.path.exists(edb_directory): @@ -326,7 +410,7 @@ def read_yaml(filename, **kwargs): if person_type_map is None: raise KeyError("PERSON_TYPE_MAP missing from cdap_settings.yaml") - person_rank = cdap.assign_cdap_rank(persons, person_type_map) + # person_rank = cdap.assign_cdap_rank(persons, person_type_map) coefficients = read_csv( coefficients_file, @@ -341,9 +425,28 @@ def read_yaml(filename, **kwargs): comment="#", ) + try: + joint_coef = read_csv( + joint_coeffs_file, + # dtype={"interaction_ptypes": str}, + # keep_default_na=False, + comment="#", + ) + add_joint = True + except FileNotFoundError: + 
joint_coef = None + add_joint = False + print("Including joint tour utiltiy?:", add_joint) + spec1 = read_csv(spec1_file, comment="#") values = read_csv(chooser_data_file, comment="#") - values["cdap_rank"] = person_rank + person_rank = cdap.assign_cdap_rank( + persons[persons.household_id.isin(values.household_id)] + .set_index("person_id") + .reindex(values.person_id), + person_type_map, + ) + values["cdap_rank"] = person_rank.values return Dict( edb_directory=Path(edb_directory), @@ -353,6 +456,8 @@ def read_yaml(filename, **kwargs): coefficients=coefficients, households=hhs, settings=settings, + joint_coef=joint_coef, + add_joint=add_joint, ) @@ -365,6 +470,7 @@ def cdap_model( spec1_file="{name}_INDIV_AND_HHSIZE1_SPEC.csv", settings_file="{name}_model_settings.yaml", chooser_data_file="{name}_values_combined.csv", + joint_coeffs_file="{name}_joint_tour_coefficients.csv", return_data=False, ): d = cdap_data( @@ -377,6 +483,7 @@ def cdap_model( spec1_file=spec1_file, settings_file=settings_file, chooser_data_file=chooser_data_file, + joint_coeffs_file=joint_coeffs_file, ) households = d.households @@ -384,8 +491,9 @@ def cdap_model( spec1 = d.spec1 interaction_coef = d.interaction_coef coefficients = d.coefficients + add_joint = d.add_joint - cdap_dfs = cdap_dataframes(households, values) + cdap_dfs = cdap_dataframes(households, values, add_joint) m = {} _logger.info(f"building for model 1") m[1] = Model(dataservice=cdap_dfs[1]) @@ -398,12 +506,15 @@ def cdap_model( interaction_coef["cardinality"] = interaction_coef[ "interaction_ptypes" ].str.len() - for s in [2, 3, 4, 5]: + for s in range(2, MAX_HHSIZE + 1): + # for s in [2, 3, 4, 5]: _logger.info(f"building for model {s}") m[s] = Model(dataservice=cdap_dfs[s]) - alts = generate_alternatives(s) + alts = generate_alternatives(s, add_joint) cdap_base_utility_by_person(m[s], s, spec1, alts, values.columns) cdap_interaction_utility(m[s], s, alts, interaction_coef, coefficients) + # if add_joint: + # 
cdap_joint_tour_utility(m[s], s, alts, d.joint_coef, values) m[s].choice_any = True m[s].availability_any = True From 851ba2189ca034255dc21cde502384de20610568 Mon Sep 17 00:00:00 2001 From: dhensle Date: Wed, 26 Jul 2023 22:49:29 -0700 Subject: [PATCH 31/58] blacken --- activitysim/abm/models/cdap.py | 2 +- activitysim/abm/models/location_choice.py | 33 ++++++++++++------- activitysim/estimation/larch/__init__.py | 1 + .../estimation/larch/location_choice.py | 2 ++ .../estimation/larch/simple_simulate.py | 17 ++++++---- 5 files changed, 35 insertions(+), 20 deletions(-) diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index 761c5c970e..f41bea9eee 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -190,7 +190,7 @@ def cdap_simulate(persons_merged, persons, households, chunk_size, trace_hh_id): estimator.write_choices(choices) choices = estimator.get_survey_values(choices, "persons", "cdap_activity") if add_joint_tour_utility: - hh_joint.index.name = 'household_id' + hh_joint.index.name = "household_id" hh_joint = estimator.get_survey_values( hh_joint, "households", "has_joint_tour" ) diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py index 7027c87dd8..7e9a76d551 100644 --- a/activitysim/abm/models/location_choice.py +++ b/activitysim/abm/models/location_choice.py @@ -140,7 +140,7 @@ def _location_sample( sample_size = model_settings["SAMPLE_SIZE"] if estimator: - sample_size = model_settings.get('ESTIMATION_SAMPLE_SIZE', 0) + sample_size = model_settings.get("ESTIMATION_SAMPLE_SIZE", 0) locals_d = { "skims": skims, @@ -471,23 +471,32 @@ def run_location_sample( if estimator: # grabbing survey values survey_persons = estimation.manager.get_survey_table("persons") - if 'school_location' in trace_label: - survey_choices = survey_persons['school_zone_id'].reset_index() - elif ('workplace_location' in trace_label) and ('external' not in trace_label): - 
survey_choices = survey_persons['workplace_zone_id'].reset_index() + if "school_location" in trace_label: + survey_choices = survey_persons["school_zone_id"].reset_index() + elif ("workplace_location" in trace_label) and ("external" not in trace_label): + survey_choices = survey_persons["workplace_zone_id"].reset_index() else: return choices - survey_choices.columns = ['person_id', 'alt_dest'] - survey_choices = survey_choices[survey_choices['person_id'].isin(choices.index) & (survey_choices.alt_dest > 0)] + survey_choices.columns = ["person_id", "alt_dest"] + survey_choices = survey_choices[ + survey_choices["person_id"].isin(choices.index) + & (survey_choices.alt_dest > 0) + ] # merging survey destination into table if not available - joined_data = survey_choices.merge(choices, on=['person_id', 'alt_dest'], how='left', indicator=True) - missing_rows = joined_data[joined_data['_merge'] == 'left_only'] - missing_rows['pick_count'] = 1 + joined_data = survey_choices.merge( + choices, on=["person_id", "alt_dest"], how="left", indicator=True + ) + missing_rows = joined_data[joined_data["_merge"] == "left_only"] + missing_rows["pick_count"] = 1 if len(missing_rows) > 0: - new_choices = missing_rows[['person_id', 'alt_dest', 'prob', 'pick_count']].set_index('person_id') + new_choices = missing_rows[ + ["person_id", "alt_dest", "prob", "pick_count"] + ].set_index("person_id") choices = choices.append(new_choices, ignore_index=False).sort_index() # making probability the mean of all other sampled destinations by person - choices['prob'] = choices['prob'].fillna(choices.groupby('person_id')['prob'].transform('mean')) + choices["prob"] = choices["prob"].fillna( + choices.groupby("person_id")["prob"].transform("mean") + ) return choices diff --git a/activitysim/estimation/larch/__init__.py b/activitysim/estimation/larch/__init__.py index d5ff3d0921..a175db6e7d 100644 --- a/activitysim/estimation/larch/__init__.py +++ b/activitysim/estimation/larch/__init__.py @@ -11,6 +11,7 
@@ from .stop_frequency import * from .external_worker_identification import * + def component_model(name, *args, **kwargs): if isinstance(name, str): m = globals().get(f"{name}_model") diff --git a/activitysim/estimation/larch/location_choice.py b/activitysim/estimation/larch/location_choice.py index 64aa471410..41e2f78e63 100644 --- a/activitysim/estimation/larch/location_choice.py +++ b/activitysim/estimation/larch/location_choice.py @@ -330,6 +330,7 @@ def workplace_location_model(**kwargs): **kwargs, ) + def external_workplace_location_model(**kwargs): unused = kwargs.pop("name", None) return location_choice_model( @@ -337,6 +338,7 @@ def external_workplace_location_model(**kwargs): **kwargs, ) + def school_location_model(**kwargs): unused = kwargs.pop("name", None) return location_choice_model( diff --git a/activitysim/estimation/larch/simple_simulate.py b/activitysim/estimation/larch/simple_simulate.py index 87ec5594c8..843311da17 100644 --- a/activitysim/estimation/larch/simple_simulate.py +++ b/activitysim/estimation/larch/simple_simulate.py @@ -230,6 +230,7 @@ def free_parking_model( }, # True is free parking, False is paid parking, names match spec positions ) + def work_from_home_model( name="work_from_home", edb_directory="output/estimation_data_bundle/{name}/", @@ -311,6 +312,7 @@ def joint_tour_participation_model( }, ) + def external_non_mandatory_identification_model( name="external_non_mandatory_identification", edb_directory="output/estimation_data_bundle/{name}/", @@ -339,8 +341,8 @@ def transit_pass_subsidy_model( edb_directory=edb_directory, return_data=return_data, choices={ - 0: 1, # 0 means no subsidy, alternative 1 - 1: 2, # 1 means subsidy, alternative 2 + 0: 1, # 0 means no subsidy, alternative 1 + 1: 2, # 1 means subsidy, alternative 2 }, ) @@ -355,11 +357,12 @@ def transit_pass_ownership_model( edb_directory=edb_directory, return_data=return_data, choices={ - 0: 1, # 0 means no pass, alternative 1 - 1: 2, # 1 means pass, alternative 2 
+ 0: 1, # 0 means no pass, alternative 1 + 1: 2, # 1 means pass, alternative 2 }, ) + def transponder_ownership_model( name="transponder_ownership", edb_directory="output/estimation_data_bundle/{name}/", @@ -370,7 +373,7 @@ def transponder_ownership_model( edb_directory=edb_directory, return_data=return_data, choices={ - 0: 1, # 0 means no pass, alternative 1 - 1: 2, # 1 means pass, alternative 2 + 0: 1, # 0 means no pass, alternative 1 + 1: 2, # 1 means pass, alternative 2 }, - ) \ No newline at end of file + ) From cb4a0cbb68a800e5362f0367de570459530dffc6 Mon Sep 17 00:00:00 2001 From: Joel Freedman Date: Mon, 14 Aug 2023 15:53:50 -0700 Subject: [PATCH 32/58] added estimation functionality for external_non_mandatory_destination model --- activitysim/estimation/larch/location_choice.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/activitysim/estimation/larch/location_choice.py b/activitysim/estimation/larch/location_choice.py index 41e2f78e63..7c88531884 100644 --- a/activitysim/estimation/larch/location_choice.py +++ b/activitysim/estimation/larch/location_choice.py @@ -374,6 +374,12 @@ def non_mandatory_tour_destination_model(**kwargs): **kwargs, ) +def external_non_mandatory_destination_model(**kwargs): + unused = kwargs.pop("name", None) + return location_choice_model( + name="external_non_mandatory_destination", + **kwargs, + ) def trip_destination_model(**kwargs): unused = kwargs.pop("name", None) From 7ac64a10e4d2fe07e3f3cc3c4b2bca295945eaf3 Mon Sep 17 00:00:00 2001 From: dhensle Date: Thu, 17 Aug 2023 14:19:52 -0700 Subject: [PATCH 33/58] nmtf estimation & enhancements --- .../models/non_mandatory_tour_frequency.py | 10 +- activitysim/abm/models/util/overlap.py | 93 ++++++++++++++++ activitysim/core/config.py | 2 +- .../estimation/larch/nonmand_tour_freq.py | 102 +++++++++++++++++- 4 files changed, 202 insertions(+), 5 deletions(-) diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py 
b/activitysim/abm/models/non_mandatory_tour_frequency.py index b9a6db1fee..a76a3650bf 100644 --- a/activitysim/abm/models/non_mandatory_tour_frequency.py +++ b/activitysim/abm/models/non_mandatory_tour_frequency.py @@ -20,7 +20,7 @@ from .util import annotate from .util.school_escort_tours_trips import recompute_tour_count_statistics -from .util.overlap import person_max_window +from .util.overlap import person_max_window, person_available_periods from .util.tour_frequency import process_non_mandatory_tours logger = logging.getLogger(__name__) @@ -166,7 +166,10 @@ def non_mandatory_tour_frequency(persons, persons_merged, chunk_size, trace_hh_i preprocessor_settings = model_settings.get("preprocessor", None) if preprocessor_settings: - locals_dict = {"person_max_window": person_max_window} + locals_dict = { + "person_max_window": person_max_window, + "person_available_periods": person_available_periods, + } expressions.assign_columns( df=choosers, @@ -259,6 +262,9 @@ def non_mandatory_tour_frequency(persons, persons_merged, chunk_size, trace_hh_i choices_list.append(choices) + # FIXME only want to keep actual purposes, adding cols in alts will mess this up + # this is complicated by canonical_ids calculated based on alts if not specified explicitly + # thus, adding column to input alts will change IDs and break estimation mode.... 
del alternatives["tot_tours"] # del tot_tours column we added above # The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate diff --git a/activitysim/abm/models/util/overlap.py b/activitysim/abm/models/util/overlap.py index 70fadfbd43..f7773a14a3 100644 --- a/activitysim/abm/models/util/overlap.py +++ b/activitysim/abm/models/util/overlap.py @@ -250,3 +250,96 @@ def person_max_window(persons): max_window.index = persons.index return max_window + + +def calculate_consecutive(array): + # Append zeros columns at either side of counts + append1 = np.zeros((array.shape[0], 1), dtype=int) + array_ext = np.column_stack((append1, array, append1)) + + # Get start and stop indices with 1s as triggers + diffs = np.diff((array_ext == 1).astype(int), axis=1) + starts = np.argwhere(diffs == 1) + stops = np.argwhere(diffs == -1) + + # Get intervals using differences between start and stop indices + intvs = stops[:, 1] - starts[:, 1] + + # Store intervals as a 2D array for further vectorized ops to make. + c = np.bincount(starts[:, 0]) + mask = np.arange(c.max()) < c[:, None] + intvs2D = mask.astype(float) + intvs2D[mask] = intvs + + # Get max along each row as final output + out = intvs2D.max(1).astype(int) + return out + + +def person_available_periods(persons, start_bin=None, end_bin=None, continuous=False): + """ + Returns the number of available time period bins for each person in persons. + Can limit the calculation to include starting and/or ending bins. + Can return either the total number of available time bins with continuous = False, + or only the maximum continuous run of available bins with continuous = True + + This is equivalent to person_max_window if no start/end bins provided and continuous=True + + time bins are inclusive, i.e. [start_bin, end_bin] + + e.g. 
+ available out of timetable has dummy first and last bins + available = [ + [1,1,1,1,1,1,1,1,1,1,1,1], + [1,1,0,1,1,0,0,1,0,1,0,1], + #-,0,1,2,3,4,5,6,7,8,9,- time bins + ] + returns: + for start_bin=None, end_bin=None, continuous=False: (10, 5) + for start_bin=None, end_bin=None, continuous=True: (10, 2) + for start_bin=5, end_bin=9, continuous=False: (5, 2) + for start_bin=5, end_bin=9, continuous=True: (5, 1) + + + Parameters + ---------- + start_bin : (int) starting time bin to include starting from 0 + end_bin : (int) ending time bin to include + continuous : (bool) count all available bins if false or just largest continuous run if True + + Returns + ------- + pd.Series of the number of available time bins indexed by person ID + """ + timetable = inject.get_injectable("timetable") + + # ndarray with one row per person and one column per time period + # array value of 1 where free periods and 0 elsewhere + s = pd.Series(persons.index.values, index=persons.index) + + # first and last bins are dummys in the time table + # so if you have 48 half hour time periods, shape is (len(persons), 50) + available = timetable.individually_available(s) + + # Create a mask to exclude bins before the starting bin and after the ending bin + mask = np.ones(available.shape[1], dtype=bool) + mask[0] = False + mask[len(mask) - 1] = False + if start_bin is not None: + # +1 needed due to dummy first bin + mask[: start_bin + 1] = False + if end_bin is not None: + # +2 for dummy first bin and inclusive end_bin + mask[end_bin + 2 :] = False + + # Apply the mask to the array + masked_array = available[:, mask] + + # Calculate the number of available time periods for each person + availability = np.sum(masked_array, axis=1) + + if continuous: + availability = calculate_consecutive(masked_array) + + availability = pd.Series(availability, index=persons.index) + return availability diff --git a/activitysim/core/config.py b/activitysim/core/config.py index 7024d0512b..a86093f3e6 100644 --- 
a/activitysim/core/config.py +++ b/activitysim/core/config.py @@ -708,7 +708,7 @@ def filter_warnings(): # These warning are left as warnings as an invitation for future enhancement. from pandas.errors import PerformanceWarning - warnings.filterwarnings("default", category=PerformanceWarning) + warnings.filterwarnings("ignore", category=PerformanceWarning) # pandas 1.5 # beginning in pandas version 1.5, a new warning is emitted when a column is set via iloc diff --git a/activitysim/estimation/larch/nonmand_tour_freq.py b/activitysim/estimation/larch/nonmand_tour_freq.py index 9dfdac73a4..ba911dd219 100644 --- a/activitysim/estimation/larch/nonmand_tour_freq.py +++ b/activitysim/estimation/larch/nonmand_tour_freq.py @@ -7,6 +7,8 @@ from larch import DataFrames, Model from larch.log import logger_name from larch.util import Dict +import pickle +from datetime import datetime from .general import ( apply_coefficients, @@ -27,6 +29,7 @@ def interaction_simulate_data( coefficients_files="{segment_name}/{name}_coefficients_{segment_name}.csv", chooser_data_files="{segment_name}/{name}_choosers_combined.csv", alt_values_files="{segment_name}/{name}_interaction_expression_values.csv", + segment_subset=[], ): edb_directory = edb_directory.format(name=name) @@ -46,21 +49,30 @@ def _read_csv(filename, **kwargs): alt_values = {} segment_names = [s["NAME"] for s in settings["SPEC_SEGMENTS"]] + if len(segment_subset) > 0: + assert set(segment_subset).issubset( + set(segment_names) + ), f"{segment_subset} is not a subset of {segment_names}" + segment_names = segment_subset for segment_name in segment_names: + print(f"Loading EDB for {segment_name} segment") coefficients[segment_name] = _read_csv( coefficients_files.format(name=name, segment_name=segment_name), index_col="coefficient_name", + comment="#", ) chooser_data[segment_name] = _read_csv( chooser_data_files.format(name=name, segment_name=segment_name), ) alt_values[segment_name] = _read_csv( 
alt_values_files.format(name=name, segment_name=segment_name), + comment="#", ) spec = _read_csv( spec_file, + comment="#", ) spec = remove_apostrophes(spec, ["Label"]) # alt_names = list(spec.columns[3:]) @@ -118,10 +130,80 @@ def unavail(model, x_ca): return unav +# FIXME move all this to larch/general.py? see ActivitySim issue #686 +def _read_feather(filename, name, edb_directory, **kwargs): + filename = filename.format(name=name) + return pd.read_feather(os.path.join(edb_directory, filename), **kwargs) + + +def _to_feather(df, filename, name, edb_directory, **kwargs): + filename = filename.format(name=name) + return df.to_feather(os.path.join(edb_directory, filename), **kwargs) + + +def _read_pickle(filename, name, edb_directory, **kwargs): + filename = filename.format(name=name) + return pd.read_pickle(os.path.join(edb_directory, filename), **kwargs) + + +def _to_pickle(df, filename, name, edb_directory, **kwargs): + filename = filename.format(name=name) + return df.to_pickle(os.path.join(edb_directory, filename), **kwargs) + + +def _file_exists(filename, name, edb_directory): + filename = filename.format(name=name) + return os.path.exists(os.path.join(edb_directory, filename)) + + +def get_x_ca_df(alt_values, name, edb_directory, num_chunks): + def split(a, n): + k, m = divmod(len(a), n) + return (a[i * k + min(i, m) : (i + 1) * k + min(i + 1, m)] for i in range(n)) + + # process x_ca with cv_to_ca with or without chunking + x_ca_pickle_file = "{name}_x_ca.pkl" + if num_chunks == 1: + x_ca = cv_to_ca(alt_values) + elif _file_exists(x_ca_pickle_file, name, edb_directory): + # if pickle file from previous x_ca processing exists, load it to save time + time_start = datetime.now() + x_ca = _read_pickle(x_ca_pickle_file, name, edb_directory) + print( + f"x_ca data loaded from {name}_x_ca.fea - time elapsed {(datetime.now() - time_start).total_seconds()}" + ) + else: + time_start = datetime.now() + # calculate chunking_size based on num_chunks (or max number of rows
per chunk) + chunking_size = round(len(alt_values) / num_chunks, 3) + print( + f"Using {num_chunks} chunks results in chunk size of {chunking_size} (of {len(alt_values)} total rows)" + ) + all_chunk_ids = list(alt_values.index.get_level_values(0).unique()) + split_ids = list(split(all_chunk_ids, num_chunks)) + x_ca_list = [] + for i, chunk_ids in enumerate(split_ids): + alt_values_i = alt_values.loc[chunk_ids] + x_ca_i = cv_to_ca(alt_values_i) + x_ca_list.append(x_ca_i) + print( + f"\rx_ca_i compute done for chunk {i+1}/{num_chunks} - time elapsed {(datetime.now() - time_start).total_seconds()}" + ) + x_ca = pd.concat(x_ca_list, axis=0) + # save final x_ca result as pickle file to save time for future data loading + _to_pickle(x_ca, x_ca_pickle_file, name, edb_directory) + print( + f"x_ca compute done - time elapsed {(datetime.now() - time_start).total_seconds()}" + ) + return x_ca + + def nonmand_tour_freq_model( edb_directory="output/estimation_data_bundle/{name}/", return_data=False, condense_parameters=False, + segment_subset=[], + num_chunks=1, ): """ Prepare nonmandatory tour frequency models for estimation. 
@@ -141,10 +223,16 @@ def nonmand_tour_freq_model( data = interaction_simulate_data( name="non_mandatory_tour_frequency", edb_directory=edb_directory, + segment_subset=segment_subset, ) settings = data.settings segment_names = [s["NAME"] for s in settings["SPEC_SEGMENTS"]] + if len(segment_subset) > 0: + assert set(segment_subset).issubset( + set(segment_names) + ), f"{segment_subset} is not a subset of {segment_names}" + segment_names = segment_subset if condense_parameters: data.relabel_coef = link_same_value_coefficients( segment_names, data.coefficients, data.spec @@ -157,6 +245,7 @@ def nonmand_tour_freq_model( m = {} for segment_name in segment_names: + print(f"Creating larch model for {segment_name}") segment_model = m[segment_name] = Model() # One of the alternatives is coded as 0, so # we need to explicitly initialize the MNL nesting graph @@ -178,11 +267,20 @@ def nonmand_tour_freq_model( .set_index("person_id") .rename(columns={"TAZ": "HOMETAZ"}) ) - x_ca = cv_to_ca(alt_values[segment_name].set_index(["person_id", "variable"])) + print("\t performing cv to ca step") + # x_ca = cv_to_ca(alt_values[segment_name].set_index(["person_id", "variable"])) + x_ca = get_x_ca_df( + alt_values=alt_values[segment_name].set_index(["person_id", "variable"]), + name=segment_name, + edb_directory=edb_directory.format(name="non_mandatory_tour_frequency"), + num_chunks=num_chunks, + ) + d = DataFrames( co=x_co, ca=x_ca, - av=~unavail(segment_model, x_ca), + av=True, + # av=~unavail(segment_model, x_ca), ) m[segment_name].dataservice = d From 56ec71b69bdbebc6fed12a9ce15cd7b6e8cc4aea Mon Sep 17 00:00:00 2001 From: dhensle Date: Thu, 17 Aug 2023 14:22:33 -0700 Subject: [PATCH 34/58] assertions in location choice estimation --- activitysim/estimation/larch/location_choice.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/activitysim/estimation/larch/location_choice.py b/activitysim/estimation/larch/location_choice.py index 41e2f78e63..12bae4e4ee 100644 --- 
a/activitysim/estimation/larch/location_choice.py +++ b/activitysim/estimation/larch/location_choice.py @@ -106,6 +106,9 @@ def _read_csv(filename, **kwargs): .set_index("segment") ) size_spec = size_spec.loc[:, size_spec.max() > 0] + assert ( + len(size_spec) > 0 + ), f"Empty size_spec, is model_selector {SIZE_TERM_SELECTOR} in your size term file?" size_coef = size_coefficients_from_spec(size_spec) @@ -214,6 +217,9 @@ def _read_csv(filename, **kwargs): else: av = 1 + assert len(x_co) > 0, "Empty chooser dataframe" + assert len(x_ca_1) > 0, "Empty alternatives dataframe" + d = DataFrames(co=x_co, ca=x_ca_1, av=av) m = Model(dataservice=d) From 49a20fb1419b75e4f7c4e35d66a7718084d90577 Mon Sep 17 00:00:00 2001 From: dhensle Date: Thu, 17 Aug 2023 14:24:48 -0700 Subject: [PATCH 35/58] blacken --- activitysim/estimation/larch/location_choice.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/activitysim/estimation/larch/location_choice.py b/activitysim/estimation/larch/location_choice.py index 0035cb53e5..870343f893 100644 --- a/activitysim/estimation/larch/location_choice.py +++ b/activitysim/estimation/larch/location_choice.py @@ -380,6 +380,7 @@ def non_mandatory_tour_destination_model(**kwargs): **kwargs, ) + def external_non_mandatory_destination_model(**kwargs): unused = kwargs.pop("name", None) return location_choice_model( @@ -387,6 +388,7 @@ def external_non_mandatory_destination_model(**kwargs): **kwargs, ) + def trip_destination_model(**kwargs): unused = kwargs.pop("name", None) return location_choice_model( From 0950b8093196d96032273c7abd68a1599ce770d9 Mon Sep 17 00:00:00 2001 From: Joel Freedman Date: Tue, 12 Sep 2023 14:21:37 -0700 Subject: [PATCH 36/58] Added estimation functionality for telecommute_frequency_model --- .../estimation/larch/simple_simulate.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/activitysim/estimation/larch/simple_simulate.py b/activitysim/estimation/larch/simple_simulate.py index 
843311da17..6538466188 100644 --- a/activitysim/estimation/larch/simple_simulate.py +++ b/activitysim/estimation/larch/simple_simulate.py @@ -377,3 +377,21 @@ def transponder_ownership_model( 1: 2, # 1 means pass, alternative 2 }, ) + +def telecommute_frequency_model( + name="telecommute_frequency", + edb_directory="output/estimation_data_bundle/{name}/", + return_data=False, +): + return simple_simulate_model( + name=name, + edb_directory=edb_directory, + return_data=return_data, + choices={ + "No_Telecommute" : 1, + "1_day_week" : 2, + "2_3_days_week" : 3, + "4_days_week" : 4, + }, + ) + From fad6808a3804c68eb3d9a0a69ac310d8fcef5a5b Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Mon, 25 Sep 2023 15:59:54 -0700 Subject: [PATCH 37/58] Updated trip_matrices.py to not overwrite trip destination with parking location and have trip tables read from parking location for trips originating at a parking location --- activitysim/abm/models/trip_matrices.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/activitysim/abm/models/trip_matrices.py b/activitysim/abm/models/trip_matrices.py index 0c9e1f447f..2db8f8c5a4 100644 --- a/activitysim/abm/models/trip_matrices.py +++ b/activitysim/abm/models/trip_matrices.py @@ -52,11 +52,23 @@ def write_trip_matrices(network_los): parking_settings = config.read_model_settings("parking_location_choice.yaml") parking_taz_col_name = parking_settings["ALT_DEST_COL_NAME"] if parking_taz_col_name in trips_df: - # TODO make parking zone negative, not zero, if not used + + trips_df["true_origin"] = trips_df["origin"] + trips_df["true_destination"] = trips_df["destination"] + + # Get origin parking zone if vehicle not parked at origin + trips_df["origin_parking_zone"] = np.where( + trips_df["tour_id"] == trips_df["tour_id"].shift(1), + trips_df[parking_taz_col_name].shift(1), + -1 + ) + trips_df.loc[trips_df[parking_taz_col_name] > 0, "destination"] = trips_df[ parking_taz_col_name ] - # Also need 
address the return trip + trips_df.loc[trips_df["origin_parking_zone"] > 0, "origin"] = trips_df[ + "origin_parking_zone" + ] # write matrices by zone system type if network_los.zone_system == los.ONE_ZONE: # taz trips written to taz matrices @@ -213,6 +225,12 @@ def write_trip_matrices(network_los): write_matrices( aggregate_trips, zone_index, orig_index, dest_index, model_settings, True ) + + if "parking_location" in config.setting("models"): + # Set trip origin and destination to be the actual location the person is and not where their vehicle is parked + trips_df["origin"] = trips_df["true_origin"] + trips_df["destination"] = trips_df["true_destination"] + del trips_df["true_origin"], trips_df["true_destination"] def annotate_trips(trips, network_los, model_settings): From 5adfc7beb473457dc1978a71747a82a34b668586 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Thu, 1 Feb 2024 20:29:53 -0800 Subject: [PATCH 38/58] remove alts from alts_long in veh type choice --- activitysim/abm/models/vehicle_type_choice.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index 324e505dda..20d1d58208 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -222,6 +222,11 @@ def construct_model_alternatives(model_settings, alts_cats_dict, vehicle_type_da else: # eliminate alternatives if no vehicle type data alts_wide = alts_wide[alts_wide._merge != "left_only"] + # need to also remove any alts from alts_long + alts_long.set_index(['body_type', 'fuel_type', 'age'], inplace=True) + alts_long = alts_long[alts_long.index.isin( + alts_wide.set_index(['body_type', 'fuel_type', 'age']).index) + ].reset_index() alts_wide.drop(columns="_merge", inplace=True) # converting age to integer to allow interactions in utilities From 15b7f31a3e07d41f2a394651c85ed2a7e46d26fb Mon Sep 17 00:00:00 2001 From: David Hensle Date: Fri, 2 Feb 2024 
17:18:42 -0800 Subject: [PATCH 39/58] logging and correct order for veh_type name --- activitysim/abm/models/vehicle_type_choice.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index 20d1d58208..e6ebf5b200 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -221,11 +221,17 @@ def construct_model_alternatives(model_settings, alts_cats_dict, vehicle_type_da ), f"missing vehicle data for alternatives:\n {missing_alts}" else: # eliminate alternatives if no vehicle type data + num_alts_before_filer = len(alts_wide) alts_wide = alts_wide[alts_wide._merge != "left_only"] + logger.warning( + f"Removed {num_alts_before_filer - len(alts_wide)} alternatives not included in input vehicle type data." + ) # need to also remove any alts from alts_long - alts_long.set_index(['body_type', 'fuel_type', 'age'], inplace=True) - alts_long = alts_long[alts_long.index.isin( - alts_wide.set_index(['body_type', 'fuel_type', 'age']).index) + alts_long.set_index(["body_type", "age", "fuel_type"], inplace=True) + alts_long = alts_long[ + alts_long.index.isin( + alts_wide.set_index(["body_type", "age", "fuel_type"]).index + ) ].reset_index() alts_wide.drop(columns="_merge", inplace=True) From 2a99bed359ee8307c301da4eb14375644bfb95b3 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Mon, 5 Feb 2024 16:56:05 -0800 Subject: [PATCH 40/58] correct veh type alts for choices --- activitysim/abm/models/vehicle_type_choice.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index e6ebf5b200..95d4087aa4 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -233,6 +233,7 @@ def construct_model_alternatives(model_settings, 
alts_cats_dict, vehicle_type_da alts_wide.set_index(["body_type", "age", "fuel_type"]).index ) ].reset_index() + alts_long.index = alts_wide.index alts_wide.drop(columns="_merge", inplace=True) # converting age to integer to allow interactions in utilities @@ -427,14 +428,12 @@ def iterate_vehicle_type_choice( choices.rename(columns={"choice": "vehicle_type"}, inplace=True) if alts_cats_dict: - alts = ( - alts_long[alts_long.columns] - .apply(lambda row: "_".join(row.values.astype(str)), axis=1) - .values - ) + alts = alts_long[alts_long.columns].apply( + lambda row: "_".join(row.values.astype(str)), axis=1 + ).to_dict() else: - alts = model_spec.columns - choices["vehicle_type"] = choices["vehicle_type"].map(dict(enumerate(alts))) + alts = enumerate(dict(model_spec.columns)) + choices["vehicle_type"] = choices["vehicle_type"].map(alts) # STEP II: append probabilistic vehicle type attributes if probs_spec_file is not None: From 8cccce30d298d77f7699f40f387ebe4f7f2a475d Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Tue, 20 Feb 2024 16:41:34 -0800 Subject: [PATCH 41/58] Updated origin adjustment to parking zone to only happen if the trip is an auto mode --- activitysim/abm/models/trip_matrices.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/activitysim/abm/models/trip_matrices.py b/activitysim/abm/models/trip_matrices.py index 2db8f8c5a4..97a6361fc6 100644 --- a/activitysim/abm/models/trip_matrices.py +++ b/activitysim/abm/models/trip_matrices.py @@ -51,6 +51,16 @@ def write_trip_matrices(network_los): if "parking_location" in config.setting("models"): parking_settings = config.read_model_settings("parking_location_choice.yaml") parking_taz_col_name = parking_settings["ALT_DEST_COL_NAME"] + auto_nest_name = parking_settings["AUTO_MODE_NEST"] + + # Read trip mode choice settings to get auto modes + trip_mode_choice_settings = config.read_model_settings("trip_mode_choice.yaml") + trip_mode_choice_nest = 
config.get_logit_mocel_settings(trip_mode_choice_settings) + for alternative in trip_mode_choice_nest["alternatives"]: + if alternative["name"] == auto_nest_name: + auto_modes = alternative["alternatives"] + break + if parking_taz_col_name in trips_df: trips_df["true_origin"] = trips_df["origin"] @@ -59,7 +69,11 @@ def write_trip_matrices(network_los): # Get origin parking zone if vehicle not parked at origin trips_df["origin_parking_zone"] = np.where( trips_df["tour_id"] == trips_df["tour_id"].shift(1), - trips_df[parking_taz_col_name].shift(1), + np.where( + trip_df["trip_mode"].apply(lambda x: x in auto_modes), + trips_df[parking_taz_col_name].shift(1), + -1 + ) -1 ) From 851319dc1019fe3d489bbc062d7fbc165c0fb6b2 Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Tue, 20 Feb 2024 16:45:21 -0800 Subject: [PATCH 42/58] Final trips table now reports origin and destination TAZs regardless of parking TAZ --- activitysim/abm/models/trip_matrices.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/activitysim/abm/models/trip_matrices.py b/activitysim/abm/models/trip_matrices.py index 97a6361fc6..7d592c4e4a 100644 --- a/activitysim/abm/models/trip_matrices.py +++ b/activitysim/abm/models/trip_matrices.py @@ -246,6 +246,13 @@ def write_trip_matrices(network_los): trips_df["destination"] = trips_df["true_destination"] del trips_df["true_origin"], trips_df["true_destination"] + trips_df["otaz"] = ( + pipeline.get_table("land_use").reindex(trips_df["origin"]).TAZ.tolist() + ) + trips_df["dtaz"] = ( + pipeline.get_table("land_use").reindex(trips_df["destination"]).TAZ.tolist() + ) + def annotate_trips(trips, network_los, model_settings): """ From 5fb2b839c5712d17ed5220c7a65267dd3b073445 Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Tue, 20 Feb 2024 16:47:37 -0800 Subject: [PATCH 43/58] Previous commit is only necessary for 2- and 3-zone systems --- activitysim/abm/models/trip_matrices.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git 
a/activitysim/abm/models/trip_matrices.py b/activitysim/abm/models/trip_matrices.py index 7d592c4e4a..3e14ef61b9 100644 --- a/activitysim/abm/models/trip_matrices.py +++ b/activitysim/abm/models/trip_matrices.py @@ -246,12 +246,13 @@ def write_trip_matrices(network_los): trips_df["destination"] = trips_df["true_destination"] del trips_df["true_origin"], trips_df["true_destination"] - trips_df["otaz"] = ( - pipeline.get_table("land_use").reindex(trips_df["origin"]).TAZ.tolist() - ) - trips_df["dtaz"] = ( - pipeline.get_table("land_use").reindex(trips_df["destination"]).TAZ.tolist() - ) + if network_los.zone_system == los.TWO_ZONE or network_los.zone_system == los.THREE_ZONE: + trips_df["otaz"] = ( + pipeline.get_table("land_use").reindex(trips_df["origin"]).TAZ.tolist() + ) + trips_df["dtaz"] = ( + pipeline.get_table("land_use").reindex(trips_df["destination"]).TAZ.tolist() + ) def annotate_trips(trips, network_los, model_settings): From 11a3d332120f29776f8c2f7327023086118852cc Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Tue, 20 Feb 2024 17:24:37 -0800 Subject: [PATCH 44/58] Removed lookup of trip mode choice configs so user specifies auto modes in parking location settings --- activitysim/abm/models/trip_matrices.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/activitysim/abm/models/trip_matrices.py b/activitysim/abm/models/trip_matrices.py index 3e14ef61b9..37692ed439 100644 --- a/activitysim/abm/models/trip_matrices.py +++ b/activitysim/abm/models/trip_matrices.py @@ -51,15 +51,8 @@ def write_trip_matrices(network_los): if "parking_location" in config.setting("models"): parking_settings = config.read_model_settings("parking_location_choice.yaml") parking_taz_col_name = parking_settings["ALT_DEST_COL_NAME"] - auto_nest_name = parking_settings["AUTO_MODE_NEST"] - - # Read trip mode choice settings to get auto modes - trip_mode_choice_settings = config.read_model_settings("trip_mode_choice.yaml") - trip_mode_choice_nest = 
config.get_logit_mocel_settings(trip_mode_choice_settings) - for alternative in trip_mode_choice_nest["alternatives"]: - if alternative["name"] == auto_nest_name: - auto_modes = alternative["alternatives"] - break + assert "AUTO_MODES" in parking_settings, "AUTO_MODES must be specified in parking location settings to properly adjust trip tables for assignment" + auto_modes = parking_settings["AUTO_MODES"] if parking_taz_col_name in trips_df: From 389131e2b399c67052b971275a8c3518df0707ec Mon Sep 17 00:00:00 2001 From: Joe Flood Date: Thu, 14 Mar 2024 08:46:54 -0700 Subject: [PATCH 45/58] Fixed bugs found by Ali --- activitysim/abm/models/trip_matrices.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/activitysim/abm/models/trip_matrices.py b/activitysim/abm/models/trip_matrices.py index 37692ed439..cfa65f4182 100644 --- a/activitysim/abm/models/trip_matrices.py +++ b/activitysim/abm/models/trip_matrices.py @@ -63,10 +63,10 @@ def write_trip_matrices(network_los): trips_df["origin_parking_zone"] = np.where( trips_df["tour_id"] == trips_df["tour_id"].shift(1), np.where( - trip_df["trip_mode"].apply(lambda x: x in auto_modes), + trips_df["trip_mode"].apply(lambda x: x in auto_modes), trips_df[parking_taz_col_name].shift(1), -1 - ) + ), -1 ) From f9695426cf6cb947fd9495f9e0726cb65cbca73b Mon Sep 17 00:00:00 2001 From: David Hensle Date: Mon, 18 Mar 2024 11:09:55 -0700 Subject: [PATCH 46/58] adding school escort columns that were missed if no escorting --- activitysim/abm/models/school_escorting.py | 2 ++ activitysim/abm/models/util/school_escort_tours_trips.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py index 319c0c0e54..2ae72a968f 100644 --- a/activitysim/abm/models/school_escorting.py +++ b/activitysim/abm/models/school_escorting.py @@ -563,6 +563,8 @@ def school_escorting( "depart", "purpose", "destination", + "escort_participants", + 
"chauf_tour_id", ] school_escort_trips = pd.DataFrame(columns=trip_cols) diff --git a/activitysim/abm/models/util/school_escort_tours_trips.py b/activitysim/abm/models/util/school_escort_tours_trips.py index 691239ee10..0a7796dea7 100644 --- a/activitysim/abm/models/util/school_escort_tours_trips.py +++ b/activitysim/abm/models/util/school_escort_tours_trips.py @@ -401,6 +401,9 @@ def merge_school_escort_trips_into_pipeline(): # checking to see if there are school escort trips to merge in if len(school_escort_trips) == 0: + # if no trips, fill escorting columns with NA + trips[["escort_participants", "school_escort_direction", "school_escort_trip_id",]] = pd.NA + pipeline.replace_table("trips", trips) return trips # want to remove stops if school escorting takes place on that half tour so we can replace them with the actual stops From 627e2867b904633c47007e912f69b65c84c7f549 Mon Sep 17 00:00:00 2001 From: Ali Etezady <58451076+aletzdy@users.noreply.github.com> Date: Tue, 19 Mar 2024 10:50:58 -0700 Subject: [PATCH 47/58] fixing disaggregate accessibility bug in zone sampler --- activitysim/abm/models/disaggregate_accessibility.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/activitysim/abm/models/disaggregate_accessibility.py b/activitysim/abm/models/disaggregate_accessibility.py index 2bb78bf60e..94bf963c54 100644 --- a/activitysim/abm/models/disaggregate_accessibility.py +++ b/activitysim/abm/models/disaggregate_accessibility.py @@ -275,6 +275,10 @@ def zone_sampler(self): maz_candidates = maz_candidates[ ~maz_candidates.MAZ.isin(maz_sample_idx) ] + + # Need to make sure we sample from TAZs that still exist in the maz_candidates + taz_candidates = taz_candidates[taz_candidates.index.isin(maz_candidates.TAZ)] + # Calculate the remaining samples to collect n_samples_remaining = n_samples - len(maz_sample_idx) n_samples_remaining = ( From 92bf60da0069e4bc9f39cbad1a583d44a2bed16d Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Thu, 28 Mar 2024 09:49:03 -0700 
Subject: [PATCH 48/58] Made code changes to get categorical vehicle types to work --- activitysim/abm/models/vehicle_type_choice.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index 95d4087aa4..de723602ff 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -347,6 +347,17 @@ def iterate_vehicle_type_choice( model_settings, alts_cats_dict, vehicle_type_data ) + # alts preprocessor + alts_preprocessor_settings = model_settings.alts_preprocessor + if alts_preprocessor_settings: + expressions.assign_columns( + state, + df=alts_wide, + model_settings=alts_preprocessor_settings, + locals_dict=locals_dict, + trace_label=trace_label, + ) + # - preparing choosers for iterating vehicles_merged = vehicles_merged.to_frame() vehicles_merged["already_owned_veh"] = "" @@ -380,6 +391,12 @@ def iterate_vehicle_type_choice( len(choosers), ) + # filter columns of alts and choosers + if len(model_settings.COLS_TO_INCLUDE_IN_CHOOSER_TABLE) > 0: + choosers = choosers[model_settings.COLS_TO_INCLUDE_IN_CHOOSER_TABLE] + if len(model_settings.COLS_TO_INCLUDE_IN_ALTS_TABLE) > 0: + alts_wide = alts_wide[model_settings.COLS_TO_INCLUDE_IN_ALTS_TABLE] + # if there were so many alts that they had to be created programmatically, # by combining categorical variables, then the utility expressions should make # use of interaction terms to accommodate alt-specific coefficients and constants From f1bdd88ecb95794576445e238579e89c717db565 Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Thu, 28 Mar 2024 09:52:12 -0700 Subject: [PATCH 49/58] Changed settings call to old format --- activitysim/abm/models/vehicle_type_choice.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index de723602ff..faef05e652 100644 --- 
a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -393,9 +393,9 @@ def iterate_vehicle_type_choice( # filter columns of alts and choosers if len(model_settings.COLS_TO_INCLUDE_IN_CHOOSER_TABLE) > 0: - choosers = choosers[model_settings.COLS_TO_INCLUDE_IN_CHOOSER_TABLE] + choosers = choosers[model_settings.get("COLS_TO_INCLUDE_IN_CHOOSER_TABLE")] if len(model_settings.COLS_TO_INCLUDE_IN_ALTS_TABLE) > 0: - alts_wide = alts_wide[model_settings.COLS_TO_INCLUDE_IN_ALTS_TABLE] + alts_wide = alts_wide[model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE")] # if there were so many alts that they had to be created programmatically, # by combining categorical variables, then the utility expressions should make From b0061bc4fec193836d56d6b43c42427fdfc7e4e0 Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Thu, 28 Mar 2024 09:55:59 -0700 Subject: [PATCH 50/58] Fixed a couple more model settings calls --- activitysim/abm/models/vehicle_type_choice.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index faef05e652..ae7a679a9d 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -392,9 +392,9 @@ def iterate_vehicle_type_choice( ) # filter columns of alts and choosers - if len(model_settings.COLS_TO_INCLUDE_IN_CHOOSER_TABLE) > 0: + if len(model_settings.get("COLS_TO_INCLUDE_IN_CHOOSER_TABLE")) > 0: choosers = choosers[model_settings.get("COLS_TO_INCLUDE_IN_CHOOSER_TABLE")] - if len(model_settings.COLS_TO_INCLUDE_IN_ALTS_TABLE) > 0: + if len(model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE")) > 0: alts_wide = alts_wide[model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE")] # if there were so many alts that they had to be created programmatically, From a3f6056c065ffaecb95e8683ff270decd28cfbf7 Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Thu, 28 Mar 2024 
10:01:49 -0700 Subject: [PATCH 51/58] Removed state variable from function call and added default empty arrays for model settings --- activitysim/abm/models/vehicle_type_choice.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index ae7a679a9d..c853f173ab 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -351,7 +351,6 @@ def iterate_vehicle_type_choice( alts_preprocessor_settings = model_settings.alts_preprocessor if alts_preprocessor_settings: expressions.assign_columns( - state, df=alts_wide, model_settings=alts_preprocessor_settings, locals_dict=locals_dict, @@ -392,10 +391,10 @@ def iterate_vehicle_type_choice( ) # filter columns of alts and choosers - if len(model_settings.get("COLS_TO_INCLUDE_IN_CHOOSER_TABLE")) > 0: - choosers = choosers[model_settings.get("COLS_TO_INCLUDE_IN_CHOOSER_TABLE")] - if len(model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE")) > 0: - alts_wide = alts_wide[model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE")] + if len(model_settings.get("COLS_TO_INCLUDE_IN_CHOOSER_TABLE", [])) > 0: + choosers = choosers[model_settings.get("COLS_TO_INCLUDE_IN_CHOOSER_TABLE", [])] + if len(model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE", [])) > 0: + alts_wide = alts_wide[model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE"l [])] # if there were so many alts that they had to be created programmatically, # by combining categorical variables, then the utility expressions should make From 7449923c2ffc23d06a48ef3958bb7bd69d1de5cc Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Thu, 28 Mar 2024 10:57:24 -0700 Subject: [PATCH 52/58] Fixed call of reading in alts preprocessor --- activitysim/abm/models/vehicle_type_choice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activitysim/abm/models/vehicle_type_choice.py 
b/activitysim/abm/models/vehicle_type_choice.py index c853f173ab..13b6ca5f70 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -348,7 +348,7 @@ def iterate_vehicle_type_choice( ) # alts preprocessor - alts_preprocessor_settings = model_settings.alts_preprocessor + alts_preprocessor_settings = model_settings.get("alts_preprocessor", None) if alts_preprocessor_settings: expressions.assign_columns( df=alts_wide, From 74833cbf1ac935520b34da963d2104a16ef222cd Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Thu, 28 Mar 2024 16:38:48 -0700 Subject: [PATCH 53/58] Replaced l with , in vehicle_type_choice.py --- activitysim/abm/models/vehicle_type_choice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index 13b6ca5f70..efc4ef19b6 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -394,7 +394,7 @@ def iterate_vehicle_type_choice( if len(model_settings.get("COLS_TO_INCLUDE_IN_CHOOSER_TABLE", [])) > 0: choosers = choosers[model_settings.get("COLS_TO_INCLUDE_IN_CHOOSER_TABLE", [])] if len(model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE", [])) > 0: - alts_wide = alts_wide[model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE"l [])] + alts_wide = alts_wide[model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE", [])] # if there were so many alts that they had to be created programmatically, # by combining categorical variables, then the utility expressions should make From 422ca6cca078ed3c66d1489f2903f4b24c47a383 Mon Sep 17 00:00:00 2001 From: aber-sandag Date: Mon, 8 Apr 2024 14:25:56 -0700 Subject: [PATCH 54/58] Load skims into shared memory to be accessed by later models --- activitysim/core/mem.py | 3 ++ activitysim/core/skim_dict_factory.py | 41 +++++++++++++++++---------- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git 
a/activitysim/core/mem.py b/activitysim/core/mem.py index ae832f250c..de558b926d 100644 --- a/activitysim/core/mem.py +++ b/activitysim/core/mem.py @@ -296,6 +296,9 @@ def shared_memory_size(data_buffers=None): shared_size += Dataset.shm.preload_shared_memory_size(data_buffer[11:]) continue + if isinstance(data_buffer, multiprocessing.shared_memory.SharedMemory): + shared_size += data_buffer.size + continue try: obj = data_buffer.get_obj() except Exception: diff --git a/activitysim/core/skim_dict_factory.py b/activitysim/core/skim_dict_factory.py index 450b98d25c..5755964d53 100644 --- a/activitysim/core/skim_dict_factory.py +++ b/activitysim/core/skim_dict_factory.py @@ -409,19 +409,26 @@ def allocate_skim_buffer(self, skim_info, shared=False): f"total size: {util.INT(csz)} ({util.GB(csz)})" ) - if shared: - if dtype_name == "float64": - typecode = "d" - elif dtype_name == "float32": - typecode = "f" - else: - raise RuntimeError( - "allocate_skim_buffer unrecognized dtype %s" % dtype_name - ) - - buffer = multiprocessing.RawArray(typecode, buffer_size) - else: - buffer = np.zeros(buffer_size, dtype=dtype) + # if shared: + # if dtype_name == "float64": + # typecode = "d" + # elif dtype_name == "float32": + # typecode = "f" + # else: + # raise RuntimeError( + # "allocate_skim_buffer unrecognized dtype %s" % dtype_name + # ) + + # buffer = multiprocessing.RawArray(typecode, buffer_size) + shared_mem_name = f"skim_shared_memory__{skim_info.skim_tag}" + try: + buffer = multiprocessing.shared_memory.SharedMemory(name=shared_mem_name) + logger.info(f"skim buffer already allocated in shared memory: {shared_mem_name}, size: {buffer.size}") + except FileNotFoundError: + buffer = multiprocessing.shared_memory.SharedMemory(create=True, size=csz, name=shared_mem_name) + logger.info(f"allocating skim buffer in shared memory: {shared_mem_name}, size: {buffer.size}") + # else: + # buffer = np.zeros(buffer_size, dtype=dtype) return buffer @@ -440,8 +447,9 @@ def 
_skim_data_from_buffer(self, skim_info, skim_buffer): """ dtype = np.dtype(skim_info.dtype_name) - assert len(skim_buffer) == util.iprod(skim_info.skim_data_shape) - skim_data = np.frombuffer(skim_buffer, dtype=dtype).reshape( + # assert len(skim_buffer) == util.iprod(skim_info.skim_data_shape) + assert skim_buffer.size >= util.iprod(skim_info.skim_data_shape) * dtype.itemsize + skim_data = np.frombuffer(skim_buffer.buf, dtype=dtype, count=util.iprod(skim_info.skim_data_shape)).reshape( skim_info.skim_data_shape ) return skim_data @@ -462,6 +470,9 @@ def load_skims_to_buffer(self, skim_info, skim_buffer): skim_data = self._skim_data_from_buffer(skim_info, skim_buffer) assert skim_data.shape == skim_info.skim_data_shape + if skim_data.any(): + return + if read_cache: # returns None if cache file not found cache_data = self._open_existing_readonly_memmap_skim_cache(skim_info) From 31c3c353a3b069df7b710f52595d1b8310e615df Mon Sep 17 00:00:00 2001 From: aber-sandag Date: Wed, 10 Apr 2024 10:24:36 -0700 Subject: [PATCH 55/58] Don't use shared memory when running singleprocessed (temp fix) --- activitysim/core/skim_dict_factory.py | 37 +++++++++++++++------------ 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/activitysim/core/skim_dict_factory.py b/activitysim/core/skim_dict_factory.py index 5755964d53..a3b37aea2c 100644 --- a/activitysim/core/skim_dict_factory.py +++ b/activitysim/core/skim_dict_factory.py @@ -409,7 +409,7 @@ def allocate_skim_buffer(self, skim_info, shared=False): f"total size: {util.INT(csz)} ({util.GB(csz)})" ) - # if shared: + if shared: # if dtype_name == "float64": # typecode = "d" # elif dtype_name == "float32": @@ -420,15 +420,15 @@ def allocate_skim_buffer(self, skim_info, shared=False): # ) # buffer = multiprocessing.RawArray(typecode, buffer_size) - shared_mem_name = f"skim_shared_memory__{skim_info.skim_tag}" - try: - buffer = multiprocessing.shared_memory.SharedMemory(name=shared_mem_name) - logger.info(f"skim buffer already 
allocated in shared memory: {shared_mem_name}, size: {buffer.size}") - except FileNotFoundError: - buffer = multiprocessing.shared_memory.SharedMemory(create=True, size=csz, name=shared_mem_name) - logger.info(f"allocating skim buffer in shared memory: {shared_mem_name}, size: {buffer.size}") - # else: - # buffer = np.zeros(buffer_size, dtype=dtype) + shared_mem_name = f"skim_shared_memory__{skim_info.skim_tag}" + try: + buffer = multiprocessing.shared_memory.SharedMemory(name=shared_mem_name) + logger.info(f"skim buffer already allocated in shared memory: {shared_mem_name}, size: {buffer.size}") + except FileNotFoundError: + buffer = multiprocessing.shared_memory.SharedMemory(create=True, size=csz, name=shared_mem_name) + logger.info(f"allocating skim buffer in shared memory: {shared_mem_name}, size: {buffer.size}") + else: + buffer = np.zeros(buffer_size, dtype=dtype) return buffer @@ -447,11 +447,16 @@ def _skim_data_from_buffer(self, skim_info, skim_buffer): """ dtype = np.dtype(skim_info.dtype_name) - # assert len(skim_buffer) == util.iprod(skim_info.skim_data_shape) - assert skim_buffer.size >= util.iprod(skim_info.skim_data_shape) * dtype.itemsize - skim_data = np.frombuffer(skim_buffer.buf, dtype=dtype, count=util.iprod(skim_info.skim_data_shape)).reshape( - skim_info.skim_data_shape - ) + if isinstance(skim_buffer, multiprocessing.shared_memory.SharedMemory): + assert skim_buffer.size >= util.iprod(skim_info.skim_data_shape) * dtype.itemsize + skim_data = np.frombuffer(skim_buffer.buf, dtype=dtype, count=util.iprod(skim_info.skim_data_shape)).reshape( + skim_info.skim_data_shape + ) + else: + assert len(skim_buffer) == util.iprod(skim_info.skim_data_shape) + skim_data = np.frombuffer(skim_buffer, dtype=dtype).reshape( + skim_info.skim_data_shape + ) return skim_data def load_skims_to_buffer(self, skim_info, skim_buffer): @@ -470,7 +475,7 @@ def load_skims_to_buffer(self, skim_info, skim_buffer): skim_data = self._skim_data_from_buffer(skim_info, 
skim_buffer) assert skim_data.shape == skim_info.skim_data_shape - if skim_data.any(): + if isinstance(skim_buffer, multiprocessing.shared_memory.SharedMemory) and skim_data.any(): return if read_cache: From 777503395d1804f5864f1d52bac8851097985c97 Mon Sep 17 00:00:00 2001 From: aber-sandag Date: Mon, 15 Apr 2024 12:14:53 -0700 Subject: [PATCH 56/58] Fix skims in trip_mode_choice annotate_trips --- activitysim/abm/models/trip_mode_choice.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/activitysim/abm/models/trip_mode_choice.py b/activitysim/abm/models/trip_mode_choice.py index e7bb200e45..b165e24b73 100644 --- a/activitysim/abm/models/trip_mode_choice.py +++ b/activitysim/abm/models/trip_mode_choice.py @@ -317,6 +317,11 @@ def trip_mode_choice(trips, network_los, chunk_size, trace_hh_id): pipeline.replace_table("trips", trips_df) if model_settings.get("annotate_trips"): + locals_dict = {} + locals_dict.update(constants) + simulate.set_skim_wrapper_targets(trips_merged, skims) + locals_dict.update(skims) + locals_dict["timeframe"] = "trip" annotate.annotate_trips(model_settings, trace_label, locals_dict) if trace_hh_id: From 98713f304470b3be1a5dcd9060fe812bb08ac7ec Mon Sep 17 00:00:00 2001 From: David Hensle Date: Fri, 19 Apr 2024 13:42:35 -0700 Subject: [PATCH 57/58] fix to #841 --- activitysim/abm/models/util/overlap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activitysim/abm/models/util/overlap.py b/activitysim/abm/models/util/overlap.py index f7773a14a3..8665c8a513 100644 --- a/activitysim/abm/models/util/overlap.py +++ b/activitysim/abm/models/util/overlap.py @@ -266,7 +266,7 @@ def calculate_consecutive(array): intvs = stops[:, 1] - starts[:, 1] # Store intervals as a 2D array for further vectorized ops to make. 
- c = np.bincount(starts[:, 0]) + c = np.bincount(starts[:, 0], minlength=array.shape[0]) mask = np.arange(c.max()) < c[:, None] intvs2D = mask.astype(float) intvs2D[mask] = intvs From e4dc5ac747f09121e617e314a08be3cf70a102bf Mon Sep 17 00:00:00 2001 From: David Hensle Date: Wed, 1 May 2024 11:41:46 -0700 Subject: [PATCH 58/58] reducing memory needs in parking_location_choice --- .../abm/models/parking_location_choice.py | 22 ++++-- activitysim/core/util.py | 75 +++++++++++++++++++ 2 files changed, 91 insertions(+), 6 deletions(-) diff --git a/activitysim/abm/models/parking_location_choice.py b/activitysim/abm/models/parking_location_choice.py index a87703b8b9..8eaf1bc6f3 100644 --- a/activitysim/abm/models/parking_location_choice.py +++ b/activitysim/abm/models/parking_location_choice.py @@ -16,7 +16,7 @@ ) from activitysim.core.interaction_sample_simulate import interaction_sample_simulate from activitysim.core.tracing import print_elapsed_time -from activitysim.core.util import assign_in_place +from activitysim.core.util import assign_in_place, drop_unused_chooser_columns from .util import estimation @@ -99,6 +99,7 @@ def parking_destination_simulate( destination_sample, model_settings, skims, + locals_dict, chunk_size, trace_hh_id, trace_label, @@ -123,11 +124,6 @@ def parking_destination_simulate( logger.info("Running trip_destination_simulate with %d trips", len(trips)) - locals_dict = config.get_model_constants(model_settings).copy() - locals_dict.update(skims) - locals_dict["timeframe"] = "trip" - locals_dict["PARKING"] = skims["op_skims"].dest_key - parking_locations = interaction_sample_simulate( choosers=trips, alternatives=destination_sample, @@ -171,6 +167,19 @@ def choose_parking_location( t0 = print_elapsed_time() alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"] + + # remove trips and alts columns that are not used in spec + locals_dict = config.get_model_constants(model_settings).copy() + locals_dict.update(skims) + locals_dict["timeframe"] = 
"trip" + locals_dict["PARKING"] = skims["op_skims"].dest_key + + spec = get_spec_for_segment(model_settings, "SPECIFICATION", segment_name) + trips = drop_unused_chooser_columns(trips, spec, locals_dict, custom_chooser=None) + alternatives = drop_unused_chooser_columns( + alternatives, spec, locals_dict, custom_chooser=None + ) + destination_sample = logit.interaction_dataset( trips, alternatives, alt_index_id=alt_dest_col_name ) @@ -184,6 +193,7 @@ def choose_parking_location( destination_sample=destination_sample, model_settings=model_settings, skims=skims, + locals_dict=locals_dict, chunk_size=chunk_size, trace_hh_id=trace_hh_id, trace_label=trace_label, diff --git a/activitysim/core/util.py b/activitysim/core/util.py index 217cdb8377..421eb35a06 100644 --- a/activitysim/core/util.py +++ b/activitysim/core/util.py @@ -468,3 +468,78 @@ def nearest_node_index(node, nodes): deltas = nodes - node dist_2 = np.einsum("ij,ij->i", deltas, deltas) return np.argmin(dist_2) + + +def drop_unused_chooser_columns( + choosers, spec, locals_d, custom_chooser, sharrow_enabled=False +): + """ + Drop unused columns from the chooser table, based on the spec and custom_chooser function. 
+ """ + # keep only variables needed for spec + import re + + # define a regular expression to find variables in spec + pattern = r"[a-zA-Z_][a-zA-Z0-9_]*" + + unique_variables_in_spec = set( + spec.reset_index()["Expression"].apply(lambda x: re.findall(pattern, x)).sum() + ) + + if locals_d: + unique_variables_in_spec.add(locals_d.get("orig_col_name", None)) + unique_variables_in_spec.add(locals_d.get("dest_col_name", None)) + if locals_d.get("timeframe") == "trip": + orig_col_name = locals_d.get("ORIGIN", None) + dest_col_name = locals_d.get("DESTINATION", None) + stop_col_name = None + parking_col_name = locals_d.get("PARKING", None) + primary_origin_col_name = None + if orig_col_name is None and "od_skims" in locals_d: + orig_col_name = locals_d["od_skims"].orig_key + if dest_col_name is None and "od_skims" in locals_d: + dest_col_name = locals_d["od_skims"].dest_key + if stop_col_name is None and "dp_skims" in locals_d: + stop_col_name = locals_d["dp_skims"].dest_key + if primary_origin_col_name is None and "dnt_skims" in locals_d: + primary_origin_col_name = locals_d["dnt_skims"].dest_key + unique_variables_in_spec.add(orig_col_name) + unique_variables_in_spec.add(dest_col_name) + unique_variables_in_spec.add(parking_col_name) + unique_variables_in_spec.add(primary_origin_col_name) + unique_variables_in_spec.add(stop_col_name) + unique_variables_in_spec.add("trip_period") + # when using trip_scheduling_choice for trup scheduling + unique_variables_in_spec.add("last_outbound_stop") + unique_variables_in_spec.add("last_inbound_stop") + + # when sharrow mode, need to keep the following columns in the choosers table + if sharrow_enabled: + unique_variables_in_spec.add("out_period") + unique_variables_in_spec.add("in_period") + unique_variables_in_spec.add("purpose_index_num") + + if custom_chooser: + import inspect + + custom_chooser_lines = inspect.getsource(custom_chooser) + unique_variables_in_spec.update(re.findall(pattern, custom_chooser_lines)) + + 
logger.info("Dropping unused variables in chooser table") + + logger.info( + "before dropping, the choosers table has {} columns: {}".format( + len(choosers.columns), choosers.columns + ) + ) + + # keep only variables needed for spec + choosers = choosers[[c for c in choosers.columns if c in unique_variables_in_spec]] + + logger.info( + "after dropping, the choosers table has {} columns: {}".format( + len(choosers.columns), choosers.columns + ) + ) + + return choosers \ No newline at end of file