diff --git a/activitysim/abm/models/auto_ownership.py b/activitysim/abm/models/auto_ownership.py index 564d6f94b6..476bd18adf 100644 --- a/activitysim/abm/models/auto_ownership.py +++ b/activitysim/abm/models/auto_ownership.py @@ -2,9 +2,9 @@ # See full license in LICENSE.txt. import logging -from activitysim.core import config, inject, pipeline, simulate, tracing +from activitysim.core import config, expressions, inject, pipeline, simulate, tracing -from .util import estimation +from .util import estimation, annotate logger = logging.getLogger(__name__) @@ -32,6 +32,21 @@ def auto_ownership_simulate(households, households_merged, chunk_size, trace_hh_ logger.info("Running %s with %d households", trace_label, len(choosers)) + # - preprocessor + preprocessor_settings = model_settings.get("preprocessor", None) + if preprocessor_settings: + + locals_d = {} + if constants is not None: + locals_d.update(constants) + + expressions.assign_columns( + df=choosers, + model_settings=preprocessor_settings, + locals_dict=locals_d, + trace_label=trace_label, + ) + if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) estimator.write_spec(model_settings) @@ -69,5 +84,8 @@ def auto_ownership_simulate(households, households_merged, chunk_size, trace_hh_ "auto_ownership", households.auto_ownership, value_counts=True ) + if model_settings.get("annotate_households"): + annotate.annotate_households(model_settings, trace_label) + if trace_hh_id: tracing.trace_df(households, label="auto_ownership", warn_if_empty=True) diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index f7da93687b..f41bea9eee 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -151,6 +151,11 @@ def cdap_simulate(persons_merged, persons, households, chunk_size, trace_hh_id): for hhsize in range(2, cdap.MAX_HHSIZE + 1): spec = cdap.get_cached_spec(hhsize) estimator.write_table(spec, "spec_%s" % hhsize, append=False) + if add_joint_tour_utility: + joint_spec = cdap.get_cached_joint_spec(hhsize) + estimator.write_table( + joint_spec, "joint_spec_%s" % hhsize, append=False + ) logger.info("Running cdap_simulate with %d persons", len(persons_merged.index)) @@ -184,6 +189,11 @@ def cdap_simulate(persons_merged, persons, households, chunk_size, trace_hh_id): if estimator: estimator.write_choices(choices) choices = estimator.get_survey_values(choices, "persons", "cdap_activity") + if add_joint_tour_utility: + hh_joint.index.name = "household_id" + hh_joint = estimator.get_survey_values( + hh_joint, "households", "has_joint_tour" + ) estimator.write_override_choices(choices) estimator.end_estimation() diff --git a/activitysim/abm/models/disaggregate_accessibility.py b/activitysim/abm/models/disaggregate_accessibility.py index fe79d3fcdf..94bf963c54 100644 --- a/activitysim/abm/models/disaggregate_accessibility.py +++ b/activitysim/abm/models/disaggregate_accessibility.py @@ -275,6 +275,10 @@ def zone_sampler(self): maz_candidates = maz_candidates[ ~maz_candidates.MAZ.isin(maz_sample_idx) ] + + # Need to make sure we sample from TAZs that still exist in the maz_candidates + taz_candidates = taz_candidates[taz_candidates.index.isin(maz_candidates.TAZ)] + # Calculate the remaining samples to collect n_samples_remaining = n_samples - len(maz_sample_idx) n_samples_remaining = ( @@ -569,14 +573,13 @@ def merge_persons(self): inject.add_table("proto_persons_merged", persons_merged) -def get_disaggregate_logsums(network_los, chunk_size, trace_hh_id): +def 
get_disaggregate_logsums( + network_los, chunk_size, trace_hh_id, disagg_model_settings +): logsums = {} persons_merged = pipeline.get_table("proto_persons_merged").sort_index( inplace=False ) - disagg_model_settings = read_disaggregate_accessibility_yaml( - "disaggregate_accessibility.yaml" - ) for model_name in [ "workplace_location", @@ -696,8 +699,14 @@ def compute_disaggregate_accessibility(network_los, chunk_size, trace_hh_id): tracing.register_traceable_table(tablename, df) del df + disagg_model_settings = read_disaggregate_accessibility_yaml( + "disaggregate_accessibility.yaml" + ) + # Run location choice - logsums = get_disaggregate_logsums(network_los, chunk_size, trace_hh_id) + logsums = get_disaggregate_logsums( + network_los, chunk_size, trace_hh_id, disagg_model_settings + ) logsums = {k + "_accessibility": v for k, v in logsums.items()} # Combined accessibility table @@ -736,20 +745,20 @@ def compute_disaggregate_accessibility(network_los, chunk_size, trace_hh_id): logsums["proto_disaggregate_accessibility"] = access_df # Drop any tables prematurely created - for tablename in [ - "school_destination_size", - "workplace_destination_size", - ]: - pipeline.drop_table(tablename) + # FIXME: dropping size tables breaks restart functionality for location choice models. + # hopefully this pipeline mess just goes away with move away from orca.... + # for tablename in [ + # "school_destination_size", + # "workplace_destination_size", + # ]: + # pipeline.drop_table(tablename) for ch in list(pipeline.get_rn_generator().channels.keys()): pipeline.get_rn_generator().drop_channel(ch) - # Drop any prematurely added traceables - for trace in [ - x for x in inject.get_injectable("traceable_tables") if "proto_" not in x - ]: - tracing.deregister_traceable_table(trace) + # Dropping all traceable tables + for table in inject.get_injectable("traceable_tables"): + tracing.deregister_traceable_table(table) # need to clear any premature tables that were added during the previous run orca._TABLES.clear() @@ -760,4 +769,22 @@ def compute_disaggregate_accessibility(network_los, chunk_size, trace_hh_id): # Inject accessibility results into pipeline [inject.add_table(k, df) for k, df in logsums.items()] + # available post-processing + for annotations in disagg_model_settings.get("postprocess_proto_tables", []): + tablename = annotations["tablename"] + df = pipeline.get_table(tablename) + assert df is not None + assert annotations is not None + assign_columns( + df=df, + model_settings={ + **annotations["annotate"], + **disagg_model_settings["suffixes"], + }, + trace_label=tracing.extend_trace_label( + "disaggregate_accessibility.postprocess", tablename + ), + ) + pipeline.replace_table(tablename, df) + return diff --git a/activitysim/abm/models/joint_tour_destination.py b/activitysim/abm/models/joint_tour_destination.py index 02651d2a44..02f0dbe84e 100644 --- a/activitysim/abm/models/joint_tour_destination.py +++ b/activitysim/abm/models/joint_tour_destination.py @@ -62,7 +62,7 @@ def joint_tour_destination( estimator.write_model_settings(model_settings, model_settings_file_name) choices_df, save_sample_df = tour_destination.run_tour_destination( - tours, + joint_tours, persons_merged, want_logsums, want_sample_table, diff --git a/activitysim/abm/models/joint_tour_frequency_composition.py b/activitysim/abm/models/joint_tour_frequency_composition.py index 3e52d27fae..ffdb36e2c7 100644 --- a/activitysim/abm/models/joint_tour_frequency_composition.py +++ 
b/activitysim/abm/models/joint_tour_frequency_composition.py @@ -35,9 +35,15 @@ def joint_tour_frequency_composition( model_settings = config.read_model_settings(model_settings_file_name) + # FIXME setting index as "alt" causes crash in estimation mode... + # happens in school escorting too! + # alt_tdd = simulate.read_model_alts( + # "joint_tour_frequency_composition_alternatives.csv", set_index="alt" + # ) alt_tdd = simulate.read_model_alts( - "joint_tour_frequency_composition_alternatives.csv", set_index="alt" + "joint_tour_frequency_composition_alternatives.csv", set_index=None ) + alt_tdd.index = alt_tdd["alt"].values # - only interested in households with more than one cdap travel_active person and # - at least one non-preschooler @@ -94,7 +100,6 @@ def joint_tour_frequency_composition( estimator.write_model_settings(model_settings, model_settings_file_name) estimator.write_coefficients(coefficients_df, model_settings) estimator.write_choosers(choosers) - estimator.write_alternatives(alts) assert choosers.index.name == "household_id" assert "household_id" not in choosers.columns @@ -102,6 +107,9 @@ def joint_tour_frequency_composition( estimator.set_chooser_id(choosers.index.name) + # FIXME set_alt_id - do we need this for interaction_simulate estimation bundle tables? + estimator.set_alt_id("alt_id") + # The choice value 'joint_tour_frequency_composition' assigned by interaction_simulate # is the index value of the chosen alternative in the alternatives table. choices = interaction_simulate( @@ -134,6 +142,7 @@ def joint_tour_frequency_composition( # - but we don't know the tour participants yet # - so we arbitrarily choose the first person in the household # - to be point person for the purpose of generating an index and setting origin + # FIXME: not all models are guaranteed to have PNUM temp_point_persons = persons.loc[persons.PNUM == 1] temp_point_persons["person_id"] = temp_point_persons.index temp_point_persons = temp_point_persons.set_index("household_id") diff --git a/activitysim/abm/models/joint_tour_participation.py b/activitysim/abm/models/joint_tour_participation.py index ee8658ae5f..d079322e6e 100644 --- a/activitysim/abm/models/joint_tour_participation.py +++ b/activitysim/abm/models/joint_tour_participation.py @@ -209,6 +209,10 @@ def participants_chooser(probs, choosers, spec, trace_label): probs[choice_col] = np.where(probs[choice_col] > 0, 1, 0) non_choice_col = [col for col in probs.columns if col != choice_col][0] probs[non_choice_col] = 1 - probs[choice_col] + if iter > MAX_ITERATIONS + 1: + raise RuntimeError( + f"{num_tours_remaining} tours could not be satisfied even with forcing participation" + ) else: raise RuntimeError( f"{num_tours_remaining} tours could not be satisfied after {iter} iterations" diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py index dd5e279b39..7e9a76d551 100644 --- a/activitysim/abm/models/location_choice.py +++ b/activitysim/abm/models/location_choice.py @@ -17,6 +17,7 @@ ) from activitysim.core.interaction_sample import interaction_sample from activitysim.core.interaction_sample_simulate import interaction_sample_simulate +from activitysim.core.util import reindex from .util import estimation from .util import logsums as logsum @@ -138,15 +139,8 @@ def _location_sample( logger.info("Running %s with %d persons" % (trace_label, len(choosers.index))) sample_size = model_settings["SAMPLE_SIZE"] - if config.setting("disable_destination_sampling", False) or ( - estimator and 
estimator.want_unsampled_alternatives - ): - # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count - logger.info( - "Estimation mode for %s using unsampled alternatives short_circuit_choices" - % (trace_label,) - ) - sample_size = 0 + if estimator: + sample_size = model_settings.get("ESTIMATION_SAMPLE_SIZE", 0) locals_d = { "skims": skims, @@ -154,6 +148,8 @@ def _location_sample( "orig_col_name": skims.orig_key, # added for sharrow flows "dest_col_name": skims.dest_key, # added for sharrow flows "timeframe": "timeless", + "reindex": reindex, + "land_use": inject.get_table("land_use").to_frame(), } constants = config.get_model_constants(model_settings) locals_d.update(constants) @@ -470,6 +466,38 @@ def run_location_sample( trace_label=trace_label, ) + # FIXME temporary code to ensure sampled alternative is in choices for estimation + # Hack to get shorter run times when you don't care about creating EDB for location choice models + if estimator: + # grabbing survey values + survey_persons = estimation.manager.get_survey_table("persons") + if "school_location" in trace_label: + survey_choices = survey_persons["school_zone_id"].reset_index() + elif ("workplace_location" in trace_label) and ("external" not in trace_label): + survey_choices = survey_persons["workplace_zone_id"].reset_index() + else: + return choices + survey_choices.columns = ["person_id", "alt_dest"] + survey_choices = survey_choices[ + survey_choices["person_id"].isin(choices.index) + & (survey_choices.alt_dest > 0) + ] + # merging survey destination into table if not available + joined_data = survey_choices.merge( + choices, on=["person_id", "alt_dest"], how="left", indicator=True + ) + missing_rows = joined_data[joined_data["_merge"] == "left_only"] + missing_rows["pick_count"] = 1 + if len(missing_rows) > 0: + new_choices = missing_rows[ + ["person_id", "alt_dest", "prob", "pick_count"] + ].set_index("person_id") + choices = choices.append(new_choices, ignore_index=False).sort_index() + # making probability the mean of all other sampled destinations by person + choices["prob"] = choices["prob"].fillna( + choices.groupby("person_id")["prob"].transform("mean") + ) + return choices @@ -601,6 +629,8 @@ def run_location_simulate( "orig_col_name": skims.orig_key, # added for sharrow flows "dest_col_name": skims.dest_key, # added for sharrow flows "timeframe": "timeless", + "reindex": reindex, + "land_use": inject.get_table("land_use").to_frame(), } constants = config.get_model_constants(model_settings) if constants is not None: @@ -808,6 +838,24 @@ def run_location_choice( ) tracing.trace_df(choices_df, estimation_trace_label) + if want_logsums & (not skip_choice): + # grabbing index, could be person_id or proto_person_id + index_name = choices_df.index.name + # merging mode choice logsum of chosen alternative to choices + choices_df = ( + pd.merge( + choices_df.reset_index(), + location_sample_df.reset_index()[ + [index_name, model_settings["ALT_DEST_COL_NAME"], ALT_LOGSUM] + ], + how="left", + left_on=[index_name, "choice"], + right_on=[index_name, model_settings["ALT_DEST_COL_NAME"]], + ) + .drop(columns=model_settings["ALT_DEST_COL_NAME"]) + .set_index(index_name) + ) + choices_list.append(choices_df) if want_sample_table: @@ -825,7 +873,7 @@ def run_location_choice( else: # this will only happen with small samples (e.g. singleton) with no (e.g.) 
school segs logger.warning("%s no choices", trace_label) - choices_df = pd.DataFrame(columns=["choice", "logsum"]) + choices_df = pd.DataFrame(columns=["choice", "logsum", ALT_LOGSUM]) if len(sample_list) > 0: save_sample_df = pd.concat(sample_list) @@ -869,7 +917,8 @@ def iterate_location_choice( Returns ------- adds choice column model_settings['DEST_CHOICE_COLUMN_NAME'] - adds logsum column model_settings['DEST_CHOICE_LOGSUM_COLUMN_NAME']- if provided + adds destination choice logsum column model_settings['DEST_CHOICE_LOGSUM_COLUMN_NAME']- if provided + adds mode choice logsum to selected destination column model_settings['MODE_CHOICE_LOGSUM_COLUMN_NAME']- if provided adds annotations to persons table """ @@ -879,7 +928,11 @@ def iterate_location_choice( chooser_filter_column = model_settings["CHOOSER_FILTER_COLUMN_NAME"] dest_choice_column_name = model_settings["DEST_CHOICE_COLUMN_NAME"] - logsum_column_name = model_settings.get("DEST_CHOICE_LOGSUM_COLUMN_NAME") + dc_logsum_column_name = model_settings.get("DEST_CHOICE_LOGSUM_COLUMN_NAME") + mc_logsum_column_name = model_settings.get("MODE_CHOICE_LOGSUM_COLUMN_NAME") + want_logsums = (dc_logsum_column_name is not None) | ( + mc_logsum_column_name is not None + ) sample_table_name = model_settings.get("DEST_CHOICE_SAMPLE_TABLE_NAME") want_sample_table = ( @@ -929,7 +982,7 @@ def iterate_location_choice( persons_merged_df_, network_los, shadow_price_calculator=spc, - want_logsums=logsum_column_name is not None, + want_logsums=want_logsums, want_sample_table=want_sample_table, estimator=estimator, model_settings=model_settings, @@ -1005,10 +1058,15 @@ def iterate_location_choice( ) # add the dest_choice_logsum column to persons dataframe - if logsum_column_name: - persons_df[logsum_column_name] = ( + if dc_logsum_column_name: + persons_df[dc_logsum_column_name] = ( choices_df["logsum"].reindex(persons_df.index).astype("float") ) + # add the mode choice logsum column to persons dataframe + if mc_logsum_column_name: + persons_df[mc_logsum_column_name] = ( + choices_df[ALT_LOGSUM].reindex(persons_df.index).astype("float") + ) if save_sample_df is not None: # might be None for tiny samples even if sample_table_name was specified @@ -1047,9 +1105,13 @@ def iterate_location_choice( if trace_hh_id: tracing.trace_df(households_df, label=trace_label, warn_if_empty=True) - if logsum_column_name: + if dc_logsum_column_name: + tracing.print_summary( + dc_logsum_column_name, choices_df["logsum"], value_counts=True + ) + if mc_logsum_column_name: tracing.print_summary( - logsum_column_name, choices_df["logsum"], value_counts=True + mc_logsum_column_name, choices_df[ALT_LOGSUM], value_counts=True ) return persons_df diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py b/activitysim/abm/models/non_mandatory_tour_frequency.py index 521f49c47c..a76a3650bf 100644 --- a/activitysim/abm/models/non_mandatory_tour_frequency.py +++ b/activitysim/abm/models/non_mandatory_tour_frequency.py @@ -20,7 +20,7 @@ from .util import annotate from .util.school_escort_tours_trips import recompute_tour_count_statistics -from .util.overlap import person_max_window +from .util.overlap import person_max_window, person_available_periods from .util.tour_frequency import process_non_mandatory_tours logger = logging.getLogger(__name__) @@ -166,7 +166,10 @@ def non_mandatory_tour_frequency(persons, persons_merged, chunk_size, trace_hh_i preprocessor_settings = model_settings.get("preprocessor", None) if preprocessor_settings: - locals_dict = {"person_max_window": 
person_max_window} + locals_dict = { + "person_max_window": person_max_window, + "person_available_periods": person_available_periods, + } expressions.assign_columns( df=choosers, @@ -259,6 +262,9 @@ def non_mandatory_tour_frequency(persons, persons_merged, chunk_size, trace_hh_i choices_list.append(choices) + # FIXME only want to keep actual purposes, adding cols in alts will mess this up + # this is complicated by canonical_ids calculated based on alts if not specified explicitly + # thus, adding column to input alts will change IDs and break estimation mode.... del alternatives["tot_tours"] # del tot_tours column we added above # The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate @@ -345,13 +351,14 @@ def non_mandatory_tour_frequency(persons, persons_merged, chunk_size, trace_hh_i # make sure they created the right tours survey_tours = estimation.manager.get_survey_table("tours").sort_index() - non_mandatory_survey_tours = survey_tours[ - survey_tours.tour_category == "non_mandatory" - ] - assert len(non_mandatory_survey_tours) == len(non_mandatory_tours) - assert non_mandatory_survey_tours.index.equals( - non_mandatory_tours.sort_index().index - ) + # FIXME below check needs to remove the pure-escort tours from the survey tours table + # non_mandatory_survey_tours = survey_tours[ + # survey_tours.tour_category == "non_mandatory" + # ] + # assert len(non_mandatory_survey_tours) == len(non_mandatory_tours) + # assert non_mandatory_survey_tours.index.equals( + # non_mandatory_tours.sort_index().index + # ) # make sure they created tours with the expected tour_ids columns = ["person_id", "household_id", "tour_type", "tour_category"] diff --git a/activitysim/abm/models/parking_location_choice.py b/activitysim/abm/models/parking_location_choice.py index a87703b8b9..8eaf1bc6f3 100644 --- a/activitysim/abm/models/parking_location_choice.py +++ b/activitysim/abm/models/parking_location_choice.py @@ -16,7 +16,7 @@ ) from activitysim.core.interaction_sample_simulate import interaction_sample_simulate from activitysim.core.tracing import print_elapsed_time -from activitysim.core.util import assign_in_place +from activitysim.core.util import assign_in_place, drop_unused_chooser_columns from .util import estimation @@ -99,6 +99,7 @@ def parking_destination_simulate( destination_sample, model_settings, skims, + locals_dict, chunk_size, trace_hh_id, trace_label, @@ -123,11 +124,6 @@ def parking_destination_simulate( logger.info("Running trip_destination_simulate with %d trips", len(trips)) - locals_dict = config.get_model_constants(model_settings).copy() - locals_dict.update(skims) - locals_dict["timeframe"] = "trip" - locals_dict["PARKING"] = skims["op_skims"].dest_key - parking_locations = interaction_sample_simulate( choosers=trips, alternatives=destination_sample, @@ -171,6 +167,19 @@ def choose_parking_location( t0 = print_elapsed_time() alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"] + + # remove trips and alts columns that are not used in spec + locals_dict = config.get_model_constants(model_settings).copy() + locals_dict.update(skims) + locals_dict["timeframe"] = "trip" + locals_dict["PARKING"] = skims["op_skims"].dest_key + + spec = get_spec_for_segment(model_settings, "SPECIFICATION", segment_name) + trips = drop_unused_chooser_columns(trips, spec, locals_dict, custom_chooser=None) + alternatives = drop_unused_chooser_columns( + alternatives, spec, locals_dict, custom_chooser=None + ) + destination_sample = logit.interaction_dataset( trips, alternatives, 
alt_index_id=alt_dest_col_name ) @@ -184,6 +193,7 @@ def choose_parking_location( destination_sample=destination_sample, model_settings=model_settings, skims=skims, + locals_dict=locals_dict, chunk_size=chunk_size, trace_hh_id=trace_hh_id, trace_label=trace_label, diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py index 1b5a97fc93..2ae72a968f 100644 --- a/activitysim/abm/models/school_escorting.py +++ b/activitysim/abm/models/school_escorting.py @@ -46,7 +46,7 @@ def determine_escorting_participants(choosers, persons, model_settings): # can specify different weights to determine chaperones persontype_weight = model_settings.get("PERSON_WEIGHT", 100) - gender_weight = model_settings.get("PERSON_WEIGHT", 10) + gender_weight = model_settings.get("GENDER_WEIGHT", 10) age_weight = model_settings.get("AGE_WEIGHT", 1) # can we move all of these to a config file? @@ -122,7 +122,7 @@ def add_prev_choices_to_choosers(choosers, choices, alts, stage): stage_alts, how="left", left_on=escorting_choice, - right_on=stage_alts.index.name, + right_index=True, ) .set_index("household_id") ) @@ -198,8 +198,12 @@ def create_school_escorting_bundles_table(choosers, tours, stage): bundles : pd.DataFrame one school escorting bundle per row """ - # making a table of bundles - choosers = choosers.reset_index() + # want to keep household_id in columns, which is already there if running in estimation mode + if "household_id" in choosers.columns: + choosers = choosers.reset_index(drop=True) + else: + choosers = choosers.reset_index() + # creating a row for every school escorting bundle choosers = choosers.loc[choosers.index.repeat(choosers["nbundles"])] bundles = pd.DataFrame() @@ -362,7 +366,11 @@ def school_escorting( households_merged = households_merged.to_frame() tours = tours.to_frame() - alts = simulate.read_model_alts(model_settings["ALTS"], set_index="Alt") + # FIXME setting index as "Alt" causes crash in estimation mode... + # happens in joint_tour_frequency_composition too! + # alts = simulate.read_model_alts(model_settings["ALTS"], set_index="Alt") + alts = simulate.read_model_alts(model_settings["ALTS"], set_index=None) + alts.index = alts["Alt"].values households_merged, participant_columns = determine_escorting_participants( households_merged, persons, model_settings ) @@ -379,7 +387,9 @@ def school_escorting( choices = None for stage_num, stage in enumerate(school_escorting_stages): stage_trace_label = trace_label + "_" + stage - estimator = estimation.manager.begin_estimation("school_escorting_" + stage) + estimator = estimation.manager.begin_estimation( + model_name="school_escorting_" + stage, bundle_name="school_escorting" + ) model_spec_raw = simulate.read_model_spec( file_name=model_settings[stage.upper() + "_SPEC"] ) @@ -434,9 +444,26 @@ def school_escorting( if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) - estimator.write_spec(model_settings) - estimator.write_coefficients(coefficients_df, model_settings) + estimator.write_spec(model_settings, tag=stage.upper() + "_SPEC") + estimator.write_coefficients( + coefficients_df, file_name=stage.upper() + "_COEFFICIENTS" + ) estimator.write_choosers(choosers) + estimator.write_alternatives(alts, bundle_directory=True) + + # FIXME #interaction_simulate_estimation_requires_chooser_id_in_df_column + # should we do it here or have interaction_simulate do it?
+ # chooser index must be duplicated in column or it will be omitted from interaction_dataset + # estimation requires that chooser_id is either in index or a column of interaction_dataset + # so it can be reformatted (melted) and indexed by chooser_id and alt_id + assert choosers.index.name == "household_id" + assert "household_id" not in choosers.columns + choosers["household_id"] = choosers.index + + # FIXME set_alt_id - do we need this for interaction_simulate estimation bundle tables? + estimator.set_alt_id("alt_id") + + estimator.set_chooser_id(choosers.index.name) log_alt_losers = config.setting("log_alt_losers", False) @@ -474,47 +501,74 @@ if stage_num >= 1: choosers["Alt"] = choices - choosers = choosers.join(alts, how="left", on="Alt") + choosers = choosers.join(alts.set_index("Alt"), how="left", on="Alt") bundles = create_school_escorting_bundles_table( choosers[choosers["Alt"] > 1], tours, stage ) escort_bundles.append(bundles) + pipeline.replace_table("households", households) + escort_bundles = pd.concat(escort_bundles) - escort_bundles["bundle_id"] = ( - escort_bundles["household_id"] * 10 - + escort_bundles.groupby("household_id").cumcount() - + 1 - ) - escort_bundles.sort_values( - by=["household_id", "school_escort_direction"], - ascending=[True, False], - inplace=True, - ) - school_escort_tours = school_escort_tours_trips.create_pure_school_escort_tours( - escort_bundles - ) - chauf_tour_id_map = { - v: k for k, v in school_escort_tours["bundle_id"].to_dict().items() - } - escort_bundles["chauf_tour_id"] = np.where( - escort_bundles["escort_type"] == "ride_share", - escort_bundles["first_mand_tour_id"], - escort_bundles["bundle_id"].map(chauf_tour_id_map), - ) + # Only want to create bundles and tours and trips if at least one household has school escorting + if len(escort_bundles) > 0: + escort_bundles["bundle_id"] = ( + escort_bundles["household_id"] * 10 + + escort_bundles.groupby("household_id").cumcount() + + 1 + ) + escort_bundles.sort_values( + by=["household_id", "school_escort_direction"], + ascending=[True, False], + inplace=True, + ) - tours = school_escort_tours_trips.add_pure_escort_tours(tours, school_escort_tours) - tours = school_escort_tours_trips.process_tours_after_escorting_model( - escort_bundles, tours - ) + school_escort_tours = school_escort_tours_trips.create_pure_school_escort_tours( + escort_bundles + ) + chauf_tour_id_map = { + v: k for k, v in school_escort_tours["bundle_id"].to_dict().items() + } + escort_bundles["chauf_tour_id"] = np.where( + escort_bundles["escort_type"] == "ride_share", + escort_bundles["first_mand_tour_id"], + escort_bundles["bundle_id"].map(chauf_tour_id_map), + ) - school_escort_trips = school_escort_tours_trips.create_school_escort_trips( - escort_bundles - ) + tours = school_escort_tours_trips.add_pure_escort_tours( + tours, school_escort_tours + ) + tours = school_escort_tours_trips.process_tours_after_escorting_model( + escort_bundles, tours + ) + + school_escort_trips = school_escort_tours_trips.create_school_escort_trips( + escort_bundles + ) + + else: + # create empty school escort tours & trips tables to be used downstream + tours["school_esc_outbound"] = pd.NA + tours["school_esc_inbound"] = pd.NA + tours["school_escort_direction"] = pd.NA + tours["next_pure_escort_start"] = pd.NA + school_escort_tours = pd.DataFrame(columns=tours.columns) + trip_cols = [ + "household_id", + "person_id", + "tour_id", + "trip_id", + "outbound", + "depart", + "purpose", + "destination",
"escort_participants", + "chauf_tour_id", + ] + school_escort_trips = pd.DataFrame(columns=trip_cols) # update pipeline - pipeline.replace_table("households", households) pipeline.replace_table("tours", tours) pipeline.get_rn_generator().drop_channel("tours") pipeline.get_rn_generator().add_channel("tours", tours) diff --git a/activitysim/abm/models/stop_frequency.py b/activitysim/abm/models/stop_frequency.py index 94a208075f..4e05fe8749 100644 --- a/activitysim/abm/models/stop_frequency.py +++ b/activitysim/abm/models/stop_frequency.py @@ -199,7 +199,7 @@ def stop_frequency( print(f"survey_trips_not_in_trips\n{survey_trips_not_in_trips}") different = True trips_not_in_survey_trips = trips[~trips.index.isin(survey_trips.index)] - if len(survey_trips_not_in_trips) > 0: + if len(trips_not_in_survey_trips) > 0: print(f"trips_not_in_survey_trips\n{trips_not_in_survey_trips}") different = True assert not different diff --git a/activitysim/abm/models/trip_destination.py b/activitysim/abm/models/trip_destination.py index 545cfee29f..01554d984d 100644 --- a/activitysim/abm/models/trip_destination.py +++ b/activitysim/abm/models/trip_destination.py @@ -134,6 +134,7 @@ def _destination_sample( "size_terms": size_term_matrix, "size_terms_array": size_term_matrix.df.to_numpy(), "timeframe": "trip", + "land_use": inject.get_table("land_use").to_frame(), } ) locals_dict.update(skims) @@ -829,6 +830,7 @@ def trip_destination_simulate( "size_terms": size_term_matrix, "size_terms_array": size_term_matrix.df.to_numpy(), "timeframe": "trip", + "land_use": inject.get_table("land_use").to_frame(), } ) locals_dict.update(skims) diff --git a/activitysim/abm/models/trip_matrices.py b/activitysim/abm/models/trip_matrices.py index 0c9e1f447f..cfa65f4182 100644 --- a/activitysim/abm/models/trip_matrices.py +++ b/activitysim/abm/models/trip_matrices.py @@ -51,12 +51,31 @@ def write_trip_matrices(network_los): if "parking_location" in config.setting("models"): parking_settings = config.read_model_settings("parking_location_choice.yaml") parking_taz_col_name = parking_settings["ALT_DEST_COL_NAME"] + assert "AUTO_MODES" in parking_settings, "AUTO_MODES must be specified in parking location settings to properly adjust trip tables for assignment" + auto_modes = parking_settings["AUTO_MODES"] + if parking_taz_col_name in trips_df: - # TODO make parking zone negative, not zero, if not used + + trips_df["true_origin"] = trips_df["origin"] + trips_df["true_destination"] = trips_df["destination"] + + # Get origin parking zone if vehicle not parked at origin + trips_df["origin_parking_zone"] = np.where( + trips_df["tour_id"] == trips_df["tour_id"].shift(1), + np.where( + trips_df["trip_mode"].apply(lambda x: x in auto_modes), + trips_df[parking_taz_col_name].shift(1), + -1 + ), + -1 + ) + trips_df.loc[trips_df[parking_taz_col_name] > 0, "destination"] = trips_df[ parking_taz_col_name ] - # Also need address the return trip + trips_df.loc[trips_df["origin_parking_zone"] > 0, "origin"] = trips_df[ "origin_parking_zone" ] # write matrices by zone system type if network_los.zone_system == los.ONE_ZONE: # taz trips written to taz matrices @@ -213,6 +232,20 @@ def write_trip_matrices(network_los): write_matrices( aggregate_trips, zone_index, orig_index, dest_index, model_settings, True ) + + if "parking_location" in config.setting("models"): + # Reset trip origin and destination to the person's actual locations, not where the vehicle is parked + trips_df["origin"] = trips_df["true_origin"] + trips_df["destination"] =
trips_df["true_destination"] + del trips_df["true_origin"], trips_df["true_destination"] + + if network_los.zone_system == los.TWO_ZONE or network_los.zone_system == los.THREE_ZONE: + trips_df["otaz"] = ( + pipeline.get_table("land_use").reindex(trips_df["origin"]).TAZ.tolist() + ) + trips_df["dtaz"] = ( + pipeline.get_table("land_use").reindex(trips_df["destination"]).TAZ.tolist() + ) def annotate_trips(trips, network_los, model_settings): diff --git a/activitysim/abm/models/trip_mode_choice.py b/activitysim/abm/models/trip_mode_choice.py index e7bb200e45..b165e24b73 100644 --- a/activitysim/abm/models/trip_mode_choice.py +++ b/activitysim/abm/models/trip_mode_choice.py @@ -317,6 +317,11 @@ def trip_mode_choice(trips, network_los, chunk_size, trace_hh_id): pipeline.replace_table("trips", trips_df) if model_settings.get("annotate_trips"): + locals_dict = {} + locals_dict.update(constants) + simulate.set_skim_wrapper_targets(trips_merged, skims) + locals_dict.update(skims) + locals_dict["timeframe"] = "trip" annotate.annotate_trips(model_settings, trace_label, locals_dict) if trace_hh_id: diff --git a/activitysim/abm/models/util/annotate.py b/activitysim/abm/models/util/annotate.py index e50519b38e..727ef4292f 100644 --- a/activitysim/abm/models/util/annotate.py +++ b/activitysim/abm/models/util/annotate.py @@ -15,6 +15,44 @@ logger = logging.getLogger(__name__) +def annotate_households(model_settings, trace_label, locals_dict={}): + """ + Add columns to the households table in the pipeline according to spec. + + Parameters + ---------- + model_settings : dict + trace_label : str + locals_dict : dict, optional + additional variables available when evaluating the annotation expressions + """ + households = inject.get_table("households").to_frame() + expressions.assign_columns( + df=households, + model_settings=model_settings.get("annotate_households"), + locals_dict=locals_dict, + trace_label=tracing.extend_trace_label(trace_label, "annotate_households"), + ) + pipeline.replace_table("households", households) + + +def annotate_persons(model_settings, trace_label, locals_dict={}): + """ + Add columns to the persons table in the pipeline according to spec. + + Parameters + ---------- + model_settings : dict + trace_label : str + locals_dict : dict, optional + additional variables available when evaluating the annotation expressions + """ + persons = inject.get_table("persons").to_frame() + expressions.assign_columns( + df=persons, + model_settings=model_settings.get("annotate_persons"), + locals_dict=locals_dict, + trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"), + ) + pipeline.replace_table("persons", persons) + + def annotate_tours(model_settings, trace_label, locals_dict={}): """ Add columns to the tours table in the pipeline according to spec.
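For context, these annotate helpers are driven by an "annotate_households" / "annotate_persons" block in a model's settings YAML, mirroring the existing annotate_tours pattern. A minimal sketch of how a caller such as auto_ownership_simulate exercises the hook; the spec file name below is illustrative, not part of this diff:

    # hypothetical parsed model settings; annotate_households points at a CSV
    # of expressions to evaluate against the households table
    model_settings = {
        "annotate_households": {
            "SPEC": "annotate_households_auto_ownership",
            "DF": "households",
        }
    }

    from activitysim.abm.models.util import annotate

    # reads households from the pipeline, assigns the spec'd columns,
    # and writes the table back via pipeline.replace_table
    if model_settings.get("annotate_households"):
        annotate.annotate_households(model_settings, trace_label="auto_ownership")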
diff --git a/activitysim/abm/models/util/estimation.py b/activitysim/abm/models/util/estimation.py index 6a5dbadf1f..7c23b97b5e 100644 --- a/activitysim/abm/models/util/estimation.py +++ b/activitysim/abm/models/util/estimation.py @@ -218,6 +218,13 @@ def write_omnibus_table(self): if len(self.omnibus_tables) == 0: return + settings = config.read_model_settings(ESTIMATION_SETTINGS_FILE_NAME) + + edbs_to_skip = settings.get("SKIP_BUNDLE_WRITE_FOR", []) + if self.bundle_name in edbs_to_skip: + self.debug(f"Skipping write to disk for {self.bundle_name}") + return + for omnibus_table, table_names in self.omnibus_tables.items(): self.debug( @@ -236,12 +243,20 @@ def write_omnibus_table(self): 1 if omnibus_table in self.omnibus_tables_append_columns else 0 ) - df = pd.concat([self.tables[t] for t in table_names], axis=concat_axis) + # df = pd.concat([self.tables[t] for t in table_names], axis=concat_axis) + if len(table_names) == 0: + # empty tables + df = pd.DataFrame() + else: + df = pd.concat([self.tables[t] for t in table_names], axis=concat_axis) + + self.debug(f"sorting tables: {table_names}") df.sort_index(ascending=True, inplace=True, kind="mergesort") file_path = self.output_file_path(omnibus_table, "csv") assert not os.path.isfile(file_path) + self.debug(f"writing table: {file_path}") df.to_csv(file_path, mode="a", index=True, header=True) self.debug("write_omnibus_choosers: %s" % file_path) diff --git a/activitysim/abm/models/util/overlap.py b/activitysim/abm/models/util/overlap.py index 70fadfbd43..8665c8a513 100644 --- a/activitysim/abm/models/util/overlap.py +++ b/activitysim/abm/models/util/overlap.py @@ -250,3 +250,96 @@ def person_max_window(persons): max_window.index = persons.index return max_window + + +def calculate_consecutive(array): + # Pad the array with a column of zeros on either side + append1 = np.zeros((array.shape[0], 1), dtype=int) + array_ext = np.column_stack((append1, array, append1)) + + # Get start and stop indices with 1s as triggers + diffs = np.diff((array_ext == 1).astype(int), axis=1) + starts = np.argwhere(diffs == 1) + stops = np.argwhere(diffs == -1) + + # Get intervals using differences between start and stop indices + intvs = stops[:, 1] - starts[:, 1] + + # Store intervals as a 2D array for further vectorized ops + c = np.bincount(starts[:, 0], minlength=array.shape[0]) + mask = np.arange(c.max()) < c[:, None] + intvs2D = mask.astype(float) + intvs2D[mask] = intvs + + # Get max along each row as final output + out = intvs2D.max(1).astype(int) + return out + + +def person_available_periods(persons, start_bin=None, end_bin=None, continuous=False): + """ + Returns the number of available time period bins for each person in persons. + Can limit the calculation to a window of bins by supplying a starting and/or ending bin. + Can return either the total number of available time bins (continuous=False), + or only the maximum number of consecutive available bins (continuous=True). + + This is equivalent to person_max_window if no start/end bins are provided and continuous=True + + time bins are inclusive, i.e. [start_bin, end_bin] + + e.g.
+ available out of timetable has dummy first and last bins + available = [ + [1,1,1,1,1,1,1,1,1,1,1,1], + [1,1,0,1,1,0,0,1,0,1,0,1], + #-,0,1,2,3,4,5,6,7,8,9,- time bins + ] + returns: + for start_bin=None, end_bin=None, continuous=False: (10, 5) + for start_bin=None, end_bin=None, continuous=True: (10, 2) + for start_bin=5, end_bin=9, continuous=False: (5, 2) + for start_bin=5, end_bin=9, continuous=True: (5, 1) + + + Parameters + ---------- + start_bin : (int) starting time bin to include, counting from 0 + end_bin : (int) ending time bin to include + continuous : (bool) count all available bins if False, or just the largest continuous run if True + + Returns + ------- + pd.Series of the number of available time bins indexed by person ID + """ + timetable = inject.get_injectable("timetable") + + # ndarray with one row per person and one column per time period + # array value of 1 where free periods and 0 elsewhere + s = pd.Series(persons.index.values, index=persons.index) + + # first and last bins are dummies in the time table + # so if you have 48 half hour time periods, shape is (len(persons), 50) + available = timetable.individually_available(s) + + # Create a mask to exclude bins before the starting bin and after the ending bin + mask = np.ones(available.shape[1], dtype=bool) + mask[0] = False + mask[len(mask) - 1] = False + if start_bin is not None: + # +1 needed due to dummy first bin + mask[: start_bin + 1] = False + if end_bin is not None: + # +2 for dummy first bin and inclusive end_bin + mask[end_bin + 2 :] = False + + # Apply the mask to the array + masked_array = available[:, mask] + + # Calculate the number of available time periods for each person + availability = np.sum(masked_array, axis=1) + + if continuous: + availability = calculate_consecutive(masked_array) + + availability = pd.Series(availability, index=persons.index) + return availability diff --git a/activitysim/abm/models/util/school_escort_tours_trips.py b/activitysim/abm/models/util/school_escort_tours_trips.py index 778fb86454..0a7796dea7 100644 --- a/activitysim/abm/models/util/school_escort_tours_trips.py +++ b/activitysim/abm/models/util/school_escort_tours_trips.py @@ -399,6 +399,13 @@ def merge_school_escort_trips_into_pipeline(): tours = pipeline.get_table("tours") trips = pipeline.get_table("trips") + # checking to see if there are school escort trips to merge in + if len(school_escort_trips) == 0: + # if no trips, fill escorting columns with NA + trips[["escort_participants", "school_escort_direction", "school_escort_trip_id",]] = pd.NA + pipeline.replace_table("trips", trips) + return trips + # want to remove stops if school escorting takes place on that half tour so we can replace them with the actual stops out_se_tours = tours[ tours["school_esc_outbound"].isin(["pure_escort", "ride_share"]) ] @@ -603,6 +610,10 @@ def force_escortee_tour_modes_to_match_chauffeur(tours): # Does it even matter if trip modes are getting matched later?
escort_bundles = inject.get_table("escort_bundles").to_frame() + if len(escort_bundles) == 0: + # do not need to do anything if no escorting + return tours + # grabbing the school tour ids for each school escort bundle se_tours = escort_bundles[["school_tour_ids", "chauf_tour_id"]].copy() # merging in chauffeur tour mode diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index 324e505dda..efc4ef19b6 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -221,7 +221,19 @@ def construct_model_alternatives(model_settings, alts_cats_dict, vehicle_type_da ), f"missing vehicle data for alternatives:\n {missing_alts}" else: # eliminate alternatives if no vehicle type data + num_alts_before_filter = len(alts_wide) alts_wide = alts_wide[alts_wide._merge != "left_only"] + logger.warning( + f"Removed {num_alts_before_filter - len(alts_wide)} alternatives not included in input vehicle type data." + ) + # need to also remove any alts from alts_long + alts_long.set_index(["body_type", "age", "fuel_type"], inplace=True) + alts_long = alts_long[ + alts_long.index.isin( + alts_wide.set_index(["body_type", "age", "fuel_type"]).index + ) + ].reset_index() + alts_long.index = alts_wide.index alts_wide.drop(columns="_merge", inplace=True) # converting age to integer to allow interactions in utilities @@ -335,6 +347,16 @@ def iterate_vehicle_type_choice( model_settings, alts_cats_dict, vehicle_type_data ) + # alts preprocessor + alts_preprocessor_settings = model_settings.get("alts_preprocessor", None) + if alts_preprocessor_settings: + expressions.assign_columns( + df=alts_wide, + model_settings=alts_preprocessor_settings, + locals_dict=locals_dict, + trace_label=trace_label, + ) + # - preparing choosers for iterating vehicles_merged = vehicles_merged.to_frame() vehicles_merged["already_owned_veh"] = "" @@ -368,6 +390,12 @@ def iterate_vehicle_type_choice( len(choosers), ) + # filter columns of alts and choosers + if len(model_settings.get("COLS_TO_INCLUDE_IN_CHOOSER_TABLE", [])) > 0: + choosers = choosers[model_settings.get("COLS_TO_INCLUDE_IN_CHOOSER_TABLE", [])] + if len(model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE", [])) > 0: + alts_wide = alts_wide[model_settings.get("COLS_TO_INCLUDE_IN_ALTS_TABLE", [])] + # if there were so many alts that they had to be created programmatically, # by combining categorical variables, then the utility expressions should make # use of interaction terms to accommodate alt-specific coefficients and constants @@ -416,14 +444,12 @@ def iterate_vehicle_type_choice( choices.rename(columns={"choice": "vehicle_type"}, inplace=True) if alts_cats_dict: - alts = ( - alts_long[alts_long.columns] - .apply(lambda row: "_".join(row.values.astype(str)), axis=1) - .values - ) + alts = alts_long[alts_long.columns].apply( + lambda row: "_".join(row.values.astype(str)), axis=1 + ).to_dict() else: - alts = model_spec.columns - choices["vehicle_type"] = choices["vehicle_type"].map(dict(enumerate(alts))) + alts = dict(enumerate(model_spec.columns)) + choices["vehicle_type"] = choices["vehicle_type"].map(alts) # STEP II: append probabilistic vehicle type attributes if probs_spec_file is not None: diff --git a/activitysim/abm/tables/disaggregate_accessibility.py b/activitysim/abm/tables/disaggregate_accessibility.py index 4c4eb9ad40..db65652f5f 100644 --- a/activitysim/abm/tables/disaggregate_accessibility.py +++ b/activitysim/abm/tables/disaggregate_accessibility.py @@ -151,14
+151,13 @@ def disaggregate_accessibility(persons, households, land_use, accessibility): accessibility_cols = [ x for x in proto_accessibility_df.columns if "accessibility" in x ] + keep_cols = model_settings.get("KEEP_COLS", accessibility_cols) # Parse the merging parameters assert merging_params is not None # Check if already assigned! - if set(accessibility_cols).intersection(persons_merged_df.columns) == set( - accessibility_cols - ): + if set(keep_cols).intersection(persons_merged_df.columns) == set(keep_cols): return # Find the nearest zone (spatially) with accessibilities calculated @@ -190,7 +189,7 @@ def disaggregate_accessibility(persons, households, land_use, accessibility): # because it will get slightly different logsums for households in the same zone. # This is because different destination zones were selected. To resolve, get mean by cols. right_df = ( - proto_accessibility_df.groupby(merge_cols)[accessibility_cols] + proto_accessibility_df.groupby(merge_cols)[keep_cols] .mean() .sort_values(nearest_cols) .reset_index() @@ -223,9 +222,9 @@ def disaggregate_accessibility(persons, households, land_use, accessibility): ) # Predict the nearest person ID and pull the logsums - matched_logsums_df = right_df.loc[clf.predict(x_pop)][ - accessibility_cols - ].reset_index(drop=True) + matched_logsums_df = right_df.loc[clf.predict(x_pop)][keep_cols].reset_index( + drop=True + ) merge_df = pd.concat( [left_df.reset_index(drop=False), matched_logsums_df], axis=1 ).set_index("person_id") @@ -257,12 +256,12 @@ def disaggregate_accessibility(persons, households, land_use, accessibility): # Check that it was correctly left-joined assert all(persons_merged_df[merge_cols] == merge_df[merge_cols]) - assert any(merge_df[accessibility_cols].isnull()) + assert any(merge_df[keep_cols].isnull()) # Inject merged accessibilities so that it can be included in persons_merged function - inject.add_table("disaggregate_accessibility", merge_df[accessibility_cols]) + inject.add_table("disaggregate_accessibility", merge_df[keep_cols]) - return merge_df[accessibility_cols] + return merge_df[keep_cols] inject.broadcast( diff --git a/activitysim/core/config.py b/activitysim/core/config.py index 7024d0512b..a86093f3e6 100644 --- a/activitysim/core/config.py +++ b/activitysim/core/config.py @@ -708,7 +708,7 @@ def filter_warnings(): # These warning are left as warnings as an invitation for future enhancement. 
from pandas.errors import PerformanceWarning - warnings.filterwarnings("default", category=PerformanceWarning) + warnings.filterwarnings("ignore", category=PerformanceWarning) # pandas 1.5 # beginning in pandas version 1.5, a new warning is emitted when a column is set via iloc diff --git a/activitysim/core/mem.py b/activitysim/core/mem.py index ae832f250c..de558b926d 100644 --- a/activitysim/core/mem.py +++ b/activitysim/core/mem.py @@ -296,6 +296,9 @@ def shared_memory_size(data_buffers=None): shared_size += Dataset.shm.preload_shared_memory_size(data_buffer[11:]) continue + if isinstance(data_buffer, multiprocessing.shared_memory.SharedMemory): + shared_size += data_buffer.size + continue try: obj = data_buffer.get_obj() except Exception: diff --git a/activitysim/core/skim_dict_factory.py b/activitysim/core/skim_dict_factory.py index 450b98d25c..a3b37aea2c 100644 --- a/activitysim/core/skim_dict_factory.py +++ b/activitysim/core/skim_dict_factory.py @@ -410,16 +410,23 @@ def allocate_skim_buffer(self, skim_info, shared=False): ) if shared: - if dtype_name == "float64": - typecode = "d" - elif dtype_name == "float32": - typecode = "f" - else: - raise RuntimeError( - "allocate_skim_buffer unrecognized dtype %s" % dtype_name - ) - - buffer = multiprocessing.RawArray(typecode, buffer_size) + # if dtype_name == "float64": + # typecode = "d" + # elif dtype_name == "float32": + # typecode = "f" + # else: + # raise RuntimeError( + # "allocate_skim_buffer unrecognized dtype %s" % dtype_name + # ) + + # buffer = multiprocessing.RawArray(typecode, buffer_size) + shared_mem_name = f"skim_shared_memory__{skim_info.skim_tag}" + try: + buffer = multiprocessing.shared_memory.SharedMemory(name=shared_mem_name) + logger.info(f"skim buffer already allocated in shared memory: {shared_mem_name}, size: {buffer.size}") + except FileNotFoundError: + buffer = multiprocessing.shared_memory.SharedMemory(create=True, size=csz, name=shared_mem_name) + logger.info(f"allocating skim buffer in shared memory: {shared_mem_name}, size: {buffer.size}") else: buffer = np.zeros(buffer_size, dtype=dtype) @@ -440,10 +447,16 @@ def _skim_data_from_buffer(self, skim_info, skim_buffer): """ dtype = np.dtype(skim_info.dtype_name) - assert len(skim_buffer) == util.iprod(skim_info.skim_data_shape) - skim_data = np.frombuffer(skim_buffer, dtype=dtype).reshape( - skim_info.skim_data_shape - ) + if isinstance(skim_buffer, multiprocessing.shared_memory.SharedMemory): + assert skim_buffer.size >= util.iprod(skim_info.skim_data_shape) * dtype.itemsize + skim_data = np.frombuffer(skim_buffer.buf, dtype=dtype, count=util.iprod(skim_info.skim_data_shape)).reshape( + skim_info.skim_data_shape + ) + else: + assert len(skim_buffer) == util.iprod(skim_info.skim_data_shape) + skim_data = np.frombuffer(skim_buffer, dtype=dtype).reshape( + skim_info.skim_data_shape + ) return skim_data def load_skims_to_buffer(self, skim_info, skim_buffer): @@ -462,6 +475,9 @@ def load_skims_to_buffer(self, skim_info, skim_buffer): skim_data = self._skim_data_from_buffer(skim_info, skim_buffer) assert skim_data.shape == skim_info.skim_data_shape + if isinstance(skim_buffer, multiprocessing.shared_memory.SharedMemory) and skim_data.any(): + return + if read_cache: # returns None if cache file not found cache_data = self._open_existing_readonly_memmap_skim_cache(skim_info) diff --git a/activitysim/core/tracing.py b/activitysim/core/tracing.py index 44707c0aea..6bbc2854de 100644 --- a/activitysim/core/tracing.py +++ b/activitysim/core/tracing.py @@ -776,18 +776,27 @@ 
def interaction_trace_rows(interaction_df, choosers, sample_size=None): households_table_name.pop(), ) - if choosers.index.name == "person_id" and persons_table_name in traceable_table_ids: + if ( + choosers.index.name in ["person_id", "proto_person_id"] + ) and persons_table_name in traceable_table_ids: slicer_column_name = choosers.index.name - targets = traceable_table_ids["persons"] - elif choosers.index.name == "household_id" and "households" in traceable_table_ids: + targets = traceable_table_ids[persons_table_name] + elif ( + choosers.index.name in ["household_id", "proto_household_id"] + ) and households_table_name in traceable_table_ids: slicer_column_name = choosers.index.name - targets = traceable_table_ids["households"] + targets = traceable_table_ids[households_table_name] elif "household_id" in choosers.columns and "households" in traceable_table_ids: slicer_column_name = "household_id" targets = traceable_table_ids[households_table_name] elif "person_id" in choosers.columns and persons_table_name in traceable_table_ids: slicer_column_name = "person_id" targets = traceable_table_ids[persons_table_name] + elif ( + choosers.index.name == "proto_tour_id" and "proto_tours" in traceable_table_ids + ): + slicer_column_name = choosers.index.name + targets = traceable_table_ids["proto_tours"] else: print(choosers.columns) raise RuntimeError( diff --git a/activitysim/core/util.py b/activitysim/core/util.py index 217cdb8377..421eb35a06 100644 --- a/activitysim/core/util.py +++ b/activitysim/core/util.py @@ -468,3 +468,78 @@ def nearest_node_index(node, nodes): deltas = nodes - node dist_2 = np.einsum("ij,ij->i", deltas, deltas) return np.argmin(dist_2) + + +def drop_unused_chooser_columns( + choosers, spec, locals_d, custom_chooser, sharrow_enabled=False +): + """ + Drop unused columns from the chooser table, based on the spec and custom_chooser function. + Variables are identified by applying an identifier regex to the spec expressions, the skim keys in locals_d, and the source of custom_chooser.
+ """ + # keep only variables needed for spec + import re + + # define a regular expression to find variables in spec + pattern = r"[a-zA-Z_][a-zA-Z0-9_]*" + + unique_variables_in_spec = set( + spec.reset_index()["Expression"].apply(lambda x: re.findall(pattern, x)).sum() + ) + + if locals_d: + unique_variables_in_spec.add(locals_d.get("orig_col_name", None)) + unique_variables_in_spec.add(locals_d.get("dest_col_name", None)) + if locals_d.get("timeframe") == "trip": + orig_col_name = locals_d.get("ORIGIN", None) + dest_col_name = locals_d.get("DESTINATION", None) + stop_col_name = None + parking_col_name = locals_d.get("PARKING", None) + primary_origin_col_name = None + if orig_col_name is None and "od_skims" in locals_d: + orig_col_name = locals_d["od_skims"].orig_key + if dest_col_name is None and "od_skims" in locals_d: + dest_col_name = locals_d["od_skims"].dest_key + if stop_col_name is None and "dp_skims" in locals_d: + stop_col_name = locals_d["dp_skims"].dest_key + if primary_origin_col_name is None and "dnt_skims" in locals_d: + primary_origin_col_name = locals_d["dnt_skims"].dest_key + unique_variables_in_spec.add(orig_col_name) + unique_variables_in_spec.add(dest_col_name) + unique_variables_in_spec.add(parking_col_name) + unique_variables_in_spec.add(primary_origin_col_name) + unique_variables_in_spec.add(stop_col_name) + unique_variables_in_spec.add("trip_period") + # when using trip_scheduling_choice for trip scheduling + unique_variables_in_spec.add("last_outbound_stop") + unique_variables_in_spec.add("last_inbound_stop") + + # when in sharrow mode, need to keep the following columns in the choosers table + if sharrow_enabled: + unique_variables_in_spec.add("out_period") + unique_variables_in_spec.add("in_period") + unique_variables_in_spec.add("purpose_index_num") + + if custom_chooser: + import inspect + + custom_chooser_lines = inspect.getsource(custom_chooser) + unique_variables_in_spec.update(re.findall(pattern, custom_chooser_lines)) + + logger.info("Dropping unused variables in chooser table") + + logger.info( + "before dropping, the choosers table has {} columns: {}".format( + len(choosers.columns), choosers.columns + ) + ) + + # keep only variables needed for spec + choosers = choosers[[c for c in choosers.columns if c in unique_variables_in_spec]] + + logger.info( + "after dropping, the choosers table has {} columns: {}".format( + len(choosers.columns), choosers.columns + ) + ) + + return choosers \ No newline at end of file diff --git a/activitysim/estimation/larch/__init__.py b/activitysim/estimation/larch/__init__.py index 1d87e1ce12..a175db6e7d 100644 --- a/activitysim/estimation/larch/__init__.py +++ b/activitysim/estimation/larch/__init__.py @@ -9,6 +9,7 @@ from .scheduling import * from .simple_simulate import * from .stop_frequency import * +from .external_worker_identification import * def component_model(name, *args, **kwargs): diff --git a/activitysim/estimation/larch/cdap.py b/activitysim/estimation/larch/cdap.py index fdb801de03..ee96ee5aab 100644 --- a/activitysim/estimation/larch/cdap.py +++ b/activitysim/estimation/larch/cdap.py @@ -19,8 +19,10 @@ _logger = logging.getLogger(logger_name) +MAX_HHSIZE = 5 -def generate_alternatives(n_persons): + +def generate_alternatives(n_persons, add_joint=False): """ Generate a dictionary of CDAP alternatives.
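For orientation, the add_joint branch introduced in the hunk that follows expands the CDAP alternative set by suffixing "J" onto any pattern with at least two active (M or N) members. A worked sketch for a 2-person household, assuming the existing basic_patterns of M/N/H (Mandatory / Non-mandatory / Home):

    import itertools
    import re
    import numpy as np

    basic_patterns = ["M", "N", "H"]
    alt_names = ["".join(i) for i in itertools.product(basic_patterns, repeat=2)]
    # ['MM', 'MN', 'MH', 'NM', 'NN', 'NH', 'HM', 'HN', 'HH']

    # joint-tour variants only exist where at least two members are active (M or N)
    joint_alts = [alt + "J" for alt in alt_names if len(re.findall(r"[MN]", alt)) >= 2]
    # ['MMJ', 'MNJ', 'NMJ', 'NNJ']

    alt_names = alt_names + joint_alts
    alt_codes = np.arange(1, len(alt_names) + 1)
    alts = dict(zip(alt_names, alt_codes))  # 13 alternatives for a 2-person household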
@@ -39,8 +41,14 @@ def generate_alternatives(n_persons): alt_names = list( "".join(i) for i in itertools.product(basic_patterns, repeat=n_persons) ) + if add_joint: + pattern = r"[MN]" + joint_alts = [ + alt + "J" for alt in alt_names if len(re.findall(pattern, alt)) >= 2 + ] + alt_names = alt_names + joint_alts alt_codes = np.arange(1, len(alt_names) + 1) - return Dict(zip(alt_names, alt_codes)) + return dict(zip(alt_names, alt_codes)) def apply_replacements(expression, prefix, tokens): @@ -67,7 +75,9 @@ def apply_replacements(expression, prefix, tokens): return expression -def cdap_base_utility_by_person(model, n_persons, spec, alts=None, value_tokens=()): +def cdap_base_utility_by_person( + model, n_persons, spec, alts=None, value_tokens=(), add_joint=False +): """ Build the base utility by person for each pattern. @@ -100,7 +110,7 @@ def cdap_base_utility_by_person(model, n_persons, spec, alts=None, value_tokens= model.utility_co[3] += X(spec.Expression[i]) * P(spec.loc[i, "H"]) else: if alts is None: - alts = generate_alternatives(n_persons) + alts = generate_alternatives(n_persons, add_joint) person_numbers = range(1, n_persons + 1) for pnum in person_numbers: for i in spec.index: @@ -220,13 +230,71 @@ def cdap_interaction_utility(model, n_persons, alts, interaction_coef, coefficie model.utility_co[anum] += linear_component -def cdap_split_data(households, values): +def cdap_joint_tour_utility(model, n_persons, alts, joint_coef, values): + """ + FIXME: Not fully implemented!!!! + + Code is adapted from the cdap model in ActivitySim with the joint tour component + Structure is pretty much in place, but dependencies need to be filtered out. + """ + + for row in joint_coef.itertuples(): + for aname, anum in alts.items(): + # only adding joint tour utility to alternatives with joint tours + if "J" not in aname: + continue + expression = row.Expression + dependency_name = row.dependency + coefficient = row.coefficient + + # dealing with dependencies + if dependency_name in ["M_px", "N_px", "H_px"]: + if "_pxprod" in expression: + prod_conds = row.Expression.split("|") + expanded_expressions = [ + tup + for tup in itertools.product( + range(len(prod_conds)), repeat=n_persons + ) + ] + for expression_tup in expanded_expressions: + expression_list = [] + dependency_list = [] + for counter in range(len(expression_tup)): + expression_list.append( + prod_conds[expression_tup[counter]].replace( + "xprod", str(counter + 1) + ) + ) + if expression_tup[counter] == 0: + dependency_list.append( + dependency_name.replace("x", str(counter + 1)) + ) + + expression_value = "&".join(expression_list) + # FIXME only apply to alternative if dependency satisfied + bug + model.utility_co[anum] += X(expression_value) * P(coefficient) + + elif "_px" in expression: + for pnum in range(1, n_persons + 1): + dependency_name = row.dependency.replace("x", str(pnum)) + expression = row.Expression.replace("x", str(pnum)) + # FIXME only apply to alternative if dependency satisfied + bug + model.utility_co[anum] += X(expression) * P(coefficient) + + else: + model.utility_co[anum] += X(expression) * P(coefficient) + + +def cdap_split_data(households, values, add_joint): if "cdap_rank" not in values: raise ValueError("assign cdap_rank to values first") # only process the first 5 household members - values = values[values.cdap_rank <= 5] + values = values[values.cdap_rank <= MAX_HHSIZE] cdap_data = {} - for hhsize, hhs_part in households.groupby(households.hhsize.clip(1, 5)): + for hhsize, hhs_part in 
         if hhsize == 1:
             v = pd.merge(values, hhs_part.household_id, on="household_id").set_index(
                 "household_id"
@@ -239,16 +307,31 @@
             )
             v.columns = [f"p{i[1]}_{i[0]}" for i in v.columns]
             for agglom in ["override_choice", "model_choice"]:
-                v[agglom] = v[[f"p{p}_{agglom}" for p in range(1, hhsize + 1)]].sum(1)
+                v[agglom] = (
+                    v[[f"p{p}_{agglom}" for p in range(1, hhsize + 1)]]
+                    .fillna("H")
+                    .sum(1)
+                )
+                if add_joint:
+                    joint_tour_indicator = (
+                        hhs_part.set_index("household_id")
+                        .reindex(v.index)
+                        .has_joint_tour
+                    )
+                    pd.testing.assert_index_equal(v.index, joint_tour_indicator.index)
+                    v[agglom] = np.where(
+                        joint_tour_indicator == 1, v[agglom] + "J", v[agglom]
+                    )
         cdap_data[hhsize] = v
+
     return cdap_data
 
 
-def cdap_dataframes(households, values):
-    data = cdap_split_data(households, values)
+def cdap_dataframes(households, values, add_joint):
+    data = cdap_split_data(households, values, add_joint)
     dfs = {}
     for hhsize in data.keys():
-        alts = generate_alternatives(hhsize)
+        alts = generate_alternatives(hhsize, add_joint)
         dfs[hhsize] = DataFrames(
             co=data[hhsize],
             alt_names=alts.keys(),
@@ -296,6 +379,7 @@ def cdap_data(
     spec1_file="{name}_INDIV_AND_HHSIZE1_SPEC.csv",
     settings_file="{name}_model_settings.yaml",
     chooser_data_file="{name}_values_combined.csv",
+    joint_coeffs_file="{name}_joint_tour_coefficients.csv",
 ):
     edb_directory = edb_directory.format(name=name)
     if not os.path.exists(edb_directory):
@@ -326,7 +410,7 @@ def read_yaml(filename, **kwargs):
     if person_type_map is None:
         raise KeyError("PERSON_TYPE_MAP missing from cdap_settings.yaml")
 
-    person_rank = cdap.assign_cdap_rank(persons, person_type_map)
+    # person_rank = cdap.assign_cdap_rank(persons, person_type_map)
 
     coefficients = read_csv(
         coefficients_file,
@@ -341,9 +425,28 @@
         comment="#",
     )
 
+    try:
+        joint_coef = read_csv(
+            joint_coeffs_file,
+            # dtype={"interaction_ptypes": str},
+            # keep_default_na=False,
+            comment="#",
+        )
+        add_joint = True
+    except FileNotFoundError:
+        joint_coef = None
+        add_joint = False
+    print("Including joint tour utility?:", add_joint)
+
     spec1 = read_csv(spec1_file, comment="#")
     values = read_csv(chooser_data_file, comment="#")
-    values["cdap_rank"] = person_rank
+    person_rank = cdap.assign_cdap_rank(
+        persons[persons.household_id.isin(values.household_id)]
+        .set_index("person_id")
+        .reindex(values.person_id),
+        person_type_map,
+    )
+    values["cdap_rank"] = person_rank.values
 
     return Dict(
         edb_directory=Path(edb_directory),
@@ -353,6 +456,8 @@
         coefficients=coefficients,
         households=hhs,
         settings=settings,
+        joint_coef=joint_coef,
+        add_joint=add_joint,
     )
 
 
@@ -365,6 +470,7 @@ def cdap_model(
     spec1_file="{name}_INDIV_AND_HHSIZE1_SPEC.csv",
     settings_file="{name}_model_settings.yaml",
     chooser_data_file="{name}_values_combined.csv",
+    joint_coeffs_file="{name}_joint_tour_coefficients.csv",
     return_data=False,
 ):
     d = cdap_data(
@@ -377,6 +483,7 @@
         spec1_file=spec1_file,
         settings_file=settings_file,
         chooser_data_file=chooser_data_file,
+        joint_coeffs_file=joint_coeffs_file,
     )
 
     households = d.households
@@ -384,8 +491,9 @@
     spec1 = d.spec1
     interaction_coef = d.interaction_coef
     coefficients = d.coefficients
+    add_joint = d.add_joint
 
-    cdap_dfs = cdap_dataframes(households, values)
+    cdap_dfs = cdap_dataframes(households, values, add_joint)
     m = {}
     _logger.info(f"building for model 1")
     m[1] = Model(dataservice=cdap_dfs[1])
@@ -398,12 +506,15 @@
     interaction_coef["cardinality"] = interaction_coef[
         "interaction_ptypes"
     ].str.len()
 
-    for s in [2, 3, 4, 5]:
+    for s in range(2, MAX_HHSIZE + 1):
+        # for s in [2, 3, 4, 5]:
         _logger.info(f"building for model {s}")
         m[s] = Model(dataservice=cdap_dfs[s])
-        alts = generate_alternatives(s)
+        alts = generate_alternatives(s, add_joint)
         cdap_base_utility_by_person(m[s], s, spec1, alts, values.columns)
         cdap_interaction_utility(m[s], s, alts, interaction_coef, coefficients)
+        # if add_joint:
+        #     cdap_joint_tour_utility(m[s], s, alts, d.joint_coef, values)
         m[s].choice_any = True
         m[s].availability_any = True
diff --git a/activitysim/estimation/larch/external_worker_identification.py b/activitysim/estimation/larch/external_worker_identification.py
new file mode 100644
index 0000000000..63d267cf09
--- /dev/null
+++ b/activitysim/estimation/larch/external_worker_identification.py
@@ -0,0 +1,80 @@
+import os
+from typing import Collection
+
+import numpy as np
+import pandas as pd
+import yaml
+from larch import DataFrames, Model, P, X
+from larch.util import Dict
+
+from .general import (
+    apply_coefficients,
+    dict_of_linear_utility_from_spec,
+    remove_apostrophes,
+)
+from .simple_simulate import simple_simulate_data
+
+
+def external_worker_identification_model(
+    name="external_worker_identification",
+    edb_directory="output/estimation_data_bundle/{name}/",
+    return_data=False,
+):
+    data = simple_simulate_data(
+        name=name,
+        edb_directory=edb_directory,
+        values_index_col="person_id",
+    )
+    coefficients = data.coefficients
+    # coef_template = data.coef_template  # not used
+    spec = data.spec
+    chooser_data = data.chooser_data
+    settings = data.settings
+
+    altnames = list(spec.columns[3:])
+    altcodes = range(len(altnames))
+
+    chooser_data = remove_apostrophes(chooser_data)
+    chooser_data.fillna(0, inplace=True)
+
+    # Remove choosers with invalid observed choice
+    chooser_data = chooser_data[chooser_data["override_choice"] >= 0]
+
+    m = Model()
+    # One of the alternatives is coded as 0, so
+    # we need to explicitly initialize the MNL nesting graph
+    # and set the root_id to a value other than zero.
+    m.initialize_graph(alternative_codes=altcodes, root_id=99)
+
+    m.utility_co = dict_of_linear_utility_from_spec(
+        spec,
+        "Label",
+        dict(zip(altnames, altcodes)),
+    )
+
+    apply_coefficients(coefficients, m)
+
+    d = DataFrames(
+        co=chooser_data,
+        av=True,
+        alt_codes=altcodes,
+        alt_names=altnames,
+    )
+
+    m.dataservice = d
+    m.choice_co_code = "override_choice"
+
+    if return_data:
+        return (
+            m,
+            Dict(
+                edb_directory=data.edb_directory,
+                chooser_data=chooser_data,
+                coefficients=coefficients,
+                spec=spec,
+                altnames=altnames,
+                altcodes=altcodes,
+            ),
+        )
+
+    return m
diff --git a/activitysim/estimation/larch/location_choice.py b/activitysim/estimation/larch/location_choice.py
index 74a426e714..870343f893 100644
--- a/activitysim/estimation/larch/location_choice.py
+++ b/activitysim/estimation/larch/location_choice.py
@@ -106,6 +106,9 @@ def _read_csv(filename, **kwargs):
         .set_index("segment")
     )
     size_spec = size_spec.loc[:, size_spec.max() > 0]
+    assert (
+        len(size_spec) > 0
+    ), f"Empty size_spec, is model_selector {SIZE_TERM_SELECTOR} in your size term file?"
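+    # Illustrative note (not in the original patch): size_spec is indexed by
+    # segment at this point, so the assert above fires when no row of the size
+    # term file has a model_selector matching SIZE_TERM_SELECTOR, e.g. a size
+    # term file containing only "workplace" rows while estimating a school
+    # location model.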
     size_coef = size_coefficients_from_spec(size_spec)
 
@@ -214,6 +217,9 @@
     else:
         av = 1
 
+    assert len(x_co) > 0, "Empty chooser dataframe"
+    assert len(x_ca_1) > 0, "Empty alternatives dataframe"
+
     d = DataFrames(co=x_co, ca=x_ca_1, av=av)
 
     m = Model(dataservice=d)
@@ -331,6 +337,14 @@
     )
 
 
+def external_workplace_location_model(**kwargs):
+    unused = kwargs.pop("name", None)
+    return location_choice_model(
+        name="external_workplace_location",
+        **kwargs,
+    )
+
+
 def school_location_model(**kwargs):
     unused = kwargs.pop("name", None)
     return location_choice_model(
@@ -367,6 +381,14 @@
     )
 
 
+def external_non_mandatory_destination_model(**kwargs):
+    unused = kwargs.pop("name", None)
+    return location_choice_model(
+        name="external_non_mandatory_destination",
+        **kwargs,
+    )
+
+
 def trip_destination_model(**kwargs):
     unused = kwargs.pop("name", None)
     return location_choice_model(
diff --git a/activitysim/estimation/larch/nonmand_tour_freq.py b/activitysim/estimation/larch/nonmand_tour_freq.py
index 9dfdac73a4..ba911dd219 100644
--- a/activitysim/estimation/larch/nonmand_tour_freq.py
+++ b/activitysim/estimation/larch/nonmand_tour_freq.py
@@ -7,6 +7,8 @@
 from larch import DataFrames, Model
 from larch.log import logger_name
 from larch.util import Dict
+import pickle
+from datetime import datetime
 
 from .general import (
     apply_coefficients,
@@ -27,6 +29,7 @@ def interaction_simulate_data(
     coefficients_files="{segment_name}/{name}_coefficients_{segment_name}.csv",
     chooser_data_files="{segment_name}/{name}_choosers_combined.csv",
     alt_values_files="{segment_name}/{name}_interaction_expression_values.csv",
+    segment_subset=[],
 ):
     edb_directory = edb_directory.format(name=name)
 
@@ -46,21 +49,30 @@ def _read_csv(filename, **kwargs):
     alt_values = {}
 
     segment_names = [s["NAME"] for s in settings["SPEC_SEGMENTS"]]
+    if len(segment_subset) > 0:
+        assert set(segment_subset).issubset(
+            set(segment_names)
+        ), f"{segment_subset} is not a subset of {segment_names}"
+        segment_names = segment_subset
 
     for segment_name in segment_names:
+        print(f"Loading EDB for {segment_name} segment")
         coefficients[segment_name] = _read_csv(
             coefficients_files.format(name=name, segment_name=segment_name),
             index_col="coefficient_name",
+            comment="#",
         )
         chooser_data[segment_name] = _read_csv(
             chooser_data_files.format(name=name, segment_name=segment_name),
         )
         alt_values[segment_name] = _read_csv(
             alt_values_files.format(name=name, segment_name=segment_name),
+            comment="#",
         )
 
     spec = _read_csv(
         spec_file,
+        comment="#",
     )
     spec = remove_apostrophes(spec, ["Label"])
     # alt_names = list(spec.columns[3:])
@@ -118,10 +130,80 @@ def unavail(model, x_ca):
     return unav
 
 
+# FIXME move all this to larch/general.py? see ActivitySim issue #686
+def _read_feather(filename, name, edb_directory, **kwargs):
+    filename = filename.format(name=name)
+    return pd.read_feather(os.path.join(edb_directory, filename), **kwargs)
+
+
+def _to_feather(df, filename, name, edb_directory, **kwargs):
+    filename = filename.format(name=name)
+    return df.to_feather(os.path.join(edb_directory, filename), **kwargs)
+
+
+def _read_pickle(filename, name, edb_directory, **kwargs):
+    filename = filename.format(name=name)
+    return pd.read_pickle(os.path.join(edb_directory, filename), **kwargs)
+
+
+def _to_pickle(df, filename, name, edb_directory, **kwargs):
+    filename = filename.format(name=name)
+    return df.to_pickle(os.path.join(edb_directory, filename), **kwargs)
+
+
+def _file_exists(filename, name, edb_directory):
+    filename = filename.format(name=name)
+    return os.path.exists(os.path.join(edb_directory, filename))
+
+
+def get_x_ca_df(alt_values, name, edb_directory, num_chunks):
+    def split(a, n):
+        k, m = divmod(len(a), n)
+        return (a[i * k + min(i, m) : (i + 1) * k + min(i + 1, m)] for i in range(n))
+
+    # process x_ca with cv_to_ca with or without chunking
+    x_ca_pickle_file = "{name}_x_ca.pkl"
+    if num_chunks == 1:
+        x_ca = cv_to_ca(alt_values)
+    elif _file_exists(x_ca_pickle_file, name, edb_directory):
+        # if a pickle file from previous x_ca processing exists, load it to save time
+        time_start = datetime.now()
+        x_ca = _read_pickle(x_ca_pickle_file, name, edb_directory)
+        print(
+            f"x_ca data loaded from {name}_x_ca.pkl - time elapsed {(datetime.now() - time_start).total_seconds()}"
+        )
+    else:
+        time_start = datetime.now()
+        # calculate the chunk size (max number of rows per chunk) implied by num_chunks
+        chunking_size = round(len(alt_values) / num_chunks, 3)
+        print(
+            f"Using {num_chunks} chunks results in chunk size of {chunking_size} (of {len(alt_values)} total rows)"
+        )
+        all_chunk_ids = list(alt_values.index.get_level_values(0).unique())
+        split_ids = list(split(all_chunk_ids, num_chunks))
+        x_ca_list = []
+        for i, chunk_ids in enumerate(split_ids):
+            alt_values_i = alt_values.loc[chunk_ids]
+            x_ca_i = cv_to_ca(alt_values_i)
+            x_ca_list.append(x_ca_i)
+            print(
+                f"\rx_ca_i compute done for chunk {i+1}/{num_chunks} - time elapsed {(datetime.now() - time_start).total_seconds()}"
+            )
+        x_ca = pd.concat(x_ca_list, axis=0)
+        # save the final x_ca result as a pickle file to speed up future data loading
+        _to_pickle(x_ca, x_ca_pickle_file, name, edb_directory)
+        print(
+            f"x_ca compute done - time elapsed {(datetime.now() - time_start).total_seconds()}"
+        )
+    return x_ca
+
+
 def nonmand_tour_freq_model(
     edb_directory="output/estimation_data_bundle/{name}/",
     return_data=False,
     condense_parameters=False,
+    segment_subset=[],
+    num_chunks=1,
 ):
     """
     Prepare nonmandatory tour frequency models for estimation.
@@ -141,10 +223,16 @@ def nonmand_tour_freq_model(
     data = interaction_simulate_data(
         name="non_mandatory_tour_frequency",
         edb_directory=edb_directory,
+        segment_subset=segment_subset,
     )
 
     settings = data.settings
     segment_names = [s["NAME"] for s in settings["SPEC_SEGMENTS"]]
+    if len(segment_subset) > 0:
+        assert set(segment_subset).issubset(
+            set(segment_names)
+        ), f"{segment_subset} is not a subset of {segment_names}"
+        segment_names = segment_subset
     if condense_parameters:
         data.relabel_coef = link_same_value_coefficients(
             segment_names, data.coefficients, data.spec
@@ -157,6 +245,7 @@
 
     m = {}
     for segment_name in segment_names:
+        print(f"Creating larch model for {segment_name}")
         segment_model = m[segment_name] = Model()
         # One of the alternatives is coded as 0, so
         # we need to explicitly initialize the MNL nesting graph
@@ -178,11 +267,20 @@
             .set_index("person_id")
             .rename(columns={"TAZ": "HOMETAZ"})
         )
-        x_ca = cv_to_ca(alt_values[segment_name].set_index(["person_id", "variable"]))
+        print("\t performing cv to ca step")
+        # x_ca = cv_to_ca(alt_values[segment_name].set_index(["person_id", "variable"]))
+        x_ca = get_x_ca_df(
+            alt_values=alt_values[segment_name].set_index(["person_id", "variable"]),
+            name=segment_name,
+            edb_directory=edb_directory.format(name="non_mandatory_tour_frequency"),
+            num_chunks=num_chunks,
+        )
+
         d = DataFrames(
             co=x_co,
             ca=x_ca,
-            av=~unavail(segment_model, x_ca),
+            av=True,
+            # av=~unavail(segment_model, x_ca),
         )
 
         m[segment_name].dataservice = d
diff --git a/activitysim/estimation/larch/simple_simulate.py b/activitysim/estimation/larch/simple_simulate.py
index 0ee055fe6c..6538466188 100644
--- a/activitysim/estimation/larch/simple_simulate.py
+++ b/activitysim/estimation/larch/simple_simulate.py
@@ -231,6 +231,22 @@
     )
 
 
+def work_from_home_model(
+    name="work_from_home",
+    edb_directory="output/estimation_data_bundle/{name}/",
+    return_data=False,
+):
+    return simple_simulate_model(
+        name=name,
+        edb_directory=edb_directory,
+        return_data=return_data,
+        # True means works from home, False means does not work from home;
+        # the names match the spec positions
+        choices={
+            True: 1,
+            False: 2,
+        },
+    )
+
+
 def mandatory_tour_frequency_model(
     name="mandatory_tour_frequency",
     edb_directory="output/estimation_data_bundle/{name}/",
@@ -295,3 +311,87 @@
         1: 2,  # 1 means not participate, alternative 2
     },
 )
+
+
+def external_non_mandatory_identification_model(
+    name="external_non_mandatory_identification",
+    edb_directory="output/estimation_data_bundle/{name}/",
+    return_data=False,
+):
+    return simple_simulate_model(
+        name=name,
+        edb_directory=edb_directory,
+        return_data=return_data,
+        values_index_col="tour_id",
+        choices={
+            0: 1,  # 0 means external, alternative 1
+            1: 2,  # 1 means not external, alternative 2
+        },
+    )
+
+
+def transit_pass_subsidy_model(
+    name="transit_pass_subsidy",
+    edb_directory="output/estimation_data_bundle/{name}/",
+    return_data=False,
+):
+    return simple_simulate_model(
+        name=name,
+        edb_directory=edb_directory,
+        return_data=return_data,
+        choices={
+            0: 1,  # 0 means no subsidy, alternative 1
+            1: 2,  # 1 means subsidy, alternative 2
+        },
+    )
+
+
+def transit_pass_ownership_model(
+    name="transit_pass_ownership",
+    edb_directory="output/estimation_data_bundle/{name}/",
+    return_data=False,
+):
+    return simple_simulate_model(
+        name=name,
+        edb_directory=edb_directory,
+        return_data=return_data,
+        choices={
+            0: 1,  # 0 means no pass, alternative 1
+            1: 2,  # 1 means pass, alternative 2
+        },
+    )
+
+
+def transponder_ownership_model(
+    name="transponder_ownership",
+    edb_directory="output/estimation_data_bundle/{name}/",
+    return_data=False,
+):
+    return simple_simulate_model(
+        name=name,
+        edb_directory=edb_directory,
+        return_data=return_data,
+        choices={
+            0: 1,  # 0 means no transponder, alternative 1
+            1: 2,  # 1 means transponder, alternative 2
+        },
+    )
+
+
+def telecommute_frequency_model(
+    name="telecommute_frequency",
+    edb_directory="output/estimation_data_bundle/{name}/",
+    return_data=False,
+):
+    return simple_simulate_model(
+        name=name,
+        edb_directory=edb_directory,
+        return_data=return_data,
+        choices={
+            "No_Telecommute": 1,
+            "1_day_week": 2,
+            "2_3_days_week": 3,
+            "4_days_week": 4,
+        },
+    )
+
diff --git a/activitysim/estimation/larch/stop_frequency.py b/activitysim/estimation/larch/stop_frequency.py
index c572af5e87..e580491e02 100644
--- a/activitysim/estimation/larch/stop_frequency.py
+++ b/activitysim/estimation/larch/stop_frequency.py
@@ -42,8 +42,7 @@
         seg_purpose = seg_["primary_purpose"]
         seg_subdir = Path(os.path.join(edb_directory, seg_purpose))
         segment_coef[seg_["primary_purpose"]] = pd.read_csv(
-            seg_subdir / seg_["COEFFICIENTS"],
-            index_col="coefficient_name",
+            seg_subdir / seg_["COEFFICIENTS"], index_col="coefficient_name", comment="#"
         )
 
     for seg in segments:
@@ -89,13 +88,13 @@
         seg_purpose = seg["primary_purpose"]
         seg_subdir = Path(os.path.join(edb_directory, seg_purpose))
         coeffs_ = pd.read_csv(
-            seg_subdir / seg["COEFFICIENTS"], index_col="coefficient_name"
+            seg_subdir / seg["COEFFICIENTS"], index_col="coefficient_name", comment="#"
         )
         coeffs_.index = pd.Index(
             [f"{i}_{seg_purpose}" for i in coeffs_.index], name="coefficient_name"
         )
         seg_coefficients.append(coeffs_)
 
-        spec = pd.read_csv(seg_subdir / "stop_frequency_SPEC_.csv")
+        spec = pd.read_csv(seg_subdir / "stop_frequency_SPEC_.csv", comment="#")
         spec = remove_apostrophes(spec, ["Label"])
         # spec.iloc[:, 3:] = spec.iloc[:, 3:].applymap(lambda x: f"{x}_{seg_purpose}" if not pd.isna(x) else x)
         seg_spec.append(spec)
diff --git a/conda-environments/activitysim-dev-base.yml b/conda-environments/activitysim-dev-base.yml
index f0bdcb3a36..7b534edf8a 100644
--- a/conda-environments/activitysim-dev-base.yml
+++ b/conda-environments/activitysim-dev-base.yml
@@ -41,7 +41,7 @@
 - numpydoc
 - openmatrix >= 0.3.4.1
 - orca >= 1.6
-- pandas >= 1.1.0
+- pandas >= 1.1.0,<2.0
 - pre-commit
 - psutil >= 4.1
 - pyarrow >= 2.0
diff --git a/conda-environments/activitysim-dev.yml b/conda-environments/activitysim-dev.yml
index c46b436dea..519f442db1 100644
--- a/conda-environments/activitysim-dev.yml
+++ b/conda-environments/activitysim-dev.yml
@@ -37,7 +37,7 @@
 - numpydoc
 - openmatrix >= 0.3.4.1
 - orca >= 1.6
-- pandas >= 1.1.0
+- pandas >= 1.1.0,<2.0
 - pre-commit
 - psutil >= 4.1
 - pyarrow >= 2.0
diff --git a/conda-environments/github-actions-tests.yml b/conda-environments/github-actions-tests.yml
index 63d353ea51..9207c8899e 100644
--- a/conda-environments/github-actions-tests.yml
+++ b/conda-environments/github-actions-tests.yml
@@ -16,7 +16,7 @@
 - numpy >= 1.16.1
 - openmatrix >= 0.3.4.1
 - orca >= 1.6
-- pandas >= 1.1.0
+- pandas >= 1.1.0,<2.0
 - psutil >= 4.1
 - pyarrow >= 2.0
 - pypyr >= 5.3
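
The estimation components added above are typically driven from an estimation notebook or script. A minimal usage sketch of the new chunked non-mandatory tour frequency path follows; the segment name is hypothetical, and the (models, data) return shape and the larch estimate() call are assumed to follow the conventions of the other components in this package:

    # Minimal sketch, assuming an ActivitySim estimation run has already written an
    # estimation data bundle to output/estimation_data_bundle/non_mandatory_tour_frequency/.
    from activitysim.estimation.larch.nonmand_tour_freq import nonmand_tour_freq_model

    models, data = nonmand_tour_freq_model(
        return_data=True,
        segment_subset=["PTYPE_FULL"],  # hypothetical segment NAME from SPEC_SEGMENTS
        num_chunks=4,  # split the cv_to_ca conversion into 4 chunks to bound peak memory
    )
    for segment_name, model in models.items():
        model.estimate()  # larch maximum likelihood estimation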