{'dataframe': 'RangeIndex: 1110 entries, 0 to 1109'\n",
+ " 'Columns: 37 entries, _time to distance_miles'\n",
+ " 'dtypes: datetime64[ns](4), float64(8), int32(2), int64(2), '\n",
+ " 'object(13), uint32(6), uint64(2)'\n",
+ " 'memory usage: 286.3+ KB',\n",
+ " 'expression': '# Cross-entity features'\n",
+ " ''\n",
+ " '# Compute some per-trip metrics for Dropoffs'\n",
+ " 'let trip_speed = Dropoff.distance_miles / Dropoff.trip_time * '\n",
+ " '60 * 60'\n",
+ " 'let dropoff_with_metrics = Dropoff | extend({trip_speed})'\n",
+ " ''\n",
+ " '# Re-key by trip source and destination'\n",
+ " 'let dropoff_by_src_bin = dropoff_with_metrics | '\n",
+ " 'with_key(Dropoff.pu_location_id)'\n",
+ " 'let dropoff_by_dst_bin = dropoff_with_metrics | '\n",
+ " 'with_key(Dropoff.do_location_id)'\n",
+ " ''\n",
+ " '# Compute aggregates related to trips departing from a given '\n",
+ " 'bin'\n",
+ " 'let departure_mean_speed_10m = dropoff_by_src_bin.trip_speed | '\n",
+ " 'mean(window=sliding(10,minutely()))'\n",
+ " 'let departure_mean_speed_60m = dropoff_by_src_bin.trip_speed | '\n",
+ " 'mean(window=sliding(60,minutely()))'\n",
+ " 'let departure_mean_speed_1d = dropoff_by_src_bin.trip_speed | '\n",
+ " 'mean(window=sliding(24,hourly()))'\n",
+ " 'let departure_count_10m = dropoff_by_src_bin | '\n",
+ " 'count(window=sliding(10, minutely()))'\n",
+ " 'let departure_count_60m = dropoff_by_src_bin | '\n",
+ " 'count(window=sliding(60, minutely()))'\n",
+ " 'let departure_count_1d = dropoff_by_src_bin | '\n",
+ " 'count(window=sliding(24, hourly()))'\n",
+ " ''\n",
+ " '# Compute aggregates related to trips arriving at a given bin'\n",
+ " 'let arrival_mean_speed_10m = dropoff_by_dst_bin.trip_speed | '\n",
+ " 'mean(window=sliding(10,minutely()))'\n",
+ " 'let arrival_mean_speed_60m = dropoff_by_dst_bin.trip_speed | '\n",
+ " 'mean(window=sliding(60,minutely()))'\n",
+ " 'let arrival_mean_speed_1d = dropoff_by_dst_bin.trip_speed | '\n",
+ " 'mean(window=sliding(24,hourly()))'\n",
+ " 'let arrival_count_10m = dropoff_by_dst_bin | '\n",
+ " 'count(window=sliding(10, minutely()))'\n",
+ " 'let arrival_count_60m = dropoff_by_dst_bin | '\n",
+ " 'count(window=sliding(60, minutely()))'\n",
+ " 'let arrival_count_1d = dropoff_by_dst_bin | '\n",
+ " 'count(window=sliding(24, hourly()))'\n",
+ " ''\n",
+ " '#in Pickup | remove_fields($input, \"request_datetime\", '\n",
+ " '\"on_scene_datetime\", \"pickup_datetime\") | extend({'\n",
+ " 'in Pickup | extend({'\n",
+ " ''\n",
+ " ' # TODO:'\n",
+ " ' # hour of day'\n",
+ " ' # day of week'\n",
+ " ' #distance_miles: Pickup.distance_miles,'\n",
+ " ''\n",
+ " ' monthday: day_of_month(Pickup.pickup_datetime as '\n",
+ " 'timestamp_ns) | else(-1),'\n",
+ " ''\n",
+ " ' # Features related to recent trips departing from the same '\n",
+ " 'area'\n",
+ " ' departure_mean_speed_10m: departure_mean_speed_10m | '\n",
+ " 'lookup(Pickup.pu_location_id),'\n",
+ " ' departure_mean_speed_60m: departure_mean_speed_60m | '\n",
+ " 'lookup(Pickup.pu_location_id),'\n",
+ " ' departure_mean_speed_1d: departure_mean_speed_1d | '\n",
+ " 'lookup(Pickup.pu_location_id),'\n",
+ " ' departure_count_10m: departure_count_10m | '\n",
+ " 'lookup(Pickup.pu_location_id),'\n",
+ " ' departure_count_60m: departure_count_60m | '\n",
+ " 'lookup(Pickup.pu_location_id),'\n",
+ " ' departure_count_1d: departure_count_1d | '\n",
+ " 'lookup(Pickup.pu_location_id),'\n",
+ " ''\n",
+ " ' # Features related to recent trips arriving in the same '\n",
+ " 'area'\n",
+ " ' arrival_mean_speed_10m: arrival_mean_speed_10m | '\n",
+ " 'lookup(Pickup.do_location_id),'\n",
+ " ' arrival_mean_speed_60m: arrival_mean_speed_60m | '\n",
+ " 'lookup(Pickup.do_location_id),'\n",
+ " ' arrival_mean_speed_1d: arrival_mean_speed_1d | '\n",
+ " 'lookup(Pickup.do_location_id),'\n",
+ " ' arrival_count_10m: arrival_count_10m | '\n",
+ " 'lookup(Pickup.do_location_id),'\n",
+ " ' arrival_count_60m: arrival_count_60m | '\n",
+ " 'lookup(Pickup.do_location_id),'\n",
+ " ' arrival_count_1d: arrival_count_1d | '\n",
+ " 'lookup(Pickup.do_location_id),'\n",
+ " '})'\n",
+ " \"# We'll make predictions from features computed at the time of \"\n",
+ " 'each pickup'\n",
+ " '| when(is_valid(Pickup))'\n",
+ " ''\n",
+ " \"# We'll predict the duration of the trip, which we learn at \"\n",
+ " 'the time of the next dropoff'\n",
+ " '| last(window=since(is_valid(Dropoff)))'\n",
+ " '| when(is_valid($input) and is_valid(Dropoff))'\n",
+ " '| extend({target: Dropoff.trip_time})'\n",
+ " ''\n",
+ " '# cleaning'\n",
+ " '| when($input.distance_miles < 50) # distance outliers'\n",
+ " '| when($input.target < 24 * 60 * 60) # trips longer than a '\n",
+ " 'day'\n",
+ " '| when($input.target > 60) # trips shorter than a minute',\n",
+ " 'query_response': state: STATE_SUCCESS\n",
+ "config {\n",
+ " data_token_id: \"c7a71379-778e-4824-9c66-9cbcc384e439\"\n",
+ " slice_request {\n",
+ " percent {\n",
+ " percent: 2\n",
+ " }\n",
+ " }\n",
+ "}\n",
+ "analysis {\n",
+ " can_execute: true\n",
+ " schema {\n",
+ " fields {\n",
+ " name: \"target\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_I64\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"monthday\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_I64\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"departure_mean_speed_10m\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_F64\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"departure_mean_speed_60m\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_F64\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"departure_mean_speed_1d\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_F64\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"departure_count_10m\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_U32\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"departure_count_60m\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_U32\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"departure_count_1d\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_U32\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"arrival_mean_speed_10m\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_F64\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"arrival_mean_speed_60m\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_F64\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"arrival_mean_speed_1d\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_F64\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"arrival_count_10m\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_U32\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"arrival_count_60m\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_U32\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"arrival_count_1d\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_U32\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"hvfhs_license_num\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_STRING\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"dispatching_base_num\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_STRING\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"originating_base_num\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_STRING\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"request_datetime\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_TIMESTAMP_MICROSECOND\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"on_scene_datetime\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_TIMESTAMP_MICROSECOND\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"pickup_datetime\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_TIMESTAMP_MICROSECOND\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"pu_location_id\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_I32\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"do_location_id\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_I32\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"trip_miles\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_F64\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"shared_request_flag\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_STRING\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"shared_match_flag\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_STRING\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"access_a_ride_flag\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_STRING\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"wav_request_flag\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_STRING\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"wav_match_flag\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_STRING\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"pu_zone\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_STRING\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"pu_borough\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_STRING\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"do_zone\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_STRING\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"do_borough\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_STRING\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " fields {\n",
+ " name: \"distance_miles\"\n",
+ " data_type {\n",
+ " primitive: PRIMITIVE_TYPE_F64\n",
+ " }\n",
+ " nullable: true\n",
+ " }\n",
+ " }\n",
+ "}\n",
+ "fenl_diagnostics {\n",
+ " fenl_diagnostics {\n",
+ " severity: SEVERITY_WARNING\n",
+ " code: \"W2000\"\n",
+ " message: \"Incompatible time domains\"\n",
+ " formatted: \"warning[W2000]: Incompatible time domains --> Query:57:3 |57 | | last(window=since(is_valid(Dropoff))) | ^^^^ ------------------------ Time Domain: Table \\'Dropoff\\' | | | Incompatible time domains for operation | --> internal:1:1 | 1 | $input | ------ Time Domain: Table \\'Pickup\\'\"\n",
+ " }\n",
+ "}\n",
+ "metrics {\n",
+ " time_preparing {\n",
+ " nanos: 10151584\n",
+ " }\n",
+ " time_computing {\n",
+ " seconds: 15\n",
+ " nanos: 362543916\n",
+ " }\n",
+ " output_files: 1\n",
+ " total_input_rows: 17367583\n",
+ " processed_input_rows: 17367583\n",
+ " produced_output_rows: 1110\n",
+ "}\n",
+ "request_details {\n",
+ " request_id: \"b70d11defd5ebd4ab5b288247f2f0aeb\"\n",
+ "}\n",
+ "query_id: \"754fd0a4-7d1a-421a-82ac-0e1eee9bc3a9\"\n",
+ "destination {\n",
+ " object_store {\n",
+ " file_type: FILE_TYPE_PARQUET\n",
+ " output_prefix_uri: \"file:///Users/ryan.michael/.cache/kaskada/data/results/9817b83f-211b-41b8-8d7f-5026fcc14d48/nhmM-0CVVBzzb5R3_98_-Ea2g8y0VPsws9bAUQ/\"\n",
+ " output_paths {\n",
+ " paths: \"/Users/ryan.michael/.cache/kaskada/data/results/9817b83f-211b-41b8-8d7f-5026fcc14d48/nhmM-0CVVBzzb5R3_98_-Ea2g8y0VPsws9bAUQ/7b9d632d-7a71-4039-9cd9-6ba04ad82179-part-0.parquet\"\n",
+ " }\n",
+ " }\n",
+ "}\n",
+ "}