Source code for ridepy.util.analytics.vehicle

import numpy as np
import pandas as pd


def _time_weighted_mean(stops_by_vehicle, column: str) -> pd.Series:
    """Return the per-group mean of ``column`` weighted by ``state_duration``."""
    return stops_by_vehicle.apply(
        lambda gdf: (gdf[column] * gdf["state_duration"]).sum()
        / gdf["state_duration"].sum()
    )


def get_vehicle_quantities(stops: pd.DataFrame, requests: pd.DataFrame) -> pd.DataFrame:
    """
    Compute various quantities aggregated **per vehicle**.

    Currently, the following observables are returned:

    - avg_occupancy
    - avg_segment_dist
    - avg_segment_time
    - total_dist_driven
    - total_time_driven
    - avg_direct_dist
    - avg_direct_time
    - total_direct_dist
    - total_direct_time
    - efficiency_dist
    - efficiency_time
    - avg_system_stoplist_length_service_time
    - avg_system_stoplist_length_submission_time
    - avg_stoplist_length_service_time
    - avg_stoplist_length_submission_time

    ``avg_occupancy`` and the ``avg_*stoplist_length*`` observables are
    averages weighted by ``state_duration``. The ``efficiency_*`` observables
    are the ratio of the summed direct travel distance/time of a vehicle's
    serviced requests to the distance/time the vehicle actually drove.
    Each ``avg_*stoplist_length*`` column is only present if the
    corresponding input column exists in ``stops``.

    Parameters
    ----------
    stops
        Stops dataframe. Must be groupable by ``vehicle_id`` and provide the
        columns ``occupancy``, ``state_duration``, ``dist_to_next`` and
        ``time_to_next``.
    requests
        Requests dataframe with two-level columns. Reads
        ``("serviced", "vehicle_id")``, ``("submitted", "direct_travel_distance")``
        and ``("submitted", "direct_travel_time")``. If a
        ``("rejected", "timestamp")`` column exists, only rows where it is
        missing are treated as serviced.

    Returns
    -------
    ``pd.DataFrame`` containing the aforementioned observables as columns,
    indexed by ``vehicle_id``. If there are no serviced requests, all
    direct-trip and efficiency columns are NaN.
    """
    if ("rejected", "timestamp") in requests:
        serviced_requests = requests[requests[("rejected", "timestamp")].isna()]
    else:
        serviced_requests = requests

    # Build the per-vehicle grouping once and reuse it for every statistic.
    stops_by_vehicle = stops.groupby("vehicle_id")

    avg_occupancy = _time_weighted_mean(stops_by_vehicle, "occupancy")
    avg_segment_dist = stops_by_vehicle["dist_to_next"].mean()
    avg_segment_time = stops_by_vehicle["time_to_next"].mean()
    total_dist_driven = stops_by_vehicle["dist_to_next"].sum()
    total_time_driven = stops_by_vehicle["time_to_next"].sum()

    if not serviced_requests.empty:
        # Group the submitted-request attributes by the servicing vehicle.
        # Both objects stem from the same frame, so they share one index.
        serviced_by_vehicle = serviced_requests["submitted"].groupby(
            serviced_requests[("serviced", "vehicle_id")]
        )
        direct_dist = serviced_by_vehicle["direct_travel_distance"]
        direct_time = serviced_by_vehicle["direct_travel_time"]

        avg_direct_dist = direct_dist.mean()
        avg_direct_time = direct_time.mean()
        total_direct_dist = direct_dist.sum()
        total_direct_time = direct_time.sum()

        # Division aligns on vehicle id; vehicles missing on either side
        # end up as NaN.
        efficiency_dist = total_direct_dist / total_dist_driven
        efficiency_time = total_direct_time / total_time_driven
    else:
        # No serviced requests: report NaN for every vehicle found in stops.
        def _all_nan() -> pd.Series:
            return pd.Series(np.nan, index=avg_occupancy.index, dtype=float)

        avg_direct_dist = _all_nan()
        avg_direct_time = _all_nan()
        total_direct_dist = _all_nan()
        total_direct_time = _all_nan()
        efficiency_dist = _all_nan()
        efficiency_time = _all_nan()

    res = pd.DataFrame(
        dict(
            avg_occupancy=avg_occupancy,
            avg_segment_dist=avg_segment_dist,
            avg_segment_time=avg_segment_time,
            total_dist_driven=total_dist_driven,
            total_time_driven=total_time_driven,
            avg_direct_dist=avg_direct_dist,
            avg_direct_time=avg_direct_time,
            total_direct_dist=total_direct_dist,
            total_direct_time=total_direct_time,
            efficiency_dist=efficiency_dist,
            efficiency_time=efficiency_time,
        )
    ).rename_axis("vehicle_id")

    # Optional stoplist-length observables, added in a fixed order and only
    # when the corresponding input column is available.
    for column in (
        "system_stoplist_length_service_time",
        "system_stoplist_length_submission_time",
        "stoplist_length_service_time",
        "stoplist_length_submission_time",
    ):
        if column in stops:
            res[f"avg_{column}"] = _time_weighted_mean(stops_by_vehicle, column)

    return res