Synthetic Trajectory Generation with Nomad

This notebook demonstrates how to generate realistic synthetic human mobility trajectories.

[1]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
from pathlib import Path
from joblib import Parallel, delayed

from nomad.city_gen import City
from nomad.traj_gen import Agent, Population
from nomad.stop_detection.viz import plot_pings, plot_time_barcode

[3]:

# Load the city from its geopackage, then run the three preparation calls
# (hub network, gravity, shortest paths) before any agents are created.
city = City.from_geopackage('garden-city.gpkg')
city._build_hub_network(hub_size=16)
city.compute_gravity(exponent=2.0)
city.compute_shortest_paths(callable_only=True)

# One-shot summary of the loaded city.
summary_lines = [
    f"City: {city.name}",
    f"Dimensions: {city.dimensions}",
    f"Buildings: {len(city.buildings_gdf)}",
]
print("\n".join(summary_lines))

City: Garden City
Dimensions: (22, 22)
Buildings: 106

Part 1: Effect of Sampling Parameters on Sparsity

Generate 3 agents with 2-day trajectories, varying beta_start and beta_durations across agents (beta_ping is held fixed) to show their effect on sparsity, measured as q = (number of observed points) / (number of ground-truth points).

[4]:

# Three agents, one for each sampling configuration below.
population = Population(city)
population.generate_agents(N=3, seed=42, name_count=2)

# One configuration per agent: beta_ping stays at 5 while beta_start and
# beta_durations change from one agent to the next.
sampling_params = [
    {'beta_ping': 5, 'beta_start': 100, 'beta_durations': 60},
    {'beta_ping': 5, 'beta_start': 250, 'beta_durations': 150},
    {'beta_ping': 5, 'beta_start': 400, 'beta_durations': 240}
]

# Two-day window shared by all agents (kept short for quick visualization).
traj_start = pd.Timestamp("2024-01-01T07:00-04:00")
traj_end = pd.Timestamp("2024-01-03T07:00-04:00")

for i, (agent_key, agent) in enumerate(population.roster.items()):
    params = sampling_params[i]

    # Full trajectory first, then its sampled (sparse) counterpart; the agent's
    # position in the roster doubles as the seed for both steps.
    agent.generate_trajectory(datetime=traj_start, end_time=traj_end, seed=i)
    agent.sample_trajectory(replace_sparse_traj=True, seed=i, **params)

    # q = rows in the sparse trajectory / rows in the full trajectory.
    q = len(agent.sparse_traj) / len(agent.trajectory)
    print(f"Agent {i}: q={q:.3f}, beta_start={sampling_params[i]['beta_start']}, "
          f"beta_dur={sampling_params[i]['beta_durations']}")

Agent 0: q=0.051, beta_start=100, beta_dur=60
Agent 1: q=0.031, beta_start=250, beta_dur=150
Agent 2: q=0.135, beta_start=400, beta_dur=240

[5]:

# Two-row grid with one column per agent: the city map with the agent's sparse
# pings on top, and a plot_time_barcode strip of the ping timestamps below.
fig, axes = plt.subplots(2, 3, figsize=(15, 10),
                         gridspec_kw={'height_ratios': [10, 1]})

for i, (agent_id, agent) in enumerate(population.roster.items()):
    ax_map = axes[0, i]
    ax_barcode = axes[1, i]

    city.plot_city(ax=ax_map, doors=False, address=False)

    traj = agent.sparse_traj
    plot_pings(traj, ax=ax_map, s=15, point_color='red',
               x='x', y='y', timestamp='timestamp')

    plot_time_barcode(traj['timestamp'], ax=ax_barcode, set_xlim=True)

    # q = rows in the sparse trajectory / rows in the full trajectory.
    q = len(traj) / len(agent.trajectory)
    ax_map.set_title(f"Agent {i}: {len(traj)} obs (q={q:.2f})\n"
                     f"beta_start={sampling_params[i]['beta_start']}, "
                     f"beta_dur={sampling_params[i]['beta_durations']}")
    ax_map.set_axis_off()

plt.tight_layout()

# savefig does not create missing directories, so on a fresh checkout without
# data/ the original call fails; create the parent directory first.
figure_path = Path('data/trajectories_visualization.png')
figure_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(figure_path, dpi=150, bbox_inches='tight')
plt.show()

../_images/source_generate_synthetic_trajectories_5_0.png

Part 2: Parallel Generation at Scale

Generate one-week trajectories for 15 agents in parallel with joblib, running one worker task per agent.

[6]:

def generate_agent_trajectory(args):
    """Build one agent in a freshly loaded city and return its sparse trajectory.

    Meant to be called once per agent by a parallel runner: the city is loaded
    from 'garden-city.gpkg' and prepared inside the call instead of being
    passed in.

    Parameters
    ----------
    args : tuple
        ``(identifier, home, work, seed)``. ``home`` and ``work`` are forwarded
        to ``Agent`` as ``home`` and ``workplace``; ``seed`` is used for both
        trajectory generation and sampling.

    Returns
    -------
    A copy of ``agent.sparse_traj`` (presumably a pandas DataFrame — confirm
    against ``Agent``) with ``user_id``, ``home`` and ``workplace`` columns set.
    """
    agent_name, home_id, work_id, rng_seed = args

    # Same preparation sequence as the notebook-level city.
    local_city = City.from_geopackage('garden-city.gpkg')
    local_city._build_hub_network(hub_size=16)
    local_city.compute_gravity(exponent=2.0)
    local_city.compute_shortest_paths(callable_only=True)

    agent = Agent(identifier=agent_name, city=local_city, home=home_id, workplace=work_id)

    # One-week window: 2024-01-01 07:00 to 2024-01-08 07:00 (UTC-04:00).
    week_start = pd.Timestamp("2024-01-01T07:00-04:00")
    week_end = pd.Timestamp("2024-01-08T07:00-04:00")
    agent.generate_trajectory(datetime=week_start, end_time=week_end, seed=rng_seed)
    agent.sample_trajectory(beta_ping=5, replace_sparse_traj=True, seed=rng_seed)

    # Tag every row with the agent's identity so results can be told apart later.
    tagged = agent.sparse_traj.copy()
    for column, value in (('user_id', agent_name), ('home', home_id), ('workplace', work_id)):
        tagged[column] = value
    return tagged

[7]:

n_agents = 15
rng = np.random.default_rng(100)

# Candidate building ids, split by building type.
buildings = city.buildings_gdf
homes = buildings.loc[buildings['building_type'] == 'home', 'id'].to_numpy()
workplaces = buildings.loc[buildings['building_type'] == 'workplace', 'id'].to_numpy()

# One (identifier, home, workplace, seed) tuple per agent. For each agent the
# home is drawn before the workplace, which fixes the sequence of RNG draws.
agent_params = []
for i in range(n_agents):
    home_id = rng.choice(homes)
    work_id = rng.choice(workplaces)
    agent_params.append((f'agent_{i:04d}', home_id, work_id, i))

[8]:

print(f"Generating {n_agents} agents in parallel...")
# perf_counter is monotonic, so the measured duration cannot be skewed by a
# system clock adjustment happening during the run (time.time() can be).
start_time = time.perf_counter()

# One task per parameter tuple; n_jobs=-1 asks joblib to use all available cores.
results = Parallel(n_jobs=-1, verbose=10)(
    delayed(generate_agent_trajectory)(params) for params in agent_params
)

generation_time = time.perf_counter() - start_time
# The "per agent" figure is total wall time divided by the number of agents,
# not the time a single worker spent on one agent.
print(f"Generated {n_agents} agents in {generation_time:.2f}s ({generation_time/n_agents:.2f}s per agent)")

Generating 15 agents in parallel...

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of  15 | elapsed:   18.3s remaining:  2.0min
[Parallel(n_jobs=-1)]: Done   4 out of  15 | elapsed:   23.3s remaining:  1.1min
[Parallel(n_jobs=-1)]: Done   6 out of  15 | elapsed:   24.6s remaining:   36.9s
[Parallel(n_jobs=-1)]: Done   8 out of  15 | elapsed:   24.9s remaining:   21.8s
[Parallel(n_jobs=-1)]: Done  10 out of  15 | elapsed:   27.0s remaining:   13.5s
[Parallel(n_jobs=-1)]: Done  12 out of  15 | elapsed:   29.2s remaining:    7.2s

Generated 15 agents in 30.52s (2.03s per agent)

[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:   30.4s finished

[10]:

# Rebuild a Population in this process from the per-agent worker results.
parallel_population = Population(city)
for (identifier, home, work, seed), sparse_df in zip(agent_params, results):
    agent = Agent(identifier=identifier, city=city, home=home, workplace=work, seed=seed)
    # The worker added 'home' and 'workplace' columns to each result; remove
    # them before attaching the frame as the agent's sparse trajectory.
    agent.sparse_traj = sparse_df.drop(columns=['home', 'workplace'])
    parallel_population.add_agent(agent, verbose=False)

# Write the sparse trajectories of the whole population as parquet.
output_path = 'data/trajectories_15_users'
parallel_population.save_pop(sparse_path=output_path, fmt='parquet')
print(f"Saved sparse trajectories to {output_path}")

Saved sparse trajectories to data/trajectories_15_users