Source code for nomad.displacement

from nomad.agg.privacy import _laplace_pcts
import numpy as np
import pandas as pd

def normalize_od(data, origin_col, dest_col, weight_col, diff_privacy_eps=None, seed=None):
    """
    Express every origin–destination flow as a percentage of all trips,
    optionally perturbed with Laplace differential-privacy noise.

    Parameters
    ----------
    data : pandas.DataFrame
        OD table.
    origin_col : str
        Column holding the origin geography.
    dest_col : str
        Column holding the destination geography.
    weight_col : str
        Column holding trip counts (expected to be non-negative).
    diff_privacy_eps : float or None, optional
        Privacy budget ε. When given, the shares are passed through
        ``_laplace_pcts`` before being scaled to percentages; when None the
        exact shares are returned.
    seed : int, optional
        Forwarded to ``_laplace_pcts`` for reproducible noise.

    Returns
    -------
    pandas.DataFrame
        One row per input row with columns
        ``[origin_col, dest_col, 'percentage']``. Without DP noise the
        percentages sum to 100 (provided the total trip count is non-zero).

    Raises
    ------
    ValueError
        If any of the three named columns is absent from ``data``.
    """
    available = data.columns
    for required in (origin_col, dest_col, weight_col):
        if required not in available:
            raise ValueError(f"Column '{required}' not found in data.")

    # Denominator shared by every row: the grand total of trips.
    total_trips = data[weight_col].sum()
    shares = data[weight_col] / total_trips

    if diff_privacy_eps is not None:
        shares = _laplace_pcts(
            shares.to_numpy(),
            total_trips=total_trips,
            epsilon=diff_privacy_eps,
            seed=seed,
        )

    # Work on a copy so the caller's frame never gains helper columns.
    result = data.copy()
    result['_fraction'] = shares
    result['percentage'] = 100 * result['_fraction']
    return result[[origin_col, dest_col, 'percentage']]
def normalized_remained(data, origin_col, dest_col, weight_col, diff_privacy_eps=None, seed=None):
    """
    Percentage of trips that *remain* in the origin geography, with optional
    per-origin Laplace DP noise.

    Percentages are computed relative to each origin's total outgoing trips
    (the sum of ``weight_col`` over every row sharing that origin).

    Parameters
    ----------
    data : pandas.DataFrame
        OD table.
    origin_col : str
        Column holding the origin geography.
    dest_col : str
        Column holding the destination geography.
    weight_col : str
        Column holding trip counts (expected to be non-negative).
    diff_privacy_eps : float or None, optional
        Privacy budget ε. When given, the shares are passed through
        ``_laplace_pcts``; when None the exact values are returned.
    seed : int, optional
        Forwarded to ``_laplace_pcts`` for reproducible noise.

    Returns
    -------
    pandas.DataFrame
        Columns ``[origin_col, 'percentage']``, one row per input row whose
        origin equals its destination. Origins that have no such row do not
        appear in the result.

    Raises
    ------
    ValueError
        If any of the three named columns is absent from ``data``.
    """
    for col in (origin_col, dest_col, weight_col):
        if col not in data.columns:
            raise ValueError(f"Column '{col}' not found in data.")

    # Per-row total of all trips leaving that row's origin. Using `transform`
    # keeps the totals aligned with `data` without adding a helper column, so
    # an input column that happens to be called 'origin_total' cannot collide
    # (a merge would suffix both columns and break the later lookup).
    origin_totals = data.groupby(origin_col)[weight_col].transform('sum')

    is_stay = data[origin_col] == data[dest_col]
    stayed = data.loc[is_stay, [origin_col]].copy()
    stay_totals = origin_totals[is_stay]
    fractions = data.loc[is_stay, weight_col] / stay_totals

    if diff_privacy_eps is not None:
        # Hand over plain arrays for both arguments so they share the same
        # positional order and no index alignment can interfere.
        # NOTE(review): assumes `_laplace_pcts` accepts per-row `total_trips`
        # (the original passed a per-row Series here) — confirm.
        fractions = _laplace_pcts(
            fractions.to_numpy(),
            total_trips=stay_totals.to_numpy(),
            epsilon=diff_privacy_eps,
            seed=seed,
        )

    stayed['percentage'] = 100 * fractions
    return stayed[[origin_col, 'percentage']]
def normalized_moved(data, origin_col, dest_col, weight_col, diff_privacy_eps=None, seed=None):
    """
    Normalised list of *moving* OD flows (origin ≠ destination) with optional
    Laplace DP noise.

    Each moving flow is expressed as a percentage of the total weight of all
    moving flows.

    Parameters
    ----------
    data : pandas.DataFrame
        OD table.
    origin_col : str
        Column holding the origin geography.
    dest_col : str
        Column holding the destination geography.
    weight_col : str
        Column holding trip counts (expected to be non-negative).
    diff_privacy_eps : float or None, optional
        Privacy budget ε. When given, the shares are passed through
        ``_laplace_pcts``; when None the exact values are returned.
    seed : int, optional
        Forwarded to ``_laplace_pcts`` for reproducible noise.

    Returns
    -------
    pandas.DataFrame
        Columns ``[origin_col, dest_col, 'percentage']``, one row per input
        row whose origin differs from its destination. Without DP noise the
        percentages sum to 100 (provided the moved total is non-zero).

    Raises
    ------
    ValueError
        If any of the three named columns is absent from ``data``.
    """
    for col in (origin_col, dest_col, weight_col):
        if col not in data.columns:
            raise ValueError(f"Column '{col}' not found in data.")

    moved = data[data[origin_col] != data[dest_col]].copy()
    moved_total = moved[weight_col].sum()
    moved['_fraction'] = moved[weight_col] / moved_total

    if diff_privacy_eps is not None:
        # The shares are relative to `moved_total`, so that is the trip count
        # the noise must be calibrated against. (There is no 'origin_total'
        # column in this frame; looking it up raised KeyError.)
        moved['_fraction'] = _laplace_pcts(
            moved['_fraction'].to_numpy(),
            total_trips=moved_total,
            epsilon=diff_privacy_eps,
            seed=seed,
        )

    moved['percentage'] = 100 * moved['_fraction']
    return moved[[origin_col, dest_col, 'percentage']]