Source code for nomad.displacement

from nomad.agg.privacy import _laplace_pcts
import numpy as np
import pandas as pd


[docs]
def normalize_od(data, origin_col, dest_col, weight_col,
                 diff_privacy_eps=None, seed=None):
    """
    Express every origin–destination flow as a percentage of all trips.

    Each row's weight is divided by the grand total of ``weight_col`` and
    scaled by 100.  When ``diff_privacy_eps`` is given, the fractions are
    first handed to ``_laplace_pcts`` (together with the grand total, ε and
    the seed) and its return value is used in their place.

    Parameters
    ----------
    data : pandas.DataFrame
        OD table.  It is copied, not modified.
    origin_col : str
        Column with the origin geography.
    dest_col : str
        Column with the destination geography.
    weight_col : str
        Column with trip counts (non-negative ints).
    diff_privacy_eps : float or None, optional
        Privacy budget ε for the Laplace noise step; ``None`` (default)
        returns exact percentages.
    seed : int, optional
        Forwarded to the noise step for reproducibility; unused when
        ``diff_privacy_eps`` is ``None``.

    Returns
    -------
    pandas.DataFrame
        Columns ``[origin_col, dest_col, 'percentage']``, one row per input
        row.  Without noise the percentages sum to 100 (up to
        floating-point rounding).

    Raises
    ------
    ValueError
        If any of the three named columns is absent from ``data``.
    """
    for col in (origin_col, dest_col, weight_col):
        if col not in data.columns:
            raise ValueError(f"Column '{col}' not found in data.")

    grand_total = data[weight_col].sum()

    # Work on a copy so the caller's frame never gains scratch columns.
    table = data.copy()
    table['_fraction'] = table[weight_col] / grand_total  # share in [0, 1]

    add_noise = diff_privacy_eps is not None
    if add_noise:
        exact_shares = table['_fraction'].to_numpy()
        table['_fraction'] = _laplace_pcts(
            exact_shares,
            total_trips=grand_total,
            epsilon=diff_privacy_eps,
            seed=seed,
        )

    table['percentage'] = 100 * table['_fraction']
    wanted = [origin_col, dest_col, 'percentage']
    return table[wanted]




[docs]
def normalized_remained(data, origin_col, dest_col, weight_col,
                         diff_privacy_eps=None, seed=None):
    """
    Percentage of each origin's trips that *remain* in that origin.

    For every row whose origin equals its destination, the row's weight is
    divided by the total weight leaving that origin (summed over all
    destinations) and scaled by 100.  When ``diff_privacy_eps`` is given,
    the fractions are first handed to ``_laplace_pcts`` together with the
    per-row origin totals, ε and the seed, and its return value is used in
    their place.

    Parameters
    ----------
    data, origin_col, dest_col, weight_col
        See `normalize_od`.
    diff_privacy_eps : float or None, optional
        ε for DP; ``None`` (default) for exact values.
    seed : int, optional
        Forwarded to the noise step.

    Returns
    -------
    pandas.DataFrame
        Columns ``[origin_col, 'percentage']``, one row per input row with
        origin equal to destination.

    Raises
    ------
    ValueError
        If any of the three named columns is absent from ``data``.
    """
    for col in (origin_col, dest_col, weight_col):
        if col not in data.columns:
            raise ValueError(f"Column '{col}' not found in data.")

    # Denominator: all weight leaving each origin, wherever it ends up.
    per_origin = data.groupby(origin_col)[weight_col].sum()
    per_origin = per_origin.rename('origin_total')

    # Numerator rows: flows that start and end in the same geography.
    same_place = data[origin_col] == data[dest_col]
    kept = data[same_place].copy()
    kept = kept.merge(per_origin, left_on=origin_col, right_index=True)
    kept['_fraction'] = kept[weight_col] / kept['origin_total']

    if diff_privacy_eps is not None:
        kept['_fraction'] = _laplace_pcts(
            kept['_fraction'].to_numpy(),
            total_trips=kept['origin_total'],
            epsilon=diff_privacy_eps,
            seed=seed,
        )

    kept['percentage'] = 100 * kept['_fraction']
    return kept[[origin_col, 'percentage']]




[docs]
def normalized_moved(data, origin_col, dest_col, weight_col,
                     diff_privacy_eps=None, seed=None):
    """
    Normalised list of *moving* OD flows (origin ≠ destination).

    Rows whose origin differs from their destination are kept; each row's
    weight is divided by the total weight of those rows and scaled by 100.
    When ``diff_privacy_eps`` is given, the fractions are first handed to
    ``_laplace_pcts`` together with the moving-trip total, ε and the seed,
    and its return value is used in their place.

    Parameters
    ----------
    data, origin_col, dest_col, weight_col
        See `normalize_od`.
    diff_privacy_eps : float or None, optional
        ε for DP; ``None`` (default) for exact values.
    seed : int, optional
        Forwarded to the noise step.

    Returns
    -------
    pandas.DataFrame
        Columns ``[origin_col, dest_col, 'percentage']``, one row per moving
        flow.  Without noise the percentages sum to 100 (up to
        floating-point rounding).

    Raises
    ------
    ValueError
        If any of the three named columns is absent from ``data``.
    """
    for col in [origin_col, dest_col, weight_col]:
        if col not in data.columns:
            raise ValueError(f"Column '{col}' not found in data.")

    moved = data[data[origin_col] != data[dest_col]].copy()
    moved_total = moved[weight_col].sum()
    moved['_fraction'] = moved[weight_col] / moved_total

    if diff_privacy_eps is not None:
        # The fractions above are relative to the moving-trip total, so that
        # same total is what the noise step is given.  This frame has no
        # per-origin total column to pass instead.
        moved['_fraction'] = _laplace_pcts(
            moved['_fraction'].to_numpy(),
            total_trips=moved_total,
            epsilon=diff_privacy_eps,
            seed=seed,
        )

    moved['percentage'] = 100 * moved['_fraction']
    return moved[[origin_col, dest_col, 'percentage']]