Source code for swarmsim.Utils.params_utils

import numpy as np
import pandas as pd
from pathlib import Path
import logging



[docs]
def get_parameters(params_config: dict, params_shapes: dict[str, tuple], num_samples: int) -> dict[str, np.ndarray]:
    """
    Load or generate parameters for population initialization with flexible configuration.

    This function serves as the main entry point for parameter generation in the swarmsim
    framework. It supports both file-based parameter loading and procedural parameter
    generation using statistical distributions, enabling flexible population initialization
    for multi-agent simulations.

    Parameters
    ----------
    params_config : dict
        Configuration dictionary specifying parameter loading/generation method.
        Must contain 'mode' key with value 'file' or 'generate', plus corresponding
        configuration section.
    params_shapes : dict[str, tuple]
        Dictionary mapping parameter names to their expected shapes per agent.
        Used for validation and reshaping operations.
    num_samples : int
        Number of agents/samples to generate parameters for.

    Returns
    -------
    dict[str, np.ndarray]
        Dictionary mapping parameter names to NumPy arrays of shape (num_samples, *shape)
        where shape corresponds to the parameter's expected dimensions.

    Configuration Structure
    ----------------------
    File Mode:
        .. code-block:: yaml

            mode: "file"
            file:
              file_path: "path/to/parameters.csv"  # or .npz

    Generate Mode:
        .. code-block:: yaml

            mode: "generate"
            generate:
              parameter_name:
                sampler: "normal"  # Distribution type
                args:
                  loc: 0.0         # Distribution parameters
                  scale: 1.0
                shape: [3]         # Parameter shape per agent
                homogeneous: false # Same value for all agents

    
    Error Handling
    --------------
    - **FileNotFoundError**: Raised when specified parameter file doesn't exist
    - **KeyError**: Raised when expected parameters are missing from loaded data
    - **ValueError**: Raised for invalid parameter shapes or configurations
    - **RuntimeError**: Raised for invalid mode specifications

    Applications
    ------------
    - **Population Initialization**: Set agent properties for simulation start
    - **Parameter Studies**: Generate parameter sets for sensitivity analysis
    - **Multi-species Systems**: Handle heterogeneous agent populations
    - **Reproducible Research**: Load exact parameter sets from files

    
    See Also
    --------
    _load_parameters : File-based parameter loading implementation
    _generate_parameters : Statistical parameter generation implementation
    _reshape_parameter : Parameter reshaping utilities
    """

    mode = params_config.get("mode")
    if mode == "file":
        settings = params_config.get("file")
        params = _load_parameters(settings, num_samples)
    elif mode == "generate":
        settings = params_config.get("generate")
        params = _generate_parameters(settings, num_samples)
    else:
        raise RuntimeError("Invalid parameter mode. Check the YAML config file.")

    if params_shapes is None:
        raise ValueError("params_shapes must be defined before initializing parameters.")

    missing = [k for k in params_shapes if k not in params]
    if missing:
        raise KeyError(f"Missing parameter(s) in params: {missing}")

    return {
        key: _reshape_parameter(params[key], shape)
        for key, shape in params_shapes.items()
    }



def _load_parameters(params_config: dict, num_samples: int) -> dict[str, np.ndarray]:
    """
    Load parameters from CSV or NPZ files with automatic population matching.

    This helper function handles file-based parameter loading with intelligent
    population size matching. It automatically repeats or truncates parameter
    data to match the required number of samples, ensuring consistent population
    initialization regardless of file size.

    Parameters
    ----------
    params_config : dict
        Configuration dictionary containing file loading settings.
        Must include 'file_path' key pointing to CSV or NPZ file.
    num_samples : int
        Target number of parameter sets to generate. File data will be
        adjusted to match this count.

    Returns
    -------
    dict[str, np.ndarray]
        Dictionary mapping parameter names to NumPy arrays, each with
        num_samples entries along the first dimension.

    File Format Support
    -------------------
    **CSV Files**:
        - Column headers become parameter names
        - Each row represents one agent's parameters
        - Supports all numeric data types
        - Missing values handled automatically

    **NPZ Files**:
        - Each array key becomes a parameter name
        - First dimension must match across all parameters
        - Preserves exact numerical precision
        - Supports complex data structures

    Population Matching
    -------------------
    **Too Few Samples**: Randomly repeats existing rows to reach target count
    **Too Many Samples**: Randomly selects subset to match target count
    **Exact Match**: Uses data as-is without modification

    Examples
    --------
    Loading CSV parameter file:

    .. code-block:: python

        # CSV file structure (example: euglena_params.csv):
        # mass,diameter,max_speed,light_sensitivity
        # 1.2,0.05,2.1,0.8
        # 1.1,0.048,2.3,0.75
        # 1.3,0.052,1.9,0.85

        config = {
            'file_path': 'Configuration/Config_data/euglena_params.csv'
        }
        
        params = _load_parameters(config, num_samples=100)
        
        # Returns:
        # {
        #     'mass': array([1.2, 1.1, 1.3, 1.2, ...]),      # 100 samples
        #     'diameter': array([0.05, 0.048, 0.052, ...]),   # 100 samples
        #     'max_speed': array([2.1, 2.3, 1.9, ...]),       # 100 samples
        #     'light_sensitivity': array([0.8, 0.75, 0.85, ...]) # 100 samples
        # }

    
    Error Handling
    --------------
    - **FileNotFoundError**: When specified file doesn't exist
    - **ValueError**: For unsupported file formats or invalid data
    - **KeyError**: When expected parameter columns/arrays are missing
    - **MemoryError**: For extremely large files (use chunked loading)

    Notes
    -----
    - Preserves parameter names exactly as they appear in files
    - Maintains data types from original files when possible
    - Logs warnings when adjusting population size
    - Supports both absolute and relative file paths
    - Thread-safe for concurrent parameter loading
    """
    file_path = Path(params_config.get("file_path", ""))

    if not file_path.exists():
        raise FileNotFoundError(f"Parameters file '{file_path}' not found.")

    # Load from CSV file using pandas for convenience
    if file_path.suffix == ".csv":
        df = pd.read_csv(file_path)
        params_dict = {col: df[col].to_numpy() for col in df.columns}

    # Load from NPZ file
    elif file_path.suffix == ".npz":
        npz_data = np.load(file_path)
        params_dict = {key: npz_data[key] for key in npz_data.files}

    else:
        raise ValueError(f"Unsupported file type '{file_path.suffix}'. Only '.csv' and '.npz' are supported.")

    current_num_samples = next(iter(params_dict.values())).shape[0]

    # Repeat rows if not enough
    if current_num_samples < num_samples:
        logging.warning("Parameters file has fewer rows than N; repeating some rows to match agent count.")
        repeat_indices = np.random.choice(current_num_samples, size=num_samples - current_num_samples, replace=True)
        for key in params_dict:
            params_dict[key] = np.concatenate([params_dict[key], params_dict[key][repeat_indices]])

    # Truncate if too many rows
    elif current_num_samples > num_samples:
        logging.warning("Parameters file has more rows than N; truncating to match agent count.")
        for key in params_dict:
            params_dict[key] = params_dict[key][:num_samples]

    return params_dict


def _generate_parameters(params_config: dict, num_samples: int) -> dict[str, np.ndarray]:
    """
    Generate parameters using statistical distributions and configuration.

    This function creates parameter sets using a comprehensive collection of
    statistical distributions from NumPy's random module. It supports both
    homogeneous (same value for all agents) and heterogeneous (different
    values per agent) parameter generation with flexible shape control.

    Parameters
    ----------
    params_config : dict
        Configuration dictionary specifying parameters to generate.
        Each key represents a parameter name with associated generation settings.
    num_samples : int
        Number of parameter sets (agents) to generate.

    Returns
    -------
    dict[str, np.ndarray]
        Dictionary mapping parameter names to generated NumPy arrays.
        Arrays have shape (num_samples, *parameter_shape).

    Supported Distributions
    -----------------------
    **Continuous Distributions**:
        - uniform, normal, exponential, gamma, beta, lognormal
        - laplace, logistic, gumbel, rayleigh, weibull, pareto
        - standard_normal, standard_exponential, standard_gamma
        - triangular, vonmises, wald, power, chisquare, f
        - noncentral_chisquare, noncentral_f, standard_cauchy, standard_t

    **Discrete Distributions**:
        - poisson, binomial, negative_binomial, geometric
        - hypergeometric, logseries, zipf

    **Multivariate Distributions**:
        - multivariate_normal, multinomial

    Parameter Configuration
    ----------------------
    Each parameter can be configured as:

    1. **Constant Value**: Simple numeric value or array
    2. **Distribution Sampling**: Dictionary with sampler configuration
    3. **Homogeneous Sampling**: Same random value for all agents
    4. **Heterogeneous Sampling**: Different random values per agent

    Examples
    --------
    Basic parameter generation:

    .. code-block:: python

        config = {
            # Constant parameters
            'species_id': 1,                    # Same for all agents
            'max_neighbors': 10,                # Same for all agents
            
            # Scalar random parameters
            'mass': {
                'sampler': 'normal',
                'args': {'loc': 1.0, 'scale': 0.1}
            },
            
            # Vector random parameters  
            'initial_position': {
                'sampler': 'uniform',
                'args': {'low': -5.0, 'high': 5.0},
                'shape': [3]  # 3D position
            },
            
            # Homogeneous parameters (same random value for all)
            'environment_temperature': {
                'sampler': 'normal',
                'args': {'loc': 20.0, 'scale': 2.0},
                'homogeneous': True
            }
        }
        
        params = _generate_parameters(config, num_samples=100)

    
    Applications
    ------------
    - **Agent Initialization**: Set diverse agent properties for realistic populations
    - **Parameter Studies**: Generate parameter sets for sensitivity analysis
    - **Monte Carlo Simulation**: Generate random inputs for statistical analysis

    See Also
    --------
    numpy.random : Complete documentation of available distributions
    _reshape_parameter : Function for parameter shape manipulation
    get_parameters : Main parameter loading/generation interface
    """
    params_dict = {}

    sampler_mapping = {
        "uniform": np.random.uniform,
        "normal": np.random.normal,
        "poisson": np.random.poisson,
        "exponential": np.random.exponential,
        "binomial": np.random.binomial,
        "gamma": np.random.gamma,
        "beta": np.random.beta,
        "chisquare": np.random.chisquare,
        "f": np.random.f,
        "geometric": np.random.geometric,
        "gumbel": np.random.gumbel,
        "hypergeometric": np.random.hypergeometric,
        "laplace": np.random.laplace,
        "logistic": np.random.logistic,
        "lognormal": np.random.lognormal,
        "logseries": np.random.logseries,
        "multinomial": np.random.multinomial,
        "multivariate_normal": np.random.multivariate_normal,
        "negative_binomial": np.random.negative_binomial,
        "noncentral_chisquare": np.random.noncentral_chisquare,
        "noncentral_f": np.random.noncentral_f,
        "pareto": np.random.pareto,
        "power": np.random.power,
        "rayleigh": np.random.rayleigh,
        "standard_cauchy": np.random.standard_cauchy,
        "standard_exponential": np.random.standard_exponential,
        "standard_gamma": np.random.standard_gamma,
        "standard_normal": np.random.standard_normal,
        "standard_t": np.random.standard_t,
        "triangular": np.random.triangular,
        "vonmises": np.random.vonmises,
        "wald": np.random.wald,
        "weibull": np.random.weibull,
        "zipf": np.random.zipf,
    }

    for par_name, settings in params_config.items():
        if not isinstance(settings, dict):
            constant_value = np.array(settings) if isinstance(settings, (list, tuple)) else settings
            if np.isscalar(constant_value):
                params_dict[par_name] = np.full(num_samples, constant_value)
            else:
                stacked_array = np.stack([constant_value] * num_samples)
                params_dict[par_name] = stacked_array
            continue

        sampler_key = settings.get("sampler", "uniform").lower()
        if sampler_key not in sampler_mapping:
            raise ValueError(f"Unsupported sampler: {sampler_key}")
        sampler = sampler_mapping[sampler_key]

        positional_args = settings.get("positional_args")
        args = settings.get("args", {}).copy()

        shape = settings.get("shape", [])
        if isinstance(shape, list):
            shape = tuple(shape)

        is_homogeneous = settings.get("homogeneous", False)

        if is_homogeneous:
            args["size"] = shape if shape else ()
            if positional_args is not None:
                single_value = sampler(*positional_args, **args)
            else:
                single_value = sampler(**args)

            if shape:
                stacked_array = np.stack([np.array(single_value) for _ in range(num_samples)])
                params_dict[par_name] = stacked_array
            else:
                params_dict[par_name] = np.full(num_samples, single_value)
        else:
            args["size"] = (num_samples,) + shape if shape else num_samples
            if positional_args is not None:
                values = sampler(*positional_args, **args)
            else:
                values = sampler(**args)
            params_dict[par_name] = np.array(values)

    return params_dict


def _reshape_parameter(param_array: np.ndarray, shape: tuple = ()) -> np.ndarray:
    """
    Reshape parameter arrays to consistent multi-agent format with automatic broadcasting.

    This function standardizes parameter arrays to have consistent dimensions across
    all agents, handling scalar-to-tensor broadcasting and special cases like
    diagonal matrix creation. It ensures all parameters have the format
    (num_agents, *parameter_shape) for uniform agent property access.

    Parameters
    ----------
    param_array : np.ndarray
        Input parameter array where the first dimension corresponds to agents.
        Can contain scalars, vectors, or higher-dimensional structures per agent.
    shape : tuple of int, optional
        Target shape for each agent's parameter. Empty tuple () indicates scalar.
        Special handling for square matrices (diagonal initialization from scalars).

    Returns
    -------
    np.ndarray
        Reshaped array with dimensions (num_agents, *shape), where each agent's
        parameter conforms to the specified shape through broadcasting or 
        transformation.

    Shape Transformation Rules
    -------------------------
    1. **Scalar to Scalar**: (N,) → (N,) - No change needed
    2. **Scalar to Vector**: (N,) → (N, d) - Broadcast scalar to vector
    3. **Scalar to Matrix**: (N,) → (N, d, d) - Create diagonal matrices
    4. **Vector to Vector**: (N, d) → (N, d) - Direct assignment
    5. **Matrix to Matrix**: (N, d, d) → (N, d, d) - Direct assignment

    Special Cases
    -------------
    **Diagonal Matrix Creation**: When target shape is (d, d) and input contains
    scalars, creates diagonal matrices with the scalar value on the diagonal.

    **Broadcasting**: When target shape has more dimensions than input, broadcasts
    the input to match the target shape while preserving agent dimension.

    Examples
    --------
    Scalar to vector broadcasting:

    .. code-block:: python

        # Input: scalar values per agent
        masses = np.array([1.0, 1.2, 0.8, 1.1])  # Shape: (4,)
        
        # Reshape to 3D vectors (same mass in all dimensions)
        mass_vectors = _reshape_parameter(masses, shape=(3,))
        print(mass_vectors.shape)  # (4, 3)
        print(mass_vectors[0])     # [1.0, 1.0, 1.0]

    
    Error Handling
    --------------
    - **ValueError**: Raised when input array shapes are incompatible with target shape
    - **TypeError**: Raised when input is not a NumPy array
    - **IndexError**: Raised when accessing invalid array dimensions

    
    Applications
    ------------
    - **Agent Initialization**: Standardize diverse parameter formats
    - **Physical Properties**: Create mass/inertia matrices from scalar values
    - **Neural Networks**: Initialize weight matrices for agent brains
    - **Interaction Models**: Setup pairwise interaction parameter tensors
    - **Control Systems**: Create gain matrices from scalar tuning parameters

    See Also
    --------
    numpy.broadcast_to : NumPy broadcasting documentation
    numpy.zeros : Array initialization
    numpy.stack : Array stacking operations
    get_parameters : Main parameter processing interface
    """

    num_agents = param_array.shape[0]

    if np.isscalar(param_array[0]):
        if len(shape) == 2 and shape[0] == shape[1]:
            d = shape[0]
            result = np.zeros((num_agents, d, d), dtype=np.result_type(param_array))
            diag_indices = np.arange(d)
            result[:, diag_indices, diag_indices] = param_array.reshape(num_agents, 1)
        else:
            reshaped = param_array.reshape((num_agents,) + (1,) * len(shape))
            result = np.broadcast_to(reshaped, (num_agents,) + shape)
    else:
        first_elem = np.array(param_array[0])
        if first_elem.shape != shape:
            raise ValueError(f"Expected shape {shape} but got {first_elem.shape} for the first element.")
        result = np.stack(param_array)

    return result