Source code for copul.checkerboard.biv_check_mixed

from __future__ import annotations

from typing import List, Union

import numpy as np

from copul.checkerboard.biv_check_pi import BivCheckPi
from copul.checkerboard.biv_check_min import BivCheckMin
from copul.checkerboard.biv_check_w import BivCheckW
from copul.family.core.biv_core_copula import BivCoreCopula
from copul.family.core.copula_approximator_mixin import CopulaApproximatorMixin
from copul.family.core.copula_plotting_mixin import CopulaPlottingMixin
from copul.schur_order.cis_verifier import CISVerifier


[docs] class BivCheckMixed(BivCoreCopula, CopulaPlottingMixin, CopulaApproximatorMixin): r""" Mixed checkerboard copula (per–cell choice of :math:`\Pi` / ↗ / ↘). A sign matrix :math:`S` with entries :math:`\{0,+1,-1\}` selects, in every checkerboard rectangle, which base copula to use: * :math:`0` → independence (:math:`\Pi`) * :math:`+1` → perfect positive dependence (check-min, ↗) * :math:`-1` → perfect negative dependence (check-w, ↘) The probability matrix :math:`\Delta` (argument ``matr``) is shared across all three modes. """ params: List = [] intervals: dict = {} def __init__( self, matr: Union[np.ndarray, List[List[float]]], sign: Union[np.ndarray, List[List[int]], None] = None, **kwargs, ): matr = np.asarray(matr, dtype=float) if matr.ndim != 2: raise ValueError("`matr` must be a 2-D array") if np.any(matr < 0): raise ValueError("`matr` must be non-negative") if not np.isclose(matr.sum(), 1.0): matr = matr / matr.sum() self.m, self.n = matr.shape # --- S --------------------------------------------------------- # if sign is None: sign = np.zeros_like(matr, dtype=int) sign = np.asarray(sign, dtype=int) if sign.shape != matr.shape: raise ValueError("`sign` must have the same shape as `matr`") if not np.isin(sign, (-1, 0, 1)).all(): raise ValueError("`sign` entries must be −1, 0, or +1") self.matr = matr # probability matrix Δ self.sign = sign # sign selector S # instantiate base copulas once self._pi = BivCheckPi(matr, **kwargs) self._min = BivCheckMin(matr, **kwargs) self._w = BivCheckW(matr, **kwargs) super().__init__() # ------------------------------------------------------------------ # # basic properties # # ------------------------------------------------------------------ # def __str__(self) -> str: return f"BivCheckMixed(m={self.m}, n={self.n})" __repr__ = __str__ @property def is_absolutely_continuous(self) -> bool: return np.all(self.sign == 0) @property def is_symmetric(self) -> bool: return ( self.m == self.n and np.allclose(self.matr, self.matr.T) and np.array_equal(self.sign, self.sign.T) ) # ------------------------------------------------------------------ # # helper (cell localisation) # # ------------------------------------------------------------------ # def _cell_indices(self, u: float, v: float) -> tuple[int, int]: r""" Mixed checkerboard copula (per–cell choice of :math:`\Pi` / ↗ / ↘). A sign matrix :math:`S` with entries :math:`\{0,+1,-1\}` selects, in every checkerboard rectangle, which base copula to use: * :math:`0` → independence (:math:`\Pi`) * :math:`+1` → perfect positive dependence (check-min, ↗) * :math:`-1` → perfect negative dependence (check-w, ↘) The probability matrix :math:`\Delta` (argument ``matr``) is shared across all three modes. """ i = min(int(np.floor(u * self.m)), self.m - 1) j = min(int(np.floor(v * self.n)), self.n - 1) return i, j # ------------------------------------------------------------------ # # CDF # # ------------------------------------------------------------------ #
[docs] def cdf(self, u: float | np.ndarray, v: float | np.ndarray): r"""Piecewise delegates to :math:`\Pi`, ↗ or ↘ according to ``sign``. For each evaluation point :math:`(u,v)`, the method chooses the base checkerboard copula dictated by the cell’s sign entry and returns the corresponding CDF value. Parameters ---------- u, v : float or ndarray Coordinates in :math:`[0,1]`. Must have identical shapes. Returns ------- float or ndarray CDF value(s) with the same shape as the broadcasted inputs. """ u_arr = np.asarray(u, dtype=float) v_arr = np.asarray(v, dtype=float) if u_arr.shape != v_arr.shape: raise ValueError("u and v must have the same shape") out = np.empty_like(u_arr, dtype=float) it = np.nditer( [u_arr, v_arr, out], flags=["refs_ok", "multi_index"], op_flags=[["readonly"], ["readonly"], ["writeonly"]], ) for uu, vv, dest in it: i, j = self._cell_indices(float(uu), float(vv)) s = self.sign[i, j] if s == 0: dest[...] = self._pi.cdf(float(uu), float(vv)) elif s == 1: dest[...] = self._min.cdf(float(uu), float(vv)) else: # s == −1 dest[...] = self._w.cdf(float(uu), float(vv)) return out if out.shape else float(out)
# ------------------------------------------------------------------ # # matrices for closed-form measures # # ------------------------------------------------------------------ # @staticmethod def _xi_matrix(size: int) -> np.ndarray: r"""Matrix :math:`2\,\mathrm{tri} - I` (strict lower-triangular coefficient). Constructs the :math:`s\times s` matrix :math:`\Xi_s = 2\,\mathrm{tri}_s - I_s`, where :math:`\mathrm{tri}_s` is the unit lower-triangular matrix and :math:`I_s` the identity. Used in the closed-form expression for Kendall’s :math:`\tau`. """ return 2 * np.tri(size) - np.eye(size) def _omega(self) -> np.ndarray: r""" Ω-matrix with entries :math:`\displaystyle \Omega_{i,j}=\frac{(2m-2i-1)(2n-2j-1)}{mn}`, for **zero-based** indices :math:`i=0,\dots,m-1` and :math:`j=0,\dots,n-1`. Returns ------- ndarray Array of shape ``(m, n)`` with the weights used in Spearman’s :math:`\rho` computation. """ i = np.arange(self.m).reshape(-1, 1) # 0 … m-1 j = np.arange(self.n).reshape(1, -1) num = (2 * self.m - 2 * i - 1) * (2 * self.n - 2 * j - 1) return num / (self.m * self.n) # ------------------------------------------------------------------ # # exact dependence measures # # ------------------------------------------------------------------ #
[docs] def kendalls_tau(self) -> float: Xi_m = self._xi_matrix(self.m) Xi_n = self._xi_matrix(self.n) core = np.trace(Xi_m @ self.matr @ Xi_n @ self.matr.T) extra = np.sum(self.sign * (self.matr**2)) return 1.0 - core + extra
[docs] def chatterjees_xi(self, *, condition_on_y: bool = False) -> float: # base value from plain checkerboard base = self._pi.chatterjees_xi(condition_on_y) # scaling m/n depends on conditioning m, n = (self.n, self.m) if condition_on_y else (self.m, self.n) extra = (m / n) * np.sum(np.abs(self.sign) * (self.matr**2)) return base + extra
[docs] def spearmans_rho(self) -> float: Omega = self._omega() core = 3.0 * np.trace(Omega.T @ self.matr) - 3.0 extra = np.sum(self.sign * self.matr) / (self.m * self.n) return core + extra
[docs] def cond_distr( self, dim: int, u: float | np.ndarray | None = None, v: float | np.ndarray | None = None, ): r""" Compute conditional distribution F_{U_{-dim}|U_{dim}}. Calculates the partial derivative of the Copula. If dim=1, returns dC(u,v)/du = P(V <= v | U=u). If dim=2, returns dC(u,v)/dv = P(U <= u | V=v). """ if u is None or v is None: raise ValueError("u and v must be provided for numeric evaluation") u = np.asarray(u) v = np.asarray(v) # Clip inputs to [0, 1] to prevent indexing errors u = np.clip(u, 0.0, 1.0) v = np.clip(v, 0.0, 1.0) # Broadcast if necessary so operations work on matching shapes if u.shape != v.shape: u, v = np.broadcast_arrays(u, v) # Standardize to: x (conditioning variable), y (target variable) if dim == 1: x, y = u, v matr = self.matr sign = self.sign m_cond, n_target = self.m, self.n else: x, y = v, u matr = self.matr.T sign = self.sign.T m_cond, n_target = self.n, self.m # 1. Identify grid indices x_scaled = x * m_cond y_scaled = y * n_target # Integer indices i = np.floor(x_scaled).astype(int) j = np.floor(y_scaled).astype(int) # Clamp to valid range (FIX: removed out= argument to support scalars) i = np.clip(i, 0, m_cond - 1) j = np.clip(j, 0, n_target - 1) # Local coordinates within the cell [0, 1] x_loc = x_scaled - i y_loc = y_scaled - j # 2. Base Probability (Accumulated mass from cells below y in the current column) # Precompute cumsum along the target axis (axis 1) weights = matr * m_cond # Add a column of zeros at the start for cumsum logic cum_weights = np.hstack([np.zeros((m_cond, 1)), np.cumsum(weights, axis=1)]) # Extract base values # cum_weights[i, j] gives sum(weights[i, :j]) # Note: we use tuple(i,j) indexing for correct broadcasting if inputs are arrays base_val = cum_weights[i, j] # 3. Local Contribution from the current cell (i, j) p_cell = weights[i, j] s_cell = sign[i, j] # Case S=0 (Independence): Linear growth val_0 = p_cell * y_loc # Case S=1 (Min / Ascending): Step function when y_loc >= x_loc val_1 = p_cell * (y_loc >= x_loc).astype(float) # Case S=-1 (W / Descending): Step function when y_loc >= 1 - x_loc val_w = p_cell * (y_loc >= (1.0 - x_loc)).astype(float) # Select the correct value based on the sign of the cell local_val = np.select( [s_cell == 0, s_cell == 1, s_cell == -1], [val_0, val_1, val_w], default=0.0, ) return base_val + local_val
[docs] def cond_distr_1(self, u: float | None = None, v: float | None = None): return self.cond_distr(1, u, v)
[docs] def cond_distr_2(self, u: float | None = None, v: float | None = None): return self.cond_distr(2, u, v)
# ------------------------------------------------------------------ # # misc # # ------------------------------------------------------------------ #
[docs] def is_cis(self, i: int = 1) -> bool: return CISVerifier(i).is_cis(self)
# ------------------------------------------------------------------ # # sampling # # ------------------------------------------------------------------ #
[docs] def rvs( self, n: int = 1, *, random_state: int | None = None, **kwargs ) -> np.ndarray: r""" Draw :math:`n` i.i.d. samples from the mixed checkerboard copula. Sampling proceeds by (i) selecting a cell according to :math:`\Delta`, then (ii) drawing a point **inside** that cell according to the cell’s mode (uniform for :math:`\Pi`, diagonal ↗ or ↘ for the two perfect-dependence cases). Parameters ---------- n : int, default 1 Number of samples to generate. random_state : int, optional Seed for the RNG (reproducibility). Returns ------- ndarray of shape (n, 2) Samples :math:`(U,V)` in :math:`[0,1]^2`. """ rng = np.random.default_rng(random_state) # ------ 1) pick the cell for every sample -------------------- # flat_Δ = self.matr.ravel() probs = flat_Δ / flat_Δ.sum() # should already sum to 1 flat_idx = rng.choice(flat_Δ.size, size=n, p=probs) i_idx, j_idx = np.unravel_index(flat_idx, self.matr.shape) # (n,) # ------ 2) draw location *inside* the selected cell ----------- # u = np.empty(n) v = np.empty(n) # split into the three regimes once; then fill the vectors for s_val, mask in ( (0, self.sign[i_idx, j_idx] == 0), # Π (independence) (1, self.sign[i_idx, j_idx] == 1), # ↗ (check-min) (-1, self.sign[i_idx, j_idx] == -1), # ↘ (check-w) ): if not mask.any(): continue ii = i_idx[mask] jj = j_idx[mask] if s_val == 0: # uniform on the rectangle u[mask] = (ii + rng.random(mask.sum())) / self.m v[mask] = (jj + rng.random(mask.sum())) / self.n elif s_val == 1: # perfect + dependence ↗ t = rng.random(mask.sum()) u[mask] = (ii + t) / self.m v[mask] = (jj + t) / self.n else: # perfect – dependence ↘ t = rng.random(mask.sum()) u[mask] = (ii + t) / self.m v[mask] = (jj + 1 - t) / self.n # descending line return np.column_stack((u, v))
# -------------------------------------------------------------------------- # # quick manual check # -------------------------------------------------------------------------- # if __name__ == "__main__": # pragma: no cover Delta = [[1, 1]] S = [[1, 0]] cop = BivCheckMixed(Delta, sign=S) print(np.sqrt(cop.chatterjees_xi())) print(cop.spearmans_rho()) cop.plot_cond_distr_1(plot_type="contour") cop.plot_cond_distr_2() Δ = np.full((2, 2), 0.25) Delta = [[1, 0], [0, 1]] S = np.array([[0, 0], [0, 1]]) cop = BivCheckMixed(Delta, sign=S) print( "τ:", cop.kendalls_tau(), "ρ:", cop.spearmans_rho(), "ξ:", cop.chatterjees_xi() )