Coverage for src/stable_yield_lab/analytics/performance.py: 81%

1r"""Performance analytics helpers for discrete-compounding return data.

3The functions in this module operate on periodic **simple returns** and assume

4that gains are reinvested every period without external cash flows. Under these

5assumptions the cumulative growth of each series follows the discrete

6compounding identity :math:`G_t = \prod_{i=1}^t (1 + r_i)`.

7"""

9from __future__ import annotations

11from dataclasses import dataclass

12from math import sqrt

13from typing import Iterable, Mapping

15import pandas as pd

def cumulative_return(series: pd.Series) -> pd.Series:
    r"""Compound a series of periodic simple returns into cumulative returns.

    For every period :math:`t` the result is

    .. math::
        R_t = \prod_{i=1}^t (1 + r_i) - 1,

    i.e. discrete compounding with gains and losses fully reinvested.
    Inputs are decimal fractions (``0.01`` means +1%).

    Parameters
    ----------
    series:
        Periodic simple returns indexed by time.

    Returns
    -------
    pandas.Series
        Cumulative returns sharing the index of ``series``. An empty input
        yields an empty copy.
    """
    if not series.empty:
        growth_factors = 1.0 + series
        compounded_growth = growth_factors.cumprod()
        return compounded_growth - 1.0
    # Nothing to compound: hand back a copy so callers never alias the input.
    return series.copy()

def nav_series(
    returns: pd.DataFrame,
    weights: pd.Series | None = None,
    initial: float = 1.0,
) -> pd.Series:
    r"""Build the NAV path of a portfolio that is rebalanced every period.

    Each period's portfolio return is the weighted sum of asset returns,

    .. math::
        r_{p,t} = \sum_i w_i r_{i,t},

    and the NAV compounds discretely from the starting value:

    .. math::
        \text{NAV}_t = \text{NAV}_{t-1} (1 + r_{p,t}), \qquad \text{NAV}_0 = \text{initial}.

    Parameters
    ----------
    returns:
        Wide DataFrame of periodic returns as decimal fractions (rows are
        timestamps, columns are assets). Missing values count as ``0.0``.
    weights:
        Target weights keyed by asset. ``None`` means equal weighting. Assets
        absent from ``weights`` get weight zero; the aligned weights are
        rescaled to sum to 1.
    initial:
        Starting NAV; the output is expressed in the same units.

    Returns
    -------
    pandas.Series
        NAV after each period, indexed like ``returns``. An empty ``returns``
        produces an empty float Series.

    Raises
    ------
    ValueError
        If the weights sum to zero after alignment with ``returns``.
    """
    if returns.empty:
        return pd.Series(dtype=float)

    asset_names = returns.columns
    if weights is not None:
        # Align to the asset universe; unknown assets contribute nothing.
        raw_weights = weights.reindex(asset_names).fillna(0.0)
    else:
        raw_weights = pd.Series(1.0 / returns.shape[1], index=asset_names)

    weight_sum = float(raw_weights.sum())
    if weight_sum == 0.0:
        raise ValueError("weights sum to zero")
    target = raw_weights / weight_sum

    per_period = returns.fillna(0.0).mul(target, axis=1).sum(axis=1)
    # Cumulative return first, then scaled growth, matching the documented
    # compounding identity.
    cumulative = (1.0 + per_period).cumprod() - 1.0
    return float(initial) * (1.0 + cumulative)

103

104

def nav_trajectories(returns: pd.DataFrame, *, initial_investment: float) -> pd.DataFrame:
    r"""Grow the same starting capital through each asset's own returns.

    Every column is compounded independently:

    .. math::
        \text{NAV}_{i,t} = \text{NAV}_{i,t-1} (1 + r_{i,t}),

    with a missing return treated as a flat (zero) period.

    Parameters
    ----------
    returns:
        Wide DataFrame of periodic returns as decimal fractions (rows are
        timestamps, columns are assets).
    initial_investment:
        Capital allocated to each asset at the start; output keeps its units.

    Returns
    -------
    pandas.DataFrame
        NAV of each asset after each period. An empty input yields an empty
        copy.
    """
    if not returns.empty:
        filled = returns.fillna(0.0)
        growth_factor = (1.0 + filled).cumprod()
        return growth_factor * float(initial_investment)
    return returns.copy()

136

137

def yield_trajectories(returns: pd.DataFrame) -> pd.DataFrame:
    r"""Compound each asset's periodic returns into a cumulative yield path.

    For asset :math:`i` through period :math:`t`:

    .. math::
        Y_{i,t} = \prod_{k=1}^t (1 + r_{i,k}) - 1.

    A missing return is interpreted as zero for that period.

    Parameters
    ----------
    returns:
        Wide DataFrame of periodic returns as decimal fractions (rows are
        timestamps, columns are assets).

    Returns
    -------
    pandas.DataFrame
        Cumulative return of each asset as decimal fractions. An empty input
        yields an empty copy.
    """
    if not returns.empty:
        growth_factor = (1.0 + returns.fillna(0.0)).cumprod()
        return growth_factor - 1.0
    return returns.copy()

164

165

@dataclass(frozen=True)
class RebalanceScenario:
    """Immutable inputs for a single portfolio rebalance experiment.

    Attributes
    ----------
    calendar:
        Timestamps on which the simulated portfolio is reset to its target
        weights.
    cost_bps:
        Trading cost in basis points charged on the value traded at each
        rebalance. Defaults to ``0.0`` (frictionless).
    """

    # Rebalance dates; any iterable of timestamps or a ready-made index.
    calendar: Iterable[pd.Timestamp] | pd.DatetimeIndex
    # Cost per unit of traded value, in basis points (1 bp = 1/10_000).
    cost_bps: float = 0.0

172

173

@dataclass(frozen=True)
class ScenarioRunResult:
    """Immutable bundle of scenario metrics and their time-series paths.

    Attributes
    ----------
    metrics:
        Summary statistics, one row per scenario.
    navs:
        NAV path of every scenario, one column per scenario.
    returns:
        Periodic return path of every scenario, one column per scenario.
    """

    # Per-scenario summary table.
    metrics: pd.DataFrame
    # NAV trajectories keyed by scenario name.
    navs: pd.DataFrame
    # Periodic return trajectories keyed by scenario name.
    returns: pd.DataFrame

181

182

@dataclass(frozen=True)
class _ScenarioPath:
    """Immutable outcome of simulating one rebalance scenario."""

    # Portfolio NAV after each period.
    nav: pd.Series
    # Portfolio return realised in each period.
    returns: pd.Series
    # Sum of all trading costs charged along the path.
    total_cost: float

188

189

def _normalise_weights(weights: pd.Series, columns: pd.Index) -> pd.Series:
    """Align ``weights`` to ``columns`` and rescale them to sum to one.

    Labels in ``columns`` that are missing from ``weights`` receive weight
    zero; labels in ``weights`` that are not in ``columns`` are dropped.

    Raises
    ------
    ValueError
        If the aligned weights sum to exactly zero.
    """
    on_columns = weights.reindex(columns).fillna(0.0)
    weight_sum = float(on_columns.sum())
    if weight_sum != 0.0:
        return on_columns / weight_sum
    raise ValueError("weights sum to zero after alignment with returns")

196

197

def _prepare_calendar(
    calendar: Iterable[pd.Timestamp] | pd.DatetimeIndex | None,
    index: pd.DatetimeIndex,
) -> pd.DatetimeIndex:
    """Coerce a rebalance calendar to ``index``'s timezone and clip it to ``index``.

    Parameters
    ----------
    calendar:
        Rebalance dates. ``None`` means "never rebalance"; any non-index
        iterable is parsed with ``pandas.to_datetime``.
    index:
        Timestamps of the return data the calendar is matched against.

    Returns
    -------
    pandas.DatetimeIndex
        The calendar dates that also occur in ``index``, expressed in the
        timezone (or naivety) of ``index``.
    """
    target_tz = index.tz
    if calendar is None:
        return pd.DatetimeIndex([], tz=target_tz)

    if isinstance(calendar, pd.DatetimeIndex):
        dates = calendar
    else:
        dates = pd.DatetimeIndex(pd.to_datetime(list(calendar)))

    if target_tz is None:
        # Naive index: strip any timezone from the calendar.
        if dates.tz is not None:
            dates = dates.tz_convert(None)
    elif dates.tz is None:
        # Aware index, naive calendar: interpret the dates in the index zone.
        dates = dates.tz_localize(target_tz)
    else:
        dates = dates.tz_convert(target_tz)

    return dates.intersection(index)

217

218

def _infer_periods_per_year(index: pd.DatetimeIndex) -> float:
    """Estimate how many observations of ``index`` fit into one year.

    The estimate is ``365.25`` divided by the mean gap between consecutive
    timestamps, measured in days. With fewer than two timestamps (or no
    usable gaps) the result is ``1.0``; a non-positive mean gap falls back to
    the number of timestamps.
    """
    n_obs = len(index)
    if n_obs < 2:
        return 1.0

    gaps = index.to_series().diff().dropna()
    if gaps.empty:
        return 1.0

    seconds_per_day = 86_400.0
    mean_gap_days = gaps.dt.total_seconds().mean() / seconds_per_day
    return float(n_obs) if mean_gap_days <= 0 else 365.25 / mean_gap_days

229

230

def _simulate_rebalanced_portfolio(
    returns: pd.DataFrame,
    weights: pd.Series,
    scenario: RebalanceScenario,
    *,
    initial_nav: float,
) -> _ScenarioPath:
    """Simulate a buy-and-hold portfolio that is rebalanced on a calendar.

    Holdings drift with each asset's return. On every timestamp contained in
    ``scenario.calendar`` the portfolio is reset to the normalised target
    weights, and a cost of ``cost_bps`` basis points is charged on the traded
    value (sum of absolute weight differences times NAV) before the reset.

    Parameters
    ----------
    returns:
        Wide DataFrame of periodic simple returns; missing values count as
        zero.
    weights:
        Target weights; aligned to ``returns.columns`` and normalised to 1.
    scenario:
        Rebalance calendar and cost assumption.
    initial_nav:
        Portfolio value before the first period.

    Returns
    -------
    _ScenarioPath
        NAV per period (named ``"nav"``), period return (named ``"return"``)
        and the total cost paid.

    Raises
    ------
    ValueError
        If the weights sum to zero after alignment with ``returns``.
    """
    if returns.empty:
        # Two distinct Series so mutating one cannot leak into the other.
        return _ScenarioPath(
            nav=pd.Series(dtype=float, index=returns.index),
            returns=pd.Series(dtype=float, index=returns.index),
            total_cost=0.0,
        )

    clean_returns = returns.fillna(0.0)
    target = _normalise_weights(weights, clean_returns.columns)
    calendar = _prepare_calendar(scenario.calendar, clean_returns.index)
    # Positional flags instead of label lookups: ``Series.loc[timestamp]``
    # returns a Series (ambiguous truth value) when timestamps repeat.
    rebalance_flags = clean_returns.index.isin(calendar)
    cost_rate = float(scenario.cost_bps) / 10_000.0

    nav = float(initial_nav)
    holdings = target * nav
    nav_path: list[float] = []
    period_returns: list[float] = []
    total_cost = 0.0

    for is_rebalance, (_, row) in zip(rebalance_flags, clean_returns.iterrows()):
        nav_before = nav
        holdings = holdings * (1.0 + row)
        nav = float(holdings.sum())

        if is_rebalance:
            if nav > 0.0:
                drifted_weights = holdings / nav
                traded_value = float((target - drifted_weights).abs().sum()) * nav
            else:
                # Nothing left to trade once the portfolio is wiped out.
                traded_value = 0.0
            cost = traded_value * cost_rate
            if cost:
                nav -= cost
                total_cost += cost
            holdings = target * nav

        # The period return is net of any cost charged in this period.
        period_return = (nav - nav_before) / nav_before if nav_before != 0 else 0.0
        nav_path.append(nav)
        period_returns.append(period_return)

    nav_out = pd.Series(nav_path, index=clean_returns.index, name="nav")
    returns_out = pd.Series(period_returns, index=clean_returns.index, name="return")
    return _ScenarioPath(nav=nav_out, returns=returns_out, total_cost=total_cost)

279

280

def run_rebalance_scenarios(
    returns: pd.DataFrame,
    weights: pd.Series,
    scenarios: Mapping[str, RebalanceScenario],
    *,
    benchmark: str | None = None,
    initial_nav: float = 1.0,
) -> ScenarioRunResult:
    """Evaluate portfolio performance under alternative rebalance calendars.

    Parameters
    ----------
    returns:
        Wide DataFrame of periodic simple returns with datetime index.
    weights:
        Target portfolio weights aligned with ``returns`` columns.
    scenarios:
        Mapping of scenario name to calendar/cost assumptions.
    benchmark:
        Scenario name used as reference for tracking error. When ``None`` the
        benchmark is a frictionless strategy that rebalances every period.
    initial_nav:
        Starting portfolio value.

    Returns
    -------
    ScenarioRunResult
        Per-scenario metrics (``realized_apy``, ``total_cost``,
        ``tracking_error``, ``terminal_nav``; indexed by scenario name) plus
        NAV and return trajectories with one column per scenario. For empty
        ``returns`` all three frames are empty.

    Raises
    ------
    ValueError
        If ``scenarios`` is empty (and ``returns`` is not).
    KeyError
        If ``benchmark`` is given but is not a key of ``scenarios``.
    """
    if returns.empty:
        # Distinct frames for navs/returns so they are never aliases.
        return ScenarioRunResult(
            metrics=pd.DataFrame(
                columns=["realized_apy", "total_cost", "tracking_error", "terminal_nav"]
            ),
            navs=pd.DataFrame(index=returns.index),
            returns=pd.DataFrame(index=returns.index),
        )

    if not scenarios:
        raise ValueError("at least one scenario must be provided")
    # Validate before simulating so a typo does not cost a full run.
    if benchmark is not None and benchmark not in scenarios:
        raise KeyError(f"benchmark '{benchmark}' not found in scenarios")

    paths: dict[str, _ScenarioPath] = {
        name: _simulate_rebalanced_portfolio(
            returns,
            weights,
            scenario,
            initial_nav=initial_nav,
        )
        for name, scenario in scenarios.items()
    }

    if benchmark is not None:
        benchmark_returns = paths[benchmark].returns
    else:
        # Default reference: rebalance on every timestamp at zero cost.
        benchmark_returns = _simulate_rebalanced_portfolio(
            returns,
            weights,
            RebalanceScenario(calendar=returns.index, cost_bps=0.0),
            initial_nav=initial_nav,
        ).returns

    benchmark_returns = benchmark_returns.reindex(returns.index, fill_value=0.0)
    periods_per_year = _infer_periods_per_year(returns.index)

    metrics_rows: list[dict[str, float | str]] = []
    nav_data: dict[str, pd.Series] = {}
    return_data: dict[str, pd.Series] = {}

    for name, path in paths.items():
        # Local names deliberately avoid shadowing the module's nav_series().
        scenario_nav = path.nav.reindex(returns.index, fill_value=float("nan"))
        scenario_returns = path.returns.reindex(returns.index, fill_value=0.0)
        nav_data[name] = scenario_nav
        return_data[name] = scenario_returns

        total_periods = len(scenario_returns)
        terminal_nav = scenario_nav.iloc[-1]
        total_growth = terminal_nav / initial_nav if total_periods else float("nan")
        if total_periods and total_growth > 0.0:
            # Annualise total growth by the inferred observation frequency.
            realized_apy = total_growth ** (periods_per_year / total_periods) - 1.0
        else:
            realized_apy = float("nan")

        active = (scenario_returns - benchmark_returns).fillna(0.0)
        if len(active) > 1:
            # Annualised population standard deviation of active returns.
            tracking_error = float(active.std(ddof=0) * sqrt(periods_per_year))
        else:
            tracking_error = 0.0

        metrics_rows.append(
            {
                "scenario": name,
                "realized_apy": float(realized_apy),
                "total_cost": float(path.total_cost),
                "tracking_error": tracking_error,
                "terminal_nav": float(terminal_nav),
            }
        )

    metrics = pd.DataFrame(metrics_rows).set_index("scenario")
    navs = pd.DataFrame(nav_data)
    returns_df = pd.DataFrame(return_data)
    return ScenarioRunResult(metrics=metrics, navs=navs, returns=returns_df)