Coverage for src/stable_yield_lab/analytics/performance.py: 81%

149 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-04 20:38 +0000

1r"""Performance analytics helpers for discrete-compounding return data. 

2 

3The functions in this module operate on periodic **simple returns** and assume 

4that gains are reinvested every period without external cash flows. Under these 

5assumptions the cumulative growth of each series follows the discrete 

6compounding identity :math:`G_t = \prod_{i=1}^t (1 + r_i)`. 

7""" 

8 

9from __future__ import annotations 

10 

11from dataclasses import dataclass 

12from math import sqrt 

13from typing import Iterable, Mapping 

14 

15import pandas as pd 

16 

17 

def cumulative_return(series: pd.Series) -> pd.Series:
    r"""Compound periodic simple returns into a cumulative return path.

    For returns :math:`r_i` given as decimal fractions (``0.01`` means +1%),
    the value reported at period :math:`t` is

    .. math::
        R_t = \prod_{i=1}^t (1 + r_i) - 1,

    i.e. discrete compounding in which every gain or loss is fully
    reinvested.

    Parameters
    ----------
    series:
        Periodic simple returns indexed by time.

    Returns
    -------
    pandas.Series
        Cumulative returns sharing the index of ``series``. An empty input
        is returned as an (empty) copy.
    """
    if not series.empty:
        growth_factors = 1.0 + series
        return growth_factors.cumprod() - 1.0
    return series.copy()

43 

44 

def nav_series(
    returns: pd.DataFrame,
    weights: pd.Series | None = None,
    initial: float = 1.0,
) -> pd.Series:
    r"""Build a portfolio net asset value (NAV) path from asset returns.

    With asset returns :math:`r_{i,t}` and target weights :math:`w_i`, the
    portfolio's simple return in each period is

    .. math::
        r_{p,t} = \sum_i w_i r_{i,t}.

    The portfolio is assumed to be rebalanced back to ``weights`` every
    period, so the NAV compounds discretely:

    .. math::
        \text{NAV}_t = \text{NAV}_{t-1} (1 + r_{p,t}), \qquad \text{NAV}_0 = \text{initial}.

    Parameters
    ----------
    returns:
        Wide DataFrame of periodic returns (rows are timestamps, columns are
        assets) expressed as decimal fractions. Missing values count as a
        zero return.
    weights:
        Target portfolio weights. ``None`` means equal weights. Assets
        without a weight get zero; weights for assets not present in
        ``returns`` are ignored. The aligned weights are rescaled to sum
        to 1.
    initial:
        Starting NAV value. Units are preserved in the output.

    Returns
    -------
    pandas.Series
        NAV values for each period; an empty float Series when ``returns``
        is empty.

    Raises
    ------
    ValueError
        If the provided weights sum to zero after alignment with ``returns``.
    """
    if returns.empty:
        return pd.Series(dtype=float)

    assets = returns.columns
    if weights is not None:
        # Align to the asset universe; unknown assets drop out, missing ones
        # become zero.
        weights = weights.reindex(assets).fillna(0.0)
    else:
        weights = pd.Series(1.0 / returns.shape[1], index=assets)

    weight_sum = float(weights.sum())
    if weight_sum == 0.0:
        raise ValueError("weights sum to zero")
    scaled = weights / weight_sum

    weighted = returns.fillna(0.0).mul(scaled, axis=1)
    period_returns = weighted.sum(axis=1)
    return float(initial) * (1.0 + cumulative_return(period_returns))

103 

104 

def nav_trajectories(returns: pd.DataFrame, *, initial_investment: float) -> pd.DataFrame:
    r"""Compute a separate NAV path for every asset.

    Every asset starts from the same capital ``initial_investment`` and then
    compounds its own periodic simple returns :math:`r_{i,t}`:

    .. math::
        \text{NAV}_{i,t} = \text{NAV}_{i,t-1} (1 + r_{i,t}).

    A missing return is treated as zero performance for that period.

    Parameters
    ----------
    returns:
        Wide DataFrame of periodic returns (rows are timestamps, columns are
        assets) expressed as decimal fractions.
    initial_investment:
        Starting capital per asset. Units are preserved in the output.

    Returns
    -------
    pandas.DataFrame
        NAV values for each asset over time, shaped like ``returns``. An
        empty input is returned as a copy.
    """
    if returns.empty:
        return returns.copy()

    filled = returns.fillna(0.0)
    capital = float(initial_investment)
    return (1.0 + filled).cumprod() * capital

136 

137 

def yield_trajectories(returns: pd.DataFrame) -> pd.DataFrame:
    r"""Compute the cumulative yield path of every asset.

    Each column is compounded independently, so the cumulative yield of
    asset :math:`i` through period :math:`t` is

    .. math::
        Y_{i,t} = \prod_{k=1}^t (1 + r_{i,k}) - 1.

    A missing return is interpreted as zero for that period.

    Parameters
    ----------
    returns:
        Wide DataFrame of periodic returns (rows are timestamps, columns are
        assets) expressed as decimal fractions.

    Returns
    -------
    pandas.DataFrame
        Cumulative return for each asset as decimal fractions. An empty
        input is returned as a copy.
    """
    if not returns.empty:
        gross = 1.0 + returns.fillna(0.0)
        return gross.cumprod() - 1.0
    return returns.copy()

164 

165 

@dataclass(frozen=True)
class RebalanceScenario:
    """Immutable description of one rebalance experiment.

    A scenario pairs the dates on which the portfolio is reset to its target
    weights with the transaction cost charged for doing so.
    """

    # Timestamps on which the portfolio is rebalanced. Only dates that also
    # appear in the returns index have an effect.
    calendar: Iterable[pd.Timestamp] | pd.DatetimeIndex
    # Transaction cost in basis points (1 bp = 1/10_000) applied to the value
    # traded at each rebalance; the default of 0.0 is frictionless.
    cost_bps: float = 0.0

172 

173 

@dataclass(frozen=True)
class ScenarioRunResult:
    """Immutable bundle of per-scenario metrics and time-series paths."""

    # Summary table of per-scenario metrics.
    metrics: pd.DataFrame
    # NAV paths of the evaluated scenarios.
    navs: pd.DataFrame
    # Periodic portfolio returns of the evaluated scenarios.
    returns: pd.DataFrame

181 

182 

@dataclass(frozen=True)
class _ScenarioPath:
    """Immutable outcome of simulating a single rebalance scenario."""

    # Portfolio NAV path of the simulation.
    nav: pd.Series
    # Periodic portfolio returns of the simulation.
    returns: pd.Series
    # Total transaction cost reported for the simulation.
    total_cost: float

188 

189 

def _normalise_weights(weights: pd.Series, columns: pd.Index) -> pd.Series:
    """Align ``weights`` to ``columns`` and rescale them to sum to one.

    Labels in ``columns`` without a weight receive zero; weights whose label
    is not in ``columns`` are dropped.

    Raises
    ------
    ValueError
        If the aligned weights sum to exactly zero.
    """
    on_columns = weights.reindex(columns).fillna(0.0)
    weight_sum = float(on_columns.sum())
    if weight_sum != 0.0:
        return on_columns / weight_sum
    raise ValueError("weights sum to zero after alignment with returns")

196 

197 

def _prepare_calendar(
    calendar: Iterable[pd.Timestamp] | pd.DatetimeIndex | None,
    index: pd.DatetimeIndex,
) -> pd.DatetimeIndex:
    """Coerce a rebalance calendar to the timezone of ``index`` and keep shared dates.

    ``None`` yields an empty calendar. Any other iterable is converted to a
    ``DatetimeIndex``. The calendar is then put on the same timezone footing
    as ``index``:

    * aware index, naive calendar: the calendar is localized to the index
      timezone;
    * aware index, aware calendar: the calendar is converted to the index
      timezone;
    * naive index, aware calendar: the timezone is removed via
      ``tz_convert(None)``.

    Returns
    -------
    pandas.DatetimeIndex
        The calendar entries that are also present in ``index``.
    """
    if calendar is None:
        return pd.DatetimeIndex([], tz=index.tz)

    dates = (
        calendar
        if isinstance(calendar, pd.DatetimeIndex)
        else pd.DatetimeIndex(pd.to_datetime(list(calendar)))
    )

    target_tz = index.tz
    if target_tz is None:
        if dates.tz is not None:
            dates = dates.tz_convert(None)
    elif dates.tz is None:
        dates = dates.tz_localize(target_tz)
    else:
        dates = dates.tz_convert(target_tz)

    return dates.intersection(index)

217 

218 

def _infer_periods_per_year(index: pd.DatetimeIndex) -> float:
    """Estimate how many observations of ``index`` fit into one year.

    The estimate is ``365.25`` divided by the mean gap, in days, between
    consecutive timestamps. Fallbacks:

    * fewer than two timestamps, or no usable gaps: ``1.0``;
    * a non-positive mean gap (e.g. repeated or descending timestamps): the
      number of entries in ``index``.
    """
    if len(index) < 2:
        return 1.0

    gaps = index.to_series().diff().dropna()
    if gaps.empty:
        return 1.0

    seconds_per_day = 86_400.0
    mean_gap_days = gaps.dt.total_seconds().mean() / seconds_per_day
    if mean_gap_days <= 0:
        return float(len(index))
    return 365.25 / mean_gap_days

229 

230 

def _simulate_rebalanced_portfolio(
    returns: pd.DataFrame,
    weights: pd.Series,
    scenario: RebalanceScenario,
    *,
    initial_nav: float,
) -> _ScenarioPath:
    """Simulate a portfolio that drifts between rebalances and pays trading costs.

    Holdings start at ``weights * initial_nav`` and grow with each asset's
    return. On every row whose timestamp belongs to ``scenario.calendar`` the
    portfolio is reset to the target weights. The traded value is the sum of
    absolute weight deviations times the NAV, and ``scenario.cost_bps`` basis
    points of it are deducted from the NAV before the reset.

    Parameters
    ----------
    returns:
        Wide DataFrame of periodic simple returns. Missing values count as a
        zero return.
    weights:
        Target weights; aligned to ``returns`` columns and normalised to 1.
    scenario:
        Rebalance calendar and cost assumption.
    initial_nav:
        Starting portfolio value.

    Returns
    -------
    _ScenarioPath
        NAV after each period (net of costs), the period-over-period NAV
        returns, and the total cost paid.

    Raises
    ------
    ValueError
        If the weights sum to zero after alignment with ``returns``.
    """
    if returns.empty:
        empty = pd.Series(dtype=float, index=returns.index)
        return _ScenarioPath(nav=empty, returns=empty, total_cost=0.0)

    clean_returns = returns.fillna(0.0)
    weights = _normalise_weights(weights, clean_returns.columns)
    calendar = _prepare_calendar(scenario.calendar, clean_returns.index)
    # One boolean per row, consumed positionally below. A label lookup per
    # row would return a Series (ambiguous truth value) if the index ever
    # contains a repeated timestamp.
    rebalance_flags = clean_returns.index.isin(calendar)

    nav = float(initial_nav)
    holdings = weights * nav
    nav_path: list[float] = []
    period_returns: list[float] = []
    total_cost = 0.0
    cost_rate = float(scenario.cost_bps) / 10_000.0

    for (_, row), is_rebalance in zip(clean_returns.iterrows(), rebalance_flags):
        nav_before = nav
        holdings = holdings * (1.0 + row)
        nav = float(holdings.sum())

        if is_rebalance:
            if nav > 0.0:
                current_weights = holdings / nav
                drift = weights - current_weights
                traded_value = float(drift.abs().sum()) * nav
            else:
                # Nothing left to trade once the portfolio is wiped out.
                traded_value = 0.0
            cost = traded_value * cost_rate
            if cost:
                nav -= cost
                total_cost += cost
            holdings = weights * nav

        # The period return is measured after costs; guard the division for
        # a portfolio that already hit zero.
        period_return = (nav - nav_before) / nav_before if nav_before != 0 else 0.0
        nav_path.append(nav)
        period_returns.append(period_return)

    nav_out = pd.Series(nav_path, index=clean_returns.index, name="nav")
    returns_out = pd.Series(period_returns, index=clean_returns.index, name="return")
    return _ScenarioPath(nav=nav_out, returns=returns_out, total_cost=total_cost)

279 

280 

def run_rebalance_scenarios(
    returns: pd.DataFrame,
    weights: pd.Series,
    scenarios: Mapping[str, RebalanceScenario],
    *,
    benchmark: str | None = None,
    initial_nav: float = 1.0,
) -> ScenarioRunResult:
    """Compare portfolio performance across alternative rebalance calendars.

    Every scenario is simulated on the same returns and target weights. For
    each one the function reports the annualised realized yield, the total
    transaction cost, the annualised tracking error against the benchmark
    and the terminal NAV.

    Parameters
    ----------
    returns:
        Wide DataFrame of periodic simple returns with datetime index.
    weights:
        Target portfolio weights aligned with ``returns`` columns.
    scenarios:
        Mapping of scenario name to calendar/cost assumptions.
    benchmark:
        Name of the scenario used as the tracking-error reference. When
        ``None`` the reference is a frictionless strategy that rebalances
        every period.
    initial_nav:
        Starting portfolio value.

    Returns
    -------
    ScenarioRunResult
        Metrics indexed by scenario name plus NAV and return paths with one
        column per scenario. For empty ``returns`` the metrics table has the
        metric columns but no rows.

    Raises
    ------
    ValueError
        If ``returns`` is non-empty and ``scenarios`` is empty.
    KeyError
        If ``benchmark`` is given but is not a key of ``scenarios``.
    """
    if returns.empty:
        blank = pd.DataFrame(index=returns.index)
        metric_names = ["realized_apy", "total_cost", "tracking_error", "terminal_nav"]
        return ScenarioRunResult(
            metrics=pd.DataFrame(columns=metric_names),
            navs=blank,
            returns=blank,
        )

    if not scenarios:
        raise ValueError("at least one scenario must be provided")

    simulated: dict[str, _ScenarioPath] = {
        label: _simulate_rebalanced_portfolio(
            returns,
            weights,
            spec,
            initial_nav=initial_nav,
        )
        for label, spec in scenarios.items()
    }

    if benchmark is None:
        # Default reference: rebalance on every timestamp at zero cost.
        every_period = RebalanceScenario(calendar=returns.index, cost_bps=0.0)
        reference = _simulate_rebalanced_portfolio(
            returns,
            weights,
            every_period,
            initial_nav=initial_nav,
        ).returns
    elif benchmark in simulated:
        reference = simulated[benchmark].returns
    else:
        raise KeyError(f"benchmark '{benchmark}' not found in scenarios")

    reference = reference.reindex(returns.index, fill_value=0.0)
    annual_periods = _infer_periods_per_year(returns.index)

    rows: list[dict[str, float | str]] = []
    nav_columns: dict[str, pd.Series] = {}
    return_columns: dict[str, pd.Series] = {}

    for label, outcome in simulated.items():
        nav_path = outcome.nav.reindex(returns.index, fill_value=float("nan"))
        ret_path = outcome.returns.reindex(returns.index, fill_value=0.0)
        nav_columns[label] = nav_path
        return_columns[label] = ret_path

        n_periods = len(ret_path)
        if n_periods:
            growth = nav_path.iloc[-1] / initial_nav
        else:
            growth = float("nan")

        # Annualise only a strictly positive growth factor; anything else
        # has no meaningful compounded yield.
        if n_periods and growth > 0.0:
            apy = growth ** (annual_periods / n_periods) - 1.0
        else:
            apy = float("nan")

        active = (ret_path - reference).fillna(0.0)
        if len(active) > 1:
            te = float(active.std(ddof=0) * sqrt(annual_periods))
        else:
            te = 0.0

        rows.append(
            {
                "scenario": label,
                "realized_apy": float(apy),
                "total_cost": float(outcome.total_cost),
                "tracking_error": te,
                "terminal_nav": float(nav_path.iloc[-1]),
            }
        )

    return ScenarioRunResult(
        metrics=pd.DataFrame(rows).set_index("scenario"),
        navs=pd.DataFrame(nav_columns),
        returns=pd.DataFrame(return_columns),
    )