Coverage for src/stable_yield_lab/analytics/metrics.py: 77%

115 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-04 20:38 +0000

1from __future__ import annotations 

2 

3from collections.abc import Iterable, Sequence 

4import math 

5from typing import TYPE_CHECKING 

6 

7import pandas as pd 

8 

9if TYPE_CHECKING: 9 ↛ 10line 9 didn't jump to line 10 because the condition on line 9 was never true

10 from ..core import Pool, PoolRepository 

11 

12 

13def _coerce_float(value: object) -> float: 

14 """Best-effort conversion to ``float`` returning ``nan`` on failure.""" 

15 

16 try: 

17 return float(value) # type: ignore[arg-type] 

18 except (TypeError, ValueError): 

19 return float("nan") 

20 

21 

def weighted_mean(values: Sequence[object], weights: Sequence[object]) -> float:
    """Compute a weighted mean while skipping ``NaN`` pairs and zero weight sums.

    Each value/weight pair is coerced with ``_coerce_float``; a pair is dropped
    when either side coerces to ``nan``.

    Args:
        values: Observations to average.
        weights: One weight per observation, in the same order as ``values``.

    Returns:
        ``sum(value * weight) / sum(weight)`` over the surviving pairs, or
        ``nan`` when the inputs are empty or of different lengths, when no
        pair survives, when the weight sum is zero or non-finite, or when the
        weighted sum is non-finite.
    """

    nan = float("nan")
    vals = list(values)
    wts = list(weights)
    # Equal lengths plus a non-empty ``vals`` implies a non-empty ``wts``.
    if not vals or len(vals) != len(wts):
        return nan

    pairs: list[tuple[float, float]] = []
    for raw_value, raw_weight in zip(vals, wts):
        value = _coerce_float(raw_value)
        weight = _coerce_float(raw_weight)
        if math.isnan(value) or math.isnan(weight):
            continue
        pairs.append((value, weight))

    if not pairs:
        return nan

    try:
        weight_sum = math.fsum(weight for _, weight in pairs)
        numerator = math.fsum(value * weight for value, weight in pairs)
    except (OverflowError, ValueError):
        # ``math.fsum`` raises instead of returning inf/nan when the partial
        # sums overflow or when +inf and -inf are mixed; both cases mean the
        # mean is undefined, so report ``nan`` like the guards below.
        return nan

    if weight_sum == 0.0 or not math.isfinite(weight_sum):
        return nan
    if not math.isfinite(numerator):
        return nan

    return numerator / weight_sum

52 

53 

def net_apy(
    base_apy: float,
    reward_apy: float = 0.0,
    *,
    perf_fee_bps: float = 0.0,
    mgmt_fee_bps: float = 0.0,
) -> float:
    """Return the APY left after performance and management fees.

    All four inputs are coerced with ``_coerce_float``. The two fees are given
    in basis points, summed and divided by 10,000 to get a fraction; the net
    result is ``(1 + base + reward) * (1 - fee_fraction) - 1``.

    Returns:
        The net APY, never lower than ``-1.0``. ``nan`` is returned when any
        coerced input, the fee fraction, or the net growth factor is not
        finite.
    """

    undefined = float("nan")

    base, reward, perf, mgmt = (
        _coerce_float(raw) for raw in (base_apy, reward_apy, perf_fee_bps, mgmt_fee_bps)
    )
    if any(not math.isfinite(part) for part in (base, reward, perf, mgmt)):
        return undefined

    fee_share = (perf + mgmt) / 10_000.0
    if not math.isfinite(fee_share):
        return undefined

    growth = (1.0 + (base + reward)) * (1.0 - fee_share)
    if not math.isfinite(growth):
        return undefined

    # Floor the result at a total loss of -100%.
    net = growth - 1.0
    return -1.0 if net < -1.0 else net

81 

82 

def add_net_apy_column(
    df: pd.DataFrame,
    *,
    perf_fee_bps: float = 0.0,
    mgmt_fee_bps: float = 0.0,
    out_col: str = "net_apy",
) -> pd.DataFrame:
    """Append a net APY column computed via :func:`net_apy` for each row.

    Args:
        df: Input frame. ``base_apy`` and ``reward_apy`` are read per row;
            a missing column is treated as ``0.0``.
        perf_fee_bps: Performance fee forwarded to :func:`net_apy`.
        mgmt_fee_bps: Management fee forwarded to :func:`net_apy`.
        out_col: Name of the column that receives the result.

    Returns:
        A copy of ``df`` with ``out_col`` added as a float column. The input
        is never modified, and the column is present even when ``df`` has no
        rows, so the output schema does not depend on the input being
        non-empty.
    """

    out = df.copy()
    computed = [
        net_apy(
            row.get("base_apy", 0.0),
            row.get("reward_apy", 0.0),
            perf_fee_bps=perf_fee_bps,
            mgmt_fee_bps=mgmt_fee_bps,
        )
        for _, row in out.iterrows()
    ]
    # An explicit float Series keeps the dtype stable for zero-row frames,
    # where a bare empty list would not produce a float column.
    out[out_col] = pd.Series(computed, index=out.index, dtype=float)
    return out

106 

107 

def _hhi_of(values: pd.Series) -> float:
    """Return the sum of squared shares of ``values``, or ``nan`` if undefined."""

    valid = pd.to_numeric(values, errors="coerce").dropna()
    total = float(valid.sum())
    # A non-positive or non-finite total has no meaningful shares; without the
    # finiteness check an ``inf`` entry would yield a misleading 0.0.
    if not math.isfinite(total) or total <= 0.0:
        return float("nan")
    return float(((valid / total) ** 2).sum())


def hhi(df: pd.DataFrame, value_col: str, group_col: str | None = None) -> pd.DataFrame:
    """Compute the Herfindahl–Hirschman Index of concentration.

    The index is the sum of squared shares of ``value_col``. Values that
    cannot be converted to numbers are ignored.

    Args:
        df: Input frame.
        value_col: Column holding the sizes whose concentration is measured.
        group_col: Optional column to group by; when given, one index is
            computed per group.

    Returns:
        Without ``group_col``: a one-row frame with a single ``hhi`` column.
        With ``group_col``: one row per group with columns ``group_col`` and
        ``hhi``. The index is ``nan`` wherever the total is not a positive
        finite number. An empty ``df`` yields an empty frame with the same
        columns.
    """

    if df.empty:
        if group_col is None:
            return pd.DataFrame({"hhi": pd.Series(dtype=float)})
        return pd.DataFrame(columns=[group_col, "hhi"], dtype=float)

    values = pd.to_numeric(df[value_col], errors="coerce")

    if group_col is None:
        return pd.DataFrame({"hhi": [_hhi_of(values)]})

    data = pd.DataFrame({group_col: df[group_col], value_col: values})
    return data.groupby(group_col)[value_col].apply(_hhi_of).reset_index(name="hhi")

138 

139 

class Metrics:
    """Static facade over the module-level analytics helpers.

    Kept for backwards compatibility: most methods delegate directly to the
    function of the same name defined at module level.
    """

    @staticmethod
    def weighted_mean(values: Sequence[object], weights: Sequence[object]) -> float:
        """Delegate to the module-level :func:`weighted_mean`."""
        return weighted_mean(values, weights)

    @staticmethod
    def portfolio_apr(pools: Iterable[Pool], weights: Sequence[object] | None = None) -> float:
        """Return the weighted mean of ``base_apy`` across ``pools``.

        When ``weights`` is omitted, each pool's ``tvl_usd`` is used as its
        weight. An empty ``pools`` iterable yields ``nan``.
        """
        members = list(pools)
        if len(members) == 0:
            return float("nan")
        apys = [pool.base_apy for pool in members]
        if weights is None:
            effective_weights = [pool.tvl_usd for pool in members]
        else:
            effective_weights = list(weights)
        return weighted_mean(apys, effective_weights)

    @staticmethod
    def groupby_chain(repo: PoolRepository) -> pd.DataFrame:
        """Aggregate the repository's pools per ``chain``.

        The result has one row per chain with the pool count (``pools``),
        summed ``tvl_usd`` (``tvl``), plain mean of ``base_apy`` (``apr_avg``)
        and ``tvl_usd``-weighted mean of ``base_apy`` (``apr_wavg``). An empty
        repository frame is returned unchanged.
        """
        frame = repo.to_dataframe()
        if frame.empty:
            return frame

        def _tvl_weighted_apy(apys: pd.Series) -> float:
            # Look up each group's TVL through the group's index labels.
            tvls = frame.loc[apys.index, "tvl_usd"]
            return weighted_mean(apys.tolist(), tvls.tolist())

        per_chain = frame.groupby("chain").agg(
            pools=("name", "count"),
            tvl=("tvl_usd", "sum"),
            apr_avg=("base_apy", "mean"),
            apr_wavg=("base_apy", _tvl_weighted_apy),
        )
        return per_chain.reset_index()

    @staticmethod
    def top_n(repo: PoolRepository, n: int = 10, key: str = "base_apy") -> pd.DataFrame:
        """Return the first ``n`` rows after sorting by ``key`` descending.

        An empty repository frame is returned unchanged.
        """
        frame = repo.to_dataframe()
        if frame.empty:
            return frame
        ranked = frame.sort_values(key, ascending=False)
        return ranked.head(n)

    @staticmethod
    def net_apy(
        base_apy: float,
        reward_apy: float = 0.0,
        *,
        perf_fee_bps: float = 0.0,
        mgmt_fee_bps: float = 0.0,
    ) -> float:
        """Delegate to the module-level :func:`net_apy`."""
        return net_apy(
            base_apy,
            reward_apy,
            perf_fee_bps=perf_fee_bps,
            mgmt_fee_bps=mgmt_fee_bps,
        )

    @staticmethod
    def add_net_apy_column(
        df: pd.DataFrame,
        *,
        perf_fee_bps: float = 0.0,
        mgmt_fee_bps: float = 0.0,
        out_col: str = "net_apy",
    ) -> pd.DataFrame:
        """Delegate to the module-level :func:`add_net_apy_column`."""
        return add_net_apy_column(
            df,
            perf_fee_bps=perf_fee_bps,
            mgmt_fee_bps=mgmt_fee_bps,
            out_col=out_col,
        )

    @staticmethod
    def hhi(df: pd.DataFrame, value_col: str, group_col: str | None = None) -> pd.DataFrame:
        """Delegate to the module-level :func:`hhi`."""
        return hhi(df, value_col=value_col, group_col=group_col)

220 

221 

# Public API of this module; controls what ``from ... import *`` exports.
__all__ = ["Metrics", "add_net_apy_column", "hhi", "net_apy", "weighted_mean"]

228]