MyMidas/backend/app/ml/monte_carlo.py
megaproxy 2940b120e0 ML predictions Phase 5: Monte Carlo contribution fix and milestone table
Fix contribution compounding: monthly contributions are now added to
asset values before each GBM step so they grow with market returns,
rather than being summed as a static lump at each period.

Add year-by-year milestone table below the fan chart showing P10/P50/P90
portfolio values at each annual checkpoint up to the selected horizon.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-28 11:02:41 +00:00

149 lines
5.3 KiB
Python

from __future__ import annotations
from datetime import date
from dateutil.relativedelta import relativedelta
import numpy as np
import pandas as pd
# Per-asset fallback parameters, used when an asset lacks enough price
# history for run_monte_carlo to estimate its own drift/volatility.
# Both are expressed in MONTHLY units (annual figure scaled down).
DEFAULT_MU = 0.07 / 12 # 7% annual expected return, monthly
DEFAULT_SIGMA = 0.15 / (12 ** 0.5) # 15% annual vol, monthly (sqrt-of-time scaling)
DT = 1.0 / 12 # one-month time step, in years
def _project_months(from_date: date, n: int) -> list[str]:
d = from_date.replace(day=1)
return [(d + relativedelta(months=i + 1)).strftime("%Y-%m") for i in range(n)]
def _estimate_parameters(
    prices_df: pd.DataFrame, symbols: list[str]
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Estimate per-asset MONTHLY drift, volatility and correlation.

    Assets with fewer than 3 price rows (or <2 usable log returns) keep the
    DEFAULT_MU / DEFAULT_SIGMA fallbacks. The correlation matrix is only
    estimated when every asset has a return series, aligned on the most
    recent overlapping window; otherwise it stays the identity.

    Returns:
        (mus, sigmas, corr) — arrays of shape (n,), (n,), (n, n).
    """
    n_assets = len(symbols)
    mus = np.full(n_assets, DEFAULT_MU)
    sigmas = np.full(n_assets, DEFAULT_SIGMA)
    corr = np.eye(n_assets)
    if prices_df.empty:
        return mus, sigmas, corr

    ret_series: dict[str, np.ndarray] = {}
    for i, sym in enumerate(symbols):
        sym_prices = prices_df[prices_df["symbol"] == sym].sort_values("month")
        if len(sym_prices) < 3:
            continue
        closes = sym_prices["close"].values.astype(float)
        # Drop non-positive prices before taking logs (bad/placeholder rows).
        log_rets = np.diff(np.log(closes[closes > 0]))
        if len(log_rets) >= 2:
            mus[i] = float(np.mean(log_rets))
            sigmas[i] = float(np.std(log_rets))
            ret_series[sym] = log_rets

    if n_assets > 1 and len(ret_series) == n_assets:
        min_len = min(len(v) for v in ret_series.values())
        if min_len >= 3:
            matrix = np.array([ret_series[s][-min_len:] for s in symbols])
            corr = np.corrcoef(matrix)
            # Guard against degenerate/perfectly-collinear estimates so the
            # Cholesky factorization below has a chance to succeed.
            corr = np.clip(corr, -0.99, 0.99)
            np.fill_diagonal(corr, 1.0)
    return mus, sigmas, corr


def run_monte_carlo(
    prices_df: pd.DataFrame,
    holdings: list[dict],
    years: int = 5,
    n_sims: int = 1000,
    annual_contribution: float = 0.0,
) -> dict:
    """Project portfolio value with a correlated-GBM Monte Carlo simulation.

    prices_df: columns [symbol, month, close]
    holdings: [{"symbol": str, "quantity": float, "current_value": float}]
    Returns percentile paths, per-year milestones and summary stats.

    Contributions are spread evenly over the months of the year and invested
    pro rata to the CURRENT portfolio weights before each step, so they
    compound with market returns.
    """
    n_months = years * 12
    today = date.today()
    future_dates = _project_months(today, n_months)
    monthly_contribution = annual_contribution / 12.0

    symbols = [h["symbol"] for h in holdings]
    current_values = np.array([float(h.get("current_value") or 0) for h in holdings])
    total_value = float(current_values.sum())
    if total_value <= 0:
        # Nothing to simulate; return a neutral payload the caller can render.
        return {
            "dates": future_dates,
            "percentiles": {},
            "current_value": 0.0,
            "expected_value": 0.0,
            "probability_of_gain": 0.5,
            "insufficient_data": True,
        }

    mus, sigmas, corr = _estimate_parameters(prices_df, symbols)
    n_assets = len(symbols)

    # Monthly covariance and its Cholesky factor. Z @ L.T yields correlated
    # shocks that ALREADY carry each asset's full monthly volatility, so
    # sigma must not be applied again in the GBM step below.
    cov = np.outer(sigmas, sigmas) * corr
    try:
        L = np.linalg.cholesky(cov)
    except np.linalg.LinAlgError:
        # Fall back to uncorrelated assets.
        L = np.diag(sigmas)

    # Fixed target allocation used to invest contributions.
    weights = current_values / total_value

    # Vectorized GBM: evolve all simulations one month per iteration.
    # Fixed seed keeps the output reproducible across calls.
    rng = np.random.default_rng(42)
    portfolio_paths = np.zeros((n_sims, n_months))
    asset_values = np.tile(current_values, (n_sims, 1))
    # Per-month log-drift with the Ito correction. mus/sigmas are estimated
    # from MONTHLY returns, so the step is one month and needs no extra dt
    # scaling. (The previous version multiplied the drift by 1/12 again and
    # re-applied sigma on top of the Cholesky shocks, double-counting both
    # drift and volatility.)
    drift = mus - 0.5 * sigmas ** 2
    for t in range(n_months):
        # Invest the contribution before the step so it compounds.
        if monthly_contribution > 0:
            asset_values = asset_values + weights * monthly_contribution
        Z = rng.standard_normal((n_sims, n_assets))
        asset_values = asset_values * np.exp(drift + Z @ L.T)
        portfolio_paths[:, t] = np.maximum(asset_values.sum(axis=1), 0.0)

    pcts = {
        "p10": np.percentile(portfolio_paths, 10, axis=0),
        "p25": np.percentile(portfolio_paths, 25, axis=0),
        "p50": np.percentile(portfolio_paths, 50, axis=0),
        "p75": np.percentile(portfolio_paths, 75, axis=0),
        "p90": np.percentile(portfolio_paths, 90, axis=0),
    }
    final_values = portfolio_paths[:, -1]
    # NOTE(review): "gain" compares against today's value only; contributed
    # cash is not counted as cost basis — confirm that is intended.
    prob_gain = float(np.mean(final_values > total_value))
    # "Expected" value is reported as the median (P50), which is robust to
    # the log-normal right tail.
    expected_value = float(np.median(final_values))

    # Year-by-year milestones at each 12-month boundary.
    milestones = []
    for yr in range(1, years + 1):
        idx = min(yr * 12 - 1, n_months - 1)
        year_values = portfolio_paths[:, idx]
        milestones.append({
            "year": yr,
            "date": future_dates[idx],
            "p10": round(float(np.percentile(year_values, 10)), 2),
            "p50": round(float(np.percentile(year_values, 50)), 2),
            "p90": round(float(np.percentile(year_values, 90)), 2),
        })

    return {
        "dates": future_dates,
        "percentiles": {
            k: [{"date": d, "value": round(float(v), 2)} for d, v in zip(future_dates, arr)]
            for k, arr in pcts.items()
        },
        "current_value": round(total_value, 2),
        "expected_value": round(expected_value, 2),
        "probability_of_gain": round(prob_gain, 3),
        "milestones": milestones,
        "insufficient_data": False,
    }