Initial commit: MyMidas personal finance tracker
Full-stack self-hosted finance app with FastAPI backend and React frontend. Features: - Accounts, transactions, budgets, investments with GBP base currency - CSV import with auto-detection for 10 UK bank formats - ML predictions: spending forecast, net worth projection, Monte Carlo - 7 selectable themes (Obsidian, Arctic, Midnight, Vault, Terminal, Synthwave, Ledger) - Receipt/document attachments on transactions (JPEG, PNG, WebP, PDF) - AES-256-GCM field encryption, RS256 JWT, TOTP 2FA, RLS, audit log - Encrypted nightly backups + key rotation script - Mobile-responsive layout with bottom nav Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
commit
61a7884ee5
127 changed files with 13323 additions and 0 deletions
135
backend/app/ml/monte_carlo.py
Normal file
135
backend/app/ml/monte_carlo.py
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
# Fallback GBM parameters, used when an asset has too little price history
# to estimate its own drift/volatility. All values are per-month.
DEFAULT_MU = 0.07 / 12  # 7% annual expected return, monthly
DEFAULT_SIGMA = 0.15 / (12 ** 0.5)  # 15% annual vol, monthly
DT = 1.0 / 12  # length of one simulation step (one month) expressed in years
|
||||
|
||||
|
||||
def _project_months(from_date: date, n: int) -> list[str]:
|
||||
d = from_date.replace(day=1)
|
||||
return [(d + relativedelta(months=i + 1)).strftime("%Y-%m") for i in range(n)]
|
||||
|
||||
|
||||
def _monthly_log_returns(
    prices_df: pd.DataFrame, symbols: list[str]
) -> dict[str, np.ndarray]:
    """Per-symbol monthly log returns from price history.

    A symbol is included only when it has at least 3 monthly closes and at
    least 2 usable log returns. Non-positive closes are dropped before
    differencing.
    """
    series: dict[str, np.ndarray] = {}
    if prices_df.empty:
        return series
    for sym in symbols:
        sym_prices = prices_df[prices_df["symbol"] == sym].sort_values("month")
        if len(sym_prices) >= 3:
            closes = sym_prices["close"].values.astype(float)
            # NOTE(review): filtering non-positive closes can splice
            # non-adjacent months together; acceptable for rough estimates.
            log_rets = np.diff(np.log(closes[closes > 0]))
            if len(log_rets) >= 2:
                series[sym] = log_rets
    return series


def run_monte_carlo(
    prices_df: pd.DataFrame,
    holdings: list[dict],
    years: int = 5,
    n_sims: int = 1000,
    annual_contribution: float = 0.0,
) -> dict:
    """Project portfolio value via correlated geometric-Brownian-motion paths.

    Parameters
    ----------
    prices_df : DataFrame with columns [symbol, month, close] (monthly closes).
    holdings : [{"symbol": str, "quantity": float, "current_value": float}]
    years : projection horizon in years.
    n_sims : number of simulated paths.
    annual_contribution : extra cash added per year, invested each month
        pro-rata to the current portfolio weights.

    Returns
    -------
    dict with percentile paths (p10/p25/p50/p75/p90 as date/value points),
    current and expected (median final) value, probability of ending above
    today's value, and an ``insufficient_data`` flag.
    """
    n_months = years * 12
    today = date.today()
    future_dates = _project_months(today, n_months)
    monthly_contribution = annual_contribution / 12.0

    symbols = [h["symbol"] for h in holdings]
    current_values = np.array([float(h.get("current_value") or 0) for h in holdings])
    total_value = float(current_values.sum())

    if total_value <= 0:
        # Nothing to simulate; signal the caller explicitly.
        return {
            "dates": future_dates,
            "percentiles": {},
            "current_value": 0.0,
            "expected_value": 0.0,
            "probability_of_gain": 0.5,
            "insufficient_data": True,
        }

    n_assets = len(symbols)
    ret_series = _monthly_log_returns(prices_df, symbols)

    # Per-asset *monthly* drift/vol, falling back to defaults for thin history.
    mus = np.full(n_assets, DEFAULT_MU)
    sigmas = np.full(n_assets, DEFAULT_SIGMA)
    for i, sym in enumerate(symbols):
        rets = ret_series.get(sym)
        if rets is not None:
            mus[i] = float(np.mean(rets))
            sigmas[i] = float(np.std(rets))

    # Correlation matrix from overlapping return windows (identity fallback).
    corr = np.eye(n_assets)
    if n_assets > 1 and len(ret_series) == n_assets:
        min_len = min(len(v) for v in ret_series.values())
        if min_len >= 3:
            matrix = np.array([ret_series[sym][-min_len:] for sym in symbols])
            corr = np.corrcoef(matrix)
            corr = np.clip(corr, -0.99, 0.99)
            np.fill_diagonal(corr, 1.0)

    # BUG FIX: the original took the Cholesky factor of the *covariance*
    # matrix and then multiplied the correlated draws by sigma again
    # (double-counting volatility), and re-scaled the already-monthly
    # parameters by DT (shrinking drift ~12x). Decompose the correlation
    # matrix instead, so L @ Z has unit variance per asset, and take one
    # whole-month step with the monthly parameters directly.
    try:
        L = np.linalg.cholesky(corr)
    except np.linalg.LinAlgError:
        L = np.eye(n_assets)  # fall back to uncorrelated shocks

    # BUG FIX: `weights` was computed but never used and contributions were
    # tacked on uninvested. Invest each month's contribution pro-rata to the
    # starting weights so it compounds like the rest of the portfolio.
    weights = current_values / total_value

    rng = np.random.default_rng(42)  # fixed seed -> reproducible output
    drift = mus - 0.5 * sigmas ** 2  # monthly log-drift per asset

    # Vectorized over simulations: state is an (n_sims, n_assets) matrix,
    # stepped one month at a time (contributions make time path-dependent).
    asset_values = np.tile(current_values, (n_sims, 1))
    portfolio_paths = np.zeros((n_sims, n_months))
    for t in range(n_months):
        Z = rng.standard_normal((n_sims, n_assets))
        corr_Z = Z @ L.T  # unit-variance correlated shocks
        asset_values = asset_values * np.exp(drift + sigmas * corr_Z)
        asset_values = asset_values + monthly_contribution * weights
        portfolio_paths[:, t] = np.maximum(0.0, asset_values.sum(axis=1))

    # Percentile paths across simulations at each month.
    pcts = {
        f"p{p}": np.percentile(portfolio_paths, p, axis=0)
        for p in (10, 25, 50, 75, 90)
    }

    final_values = portfolio_paths[:, -1]
    prob_gain = float(np.mean(final_values > total_value))
    expected_value = float(np.median(final_values))

    return {
        "dates": future_dates,
        "percentiles": {
            k: [{"date": d, "value": round(float(v), 2)} for d, v in zip(future_dates, arr)]
            for k, arr in pcts.items()
        },
        "current_value": round(total_value, 2),
        "expected_value": round(expected_value, 2),
        "probability_of_gain": round(prob_gain, 3),
        "insufficient_data": False,
    }
|
||||
Loading…
Add table
Add a link
Reference in a new issue