Initial commit: MyMidas personal finance tracker
Full-stack self-hosted finance app with FastAPI backend and React frontend. Features: - Accounts, transactions, budgets, investments with GBP base currency - CSV import with auto-detection for 10 UK bank formats - ML predictions: spending forecast, net worth projection, Monte Carlo - 7 selectable themes (Obsidian, Arctic, Midnight, Vault, Terminal, Synthwave, Ledger) - Receipt/document attachments on transactions (JPEG, PNG, WebP, PDF) - AES-256-GCM field encryption, RS256 JWT, TOTP 2FA, RLS, audit log - Encrypted nightly backups + key rotation script - Mobile-responsive layout with bottom nav Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
commit
61a7884ee5
127 changed files with 13323 additions and 0 deletions
0
backend/app/ml/__init__.py
Normal file
0
backend/app/ml/__init__.py
Normal file
119
backend/app/ml/feature_engineering.py
Normal file
119
backend/app/ml/feature_engineering.py
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import pandas as pd
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
|
||||
async def get_monthly_category_spending(db: AsyncSession, user_id: str) -> pd.DataFrame:
    """Return total expense spend per (category, month) for one user.

    Columns: category_id, category_name, ds (month start, datetime), y (float).
    An empty frame with the same columns is returned when there is no data.
    """
    cols = ["category_id", "category_name", "ds", "y"]
    query = text("""
        SELECT
            COALESCE(t.category_id::text, 'uncategorised') AS category_id,
            COALESCE(c.name, 'Uncategorised') AS category_name,
            DATE_TRUNC('month', t.date)::date AS ds,
            SUM(ABS(t.amount))::float AS y
        FROM transactions t
        LEFT JOIN categories c ON c.id = t.category_id
        WHERE t.user_id = CAST(:uid AS uuid)
          AND t.type = 'expense'
          AND t.deleted_at IS NULL
          AND t.status != 'void'
        GROUP BY t.category_id, c.name, DATE_TRUNC('month', t.date)
        ORDER BY ds ASC
    """)
    res = await db.execute(query, {"uid": str(user_id)})
    records = res.fetchall()
    if not records:
        return pd.DataFrame(columns=cols)
    frame = pd.DataFrame(records, columns=cols)
    frame["ds"] = pd.to_datetime(frame["ds"])
    frame["y"] = frame["y"].astype(float)
    return frame
|
||||
|
||||
|
||||
async def get_monthly_net_worth(db: AsyncSession, user_id: str) -> pd.DataFrame:
    """Return the user's net-worth series collapsed to one point per month.

    Columns: ds (month-end datetime), y (float net worth). Within each month
    the latest snapshot wins. Empty frame when the user has no snapshots.
    """
    query = text("""
        SELECT date::text AS ds, net_worth::float AS y
        FROM net_worth_snapshots
        WHERE user_id = CAST(:uid AS uuid)
        ORDER BY date ASC
    """)
    res = await db.execute(query, {"uid": str(user_id)})
    records = res.fetchall()
    if not records:
        return pd.DataFrame(columns=["ds", "y"])
    frame = pd.DataFrame(records, columns=["ds", "y"])
    frame["ds"] = pd.to_datetime(frame["ds"])
    frame["y"] = frame["y"].astype(float)
    # Resample to month-end buckets, keeping the last snapshot in each month.
    monthly = frame.set_index("ds").resample("ME").last().dropna().reset_index()
    monthly.columns = ["ds", "y"]
    return monthly
|
||||
|
||||
|
||||
async def get_current_month_spending(db: AsyncSession, user_id: str) -> pd.DataFrame:
    """Return total spend per category for the current calendar month.

    Columns: category_id, category_name, spent (float). Empty frame with the
    same columns when the user has no qualifying transactions this month.
    """
    cols = ["category_id", "category_name", "spent"]
    query = text("""
        SELECT
            COALESCE(t.category_id::text, 'uncategorised') AS category_id,
            COALESCE(c.name, 'Uncategorised') AS category_name,
            SUM(ABS(t.amount))::float AS spent
        FROM transactions t
        LEFT JOIN categories c ON c.id = t.category_id
        WHERE t.user_id = CAST(:uid AS uuid)
          AND t.type = 'expense'
          AND t.deleted_at IS NULL
          AND t.status != 'void'
          AND DATE_TRUNC('month', t.date) = DATE_TRUNC('month', CURRENT_DATE)
        GROUP BY t.category_id, c.name
    """)
    res = await db.execute(query, {"uid": str(user_id)})
    records = res.fetchall()
    if not records:
        return pd.DataFrame(columns=cols)
    frame = pd.DataFrame(records, columns=cols)
    frame["spent"] = frame["spent"].astype(float)
    return frame
|
||||
|
||||
|
||||
async def get_portfolio_monthly_returns(db: AsyncSession, user_id: str) -> pd.DataFrame:
    """Monthly close prices for each asset in user's portfolio.

    Columns: symbol, month (month-start datetime), close (float). The latest
    price within each month is taken as that month's close.
    """
    cols = ["symbol", "month", "close"]
    query = text("""
        SELECT
            a.symbol,
            DATE_TRUNC('month', ap.date)::date AS month,
            (ARRAY_AGG(ap.close ORDER BY ap.date DESC))[1]::float AS close
        FROM investment_holdings h
        JOIN assets a ON a.id = h.asset_id
        JOIN asset_prices ap ON ap.asset_id = h.asset_id
        WHERE h.user_id = CAST(:uid AS uuid)
          AND h.deleted_at IS NULL
        GROUP BY a.symbol, DATE_TRUNC('month', ap.date)
        ORDER BY a.symbol, month ASC
    """)
    res = await db.execute(query, {"uid": str(user_id)})
    records = res.fetchall()
    if not records:
        return pd.DataFrame(columns=cols)
    frame = pd.DataFrame(records, columns=cols)
    frame["month"] = pd.to_datetime(frame["month"])
    frame["close"] = frame["close"].astype(float)
    return frame
|
||||
|
||||
|
||||
async def get_daily_cash_flow(db: AsyncSession, user_id: str, days: int = 90) -> pd.DataFrame:
    """Daily income/expense totals over the trailing *days* window.

    Columns: ds (datetime), inflow (float), outflow (float). Days with no
    transactions produce no row (gaps are not filled).
    """
    cols = ["ds", "inflow", "outflow"]
    query = text("""
        SELECT
            t.date::date AS ds,
            SUM(CASE WHEN t.amount > 0 THEN t.amount ELSE 0 END)::float AS inflow,
            SUM(CASE WHEN t.amount < 0 THEN ABS(t.amount) ELSE 0 END)::float AS outflow
        FROM transactions t
        WHERE t.user_id = CAST(:uid AS uuid)
          AND t.deleted_at IS NULL
          AND t.status != 'void'
          AND t.type IN ('income', 'expense')
          AND t.date >= CURRENT_DATE - :days
        GROUP BY t.date
        ORDER BY t.date ASC
    """)
    res = await db.execute(query, {"uid": str(user_id), "days": days})
    records = res.fetchall()
    if not records:
        return pd.DataFrame(columns=cols)
    frame = pd.DataFrame(records, columns=cols)
    frame["ds"] = pd.to_datetime(frame["ds"])
    return frame
|
||||
135
backend/app/ml/monte_carlo.py
Normal file
135
backend/app/ml/monte_carlo.py
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
DEFAULT_MU = 0.07 / 12 # 7% annual expected return, monthly
|
||||
DEFAULT_SIGMA = 0.15 / (12 ** 0.5) # 15% annual vol, monthly
|
||||
DT = 1.0 / 12
|
||||
|
||||
|
||||
def _project_months(from_date: date, n: int) -> list[str]:
|
||||
d = from_date.replace(day=1)
|
||||
return [(d + relativedelta(months=i + 1)).strftime("%Y-%m") for i in range(n)]
|
||||
|
||||
|
||||
def run_monte_carlo(
    prices_df: pd.DataFrame,
    holdings: list[dict],
    years: int = 5,
    n_sims: int = 1000,
    annual_contribution: float = 0.0,
) -> dict:
    """
    Simulate future portfolio value with correlated geometric Brownian motion.

    prices_df: columns [symbol, month, close] — monthly closes per asset.
    holdings: [{"symbol": str, "quantity": float, "current_value": float}]
    years / n_sims: projection horizon and number of simulated paths.
    annual_contribution: yearly cash top-up, spread evenly across months and
        held uninvested (it is added to portfolio value, not compounded).

    Returns percentile paths and summary stats; output is deterministic
    because the RNG is seeded.
    """
    n_months = years * 12
    today = date.today()
    future_dates = _project_months(today, n_months)
    monthly_contribution = annual_contribution / 12.0

    symbols = [h["symbol"] for h in holdings]
    current_values = np.array([float(h.get("current_value") or 0) for h in holdings])
    total_value = float(current_values.sum())

    if total_value <= 0:
        # Nothing to simulate — return a neutral, clearly flagged payload.
        return {
            "dates": future_dates,
            "percentiles": {},
            "current_value": 0.0,
            "expected_value": 0.0,
            "probability_of_gain": 0.5,
            "insufficient_data": True,
        }

    # Per-asset MONTHLY drift/vol: estimated from monthly log returns when at
    # least 3 prices exist, otherwise the module defaults (already monthly).
    n_assets = len(symbols)
    mus = np.full(n_assets, DEFAULT_MU)
    sigmas = np.full(n_assets, DEFAULT_SIGMA)
    corr = np.eye(n_assets)

    if not prices_df.empty:
        for i, sym in enumerate(symbols):
            sym_prices = prices_df[prices_df["symbol"] == sym].sort_values("month")
            if len(sym_prices) >= 3:
                closes = sym_prices["close"].values.astype(float)
                log_rets = np.diff(np.log(closes[closes > 0]))
                if len(log_rets) >= 2:
                    mus[i] = float(np.mean(log_rets))
                    sigmas[i] = float(np.std(log_rets))

        # Correlation matrix from overlapping return series; only used when
        # every asset has a usable history, otherwise assume uncorrelated.
        if n_assets > 1:
            ret_series = {}
            for sym in symbols:
                sym_prices = prices_df[prices_df["symbol"] == sym].sort_values("month")
                if len(sym_prices) >= 3:
                    closes = sym_prices["close"].values.astype(float)
                    log_rets = np.diff(np.log(closes[closes > 0]))
                    ret_series[sym] = log_rets

            if len(ret_series) == n_assets:
                min_len = min(len(v) for v in ret_series.values())
                if min_len >= 3:
                    matrix = np.array([v[-min_len:] for v in ret_series.values()])
                    corr = np.corrcoef(matrix)
                    corr = np.clip(corr, -0.99, 0.99)
                    np.fill_diagonal(corr, 1.0)

    # BUG FIX: the original decomposed the full covariance matrix and then
    # multiplied the correlated draw by `sigmas * sqrt(DT)` again — applying
    # each asset's volatility twice (cholesky(cov) @ Z already carries sigma)
    # — and scaled the already-monthly mus/sigmas by a further DT = 1/12 in
    # the drift. Decompose only the CORRELATION matrix and apply the monthly
    # sigma exactly once per step.
    try:
        chol = np.linalg.cholesky(corr)
    except np.linalg.LinAlgError:
        chol = np.eye(n_assets)  # degenerate corr estimate: uncorrelated shocks

    # GBM simulation (fixed seed so API responses are reproducible).
    rng = np.random.default_rng(42)
    portfolio_paths = np.zeros((n_sims, n_months))

    for sim in range(n_sims):
        asset_values = current_values.copy()
        for t in range(n_months):
            Z = rng.standard_normal(n_assets)
            shocks = chol @ Z  # correlated standard-normal draws
            # One-month GBM step with monthly parameters.
            asset_values = asset_values * np.exp(
                (mus - 0.5 * sigmas ** 2) + sigmas * shocks
            )
            # Contributions accumulate as uninvested cash (simple model).
            port_val = float(asset_values.sum()) + monthly_contribution * (t + 1)
            portfolio_paths[sim, t] = max(0.0, port_val)

    # Percentile bands across simulations, per month.
    pcts = {
        "p10": np.percentile(portfolio_paths, 10, axis=0),
        "p25": np.percentile(portfolio_paths, 25, axis=0),
        "p50": np.percentile(portfolio_paths, 50, axis=0),
        "p75": np.percentile(portfolio_paths, 75, axis=0),
        "p90": np.percentile(portfolio_paths, 90, axis=0),
    }

    final_values = portfolio_paths[:, -1]
    prob_gain = float(np.mean(final_values > total_value))
    expected_value = float(np.median(final_values))  # median of terminal values

    return {
        "dates": future_dates,
        "percentiles": {
            k: [{"date": d, "value": round(float(v), 2)} for d, v in zip(future_dates, arr)]
            for k, arr in pcts.items()
        },
        "current_value": round(total_value, 2),
        "expected_value": round(expected_value, 2),
        "probability_of_gain": round(prob_gain, 3),
        "insufficient_data": False,
    }
|
||||
102
backend/app/ml/net_worth_projection.py
Normal file
102
backend/app/ml/net_worth_projection.py
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
from datetime import date
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
warnings.filterwarnings("ignore")
|
||||
|
||||
|
||||
def _project_months(from_date: date, n: int) -> list[str]:
|
||||
months = []
|
||||
d = from_date.replace(day=1)
|
||||
for i in range(1, n + 1):
|
||||
months.append((d + relativedelta(months=i)).strftime("%Y-%m"))
|
||||
return months
|
||||
|
||||
|
||||
def project_net_worth(df: pd.DataFrame, years: int = 5) -> dict:
    """
    Project net worth forward with Holt-Winters, with a linear-trend fallback.

    df columns: ds (monthly datetime), y (net_worth float).
    years: projection horizon in years.

    Returns {"history": [...], "projections": {"conservative", "base",
    "optimistic"}, "insufficient_data": bool}; every point is
    {"date": "YYYY-MM", "value": float}.
    """
    n_months = years * 12
    today = date.today()
    future_dates = _project_months(today, n_months)

    history = [
        {"date": row["ds"].strftime("%Y-%m"), "value": round(float(row["y"]), 2)}
        for _, row in df.iterrows()
    ]

    if df.empty or len(df) < 2:
        # Not enough points to estimate any trend — project flat.
        last_val = float(df["y"].iloc[-1]) if not df.empty else 0.0
        flat = [{"date": d, "value": round(last_val, 2)} for d in future_dates]
        return {
            "history": history,
            "projections": {"conservative": flat, "base": flat, "optimistic": flat},
            "insufficient_data": True,
        }

    try:
        from statsmodels.tsa.holtwinters import ExponentialSmoothing

        values = df["y"].tolist()

        # A seasonal fit needs at least two full yearly cycles of data
        # (statsmodels rejects fewer); otherwise use an additive-trend fit.
        # The original's 12-point threshold made every 12-23 point series
        # raise and fall through to the linear fallback.
        if len(values) >= 24:
            model = ExponentialSmoothing(values, trend="add", seasonal="add", seasonal_periods=12)
        else:
            model = ExponentialSmoothing(values, trend="add", seasonal=None)

        # BUG FIX: ExponentialSmoothing.fit() has no `disp` kwarg (that is a
        # SARIMAX-style option); passing it raised TypeError and silently
        # forced the linear fallback below on every call.
        fit = model.fit(optimized=True)
        base_fcast = fit.forecast(n_months)

        # Approximate the fitted monthly trend from the first forecast year.
        monthly_trend = float(np.mean(np.diff(base_fcast[:12]))) if len(base_fcast) >= 12 else 0.0

        def build_scenario(scale: float) -> list[dict]:
            # Shift the base path by a scaled share of the trend each month.
            return [
                {
                    "date": d,
                    "value": round(float(base_fcast[i]) + (scale - 1.0) * monthly_trend * (i + 1), 2),
                }
                for i, d in enumerate(future_dates)
            ]

        return {
            "history": history,
            "projections": {
                "conservative": build_scenario(0.5),
                "base": [{"date": d, "value": round(float(v), 2)} for d, v in zip(future_dates, base_fcast)],
                "optimistic": build_scenario(1.5),
            },
            "insufficient_data": False,
        }

    except Exception:
        # Fallback (statsmodels unavailable or fit failed): straight line
        # through the last two observations, scenario-scaled.
        trend = float(df["y"].iloc[-1]) - float(df["y"].iloc[-2])
        last_val = float(df["y"].iloc[-1])

        def linear_scenario(t_scale: float) -> list[dict]:
            return [
                {"date": d, "value": round(last_val + t_scale * trend * (i + 1), 2)}
                for i, d in enumerate(future_dates)
            ]

        return {
            "history": history,
            "projections": {
                "conservative": linear_scenario(0.5),
                "base": linear_scenario(1.0),
                "optimistic": linear_scenario(1.5),
            },
            "insufficient_data": False,
        }
|
||||
91
backend/app/ml/spending_forecast.py
Normal file
91
backend/app/ml/spending_forecast.py
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
from datetime import date
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
warnings.filterwarnings("ignore")
|
||||
|
||||
MIN_POINTS = 3
|
||||
FORECAST_MONTHS = 3
|
||||
|
||||
|
||||
def _next_month_starts(from_date: date, n: int) -> list[str]:
|
||||
months = []
|
||||
d = (from_date.replace(day=1) + relativedelta(months=1))
|
||||
for _ in range(n):
|
||||
months.append(d.strftime("%Y-%m-%d"))
|
||||
d += relativedelta(months=1)
|
||||
return months
|
||||
|
||||
|
||||
def _fit_holt(values: list[float], n: int) -> tuple[list[float], list[float], list[float]]:
|
||||
from statsmodels.tsa.holtwinters import ExponentialSmoothing
|
||||
|
||||
try:
|
||||
if len(values) >= 12:
|
||||
model = ExponentialSmoothing(values, trend="add", seasonal="add", seasonal_periods=12)
|
||||
elif len(values) >= 4:
|
||||
model = ExponentialSmoothing(values, trend="add", seasonal=None)
|
||||
else:
|
||||
model = ExponentialSmoothing(values, trend=None, seasonal=None)
|
||||
|
||||
fit = model.fit(optimized=True, disp=False)
|
||||
forecast = fit.forecast(n)
|
||||
sigma = float(np.std(fit.resid)) if len(fit.resid) > 1 else float(np.mean(values) * 0.15)
|
||||
lower = np.maximum(0, forecast - 1.28 * sigma)
|
||||
upper = forecast + 1.28 * sigma
|
||||
return forecast.tolist(), lower.tolist(), upper.tolist()
|
||||
except Exception:
|
||||
avg = float(np.mean(values))
|
||||
sigma = float(np.std(values)) if len(values) > 1 else avg * 0.15
|
||||
return [avg] * n, [max(0, avg - 1.28 * sigma)] * n, [(avg + 1.28 * sigma)] * n
|
||||
|
||||
|
||||
def forecast_spending(df: pd.DataFrame) -> list[dict]:
    """
    Build a per-category spending forecast.

    df columns: category_id, category_name, ds (monthly), y (amount).
    Returns a list of category dicts ordered by average monthly spend
    (highest first), each carrying recent actuals plus a FORECAST_MONTHS-point
    forecast with lower/upper bands.
    """
    if df.empty:
        return []

    horizon = _next_month_starts(date.today(), FORECAST_MONTHS)
    summaries = []

    for (cat_id, cat_name), cat_rows in df.groupby(["category_id", "category_name"]):
        cat_rows = cat_rows.sort_values("ds")
        series = cat_rows["y"].tolist()
        mean_spend = float(np.mean(series))
        history = [
            {"date": row["ds"].strftime("%Y-%m-%d"), "amount": row["y"]}
            for _, row in cat_rows.iterrows()
        ]

        if len(series) >= MIN_POINTS:
            fcast, lower, upper = _fit_holt(series, FORECAST_MONTHS)
            points = [
                {"date": d, "amount": round(max(0, f), 2), "lower": round(l, 2), "upper": round(u, 2)}
                for d, f, l, u in zip(horizon, fcast, lower, upper)
            ]
        else:
            # Too little history for a model fit: flat average, +/-30% band.
            points = [
                {"date": d, "amount": round(mean_spend, 2), "lower": round(mean_spend * 0.7, 2), "upper": round(mean_spend * 1.3, 2)}
                for d in horizon
            ]

        summaries.append({
            "category_id": cat_id,
            "category_name": cat_name,
            "monthly_avg": round(mean_spend, 2),
            "actuals": history[-6:],  # last 6 months for display
            "forecast": points,
        })

    # Highest spenders first.
    return sorted(summaries, key=lambda item: item["monthly_avg"], reverse=True)
|
||||
Loading…
Add table
Add a link
Reference in a new issue