Initial commit: MyMidas personal finance tracker

Full-stack self-hosted finance app with FastAPI backend and React frontend.

Features:
- Accounts, transactions, budgets, investments with GBP base currency
- CSV import with auto-detection for 10 UK bank formats
- ML predictions: spending forecast, net worth projection, Monte Carlo
- 7 selectable themes (Obsidian, Arctic, Midnight, Vault, Terminal, Synthwave, Ledger)
- Receipt/document attachments on transactions (JPEG, PNG, WebP, PDF)
- AES-256-GCM field encryption, RS256 JWT, TOTP 2FA, RLS, audit log
- Encrypted nightly backups + key rotation script
- Mobile-responsive layout with bottom nav

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
megaproxy 2026-04-21 11:56:10 +00:00
commit 61a7884ee5
127 changed files with 13323 additions and 0 deletions

@@ -0,0 +1,119 @@
from __future__ import annotations
import pandas as pd
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
async def get_monthly_category_spending(db: AsyncSession, user_id: str) -> pd.DataFrame:
result = await db.execute(text("""
SELECT
COALESCE(t.category_id::text, 'uncategorised') AS category_id,
COALESCE(c.name, 'Uncategorised') AS category_name,
DATE_TRUNC('month', t.date)::date AS ds,
SUM(ABS(t.amount))::float AS y
FROM transactions t
LEFT JOIN categories c ON c.id = t.category_id
WHERE t.user_id = CAST(:uid AS uuid)
AND t.type = 'expense'
AND t.deleted_at IS NULL
AND t.status != 'void'
GROUP BY t.category_id, c.name, DATE_TRUNC('month', t.date)
ORDER BY ds ASC
"""), {"uid": str(user_id)})
rows = result.fetchall()
if not rows:
return pd.DataFrame(columns=["category_id", "category_name", "ds", "y"])
df = pd.DataFrame(rows, columns=["category_id", "category_name", "ds", "y"])
df["ds"] = pd.to_datetime(df["ds"])
df["y"] = df["y"].astype(float)
return df
async def get_monthly_net_worth(db: AsyncSession, user_id: str) -> pd.DataFrame:
result = await db.execute(text("""
SELECT date::text AS ds, net_worth::float AS y
FROM net_worth_snapshots
WHERE user_id = CAST(:uid AS uuid)
ORDER BY date ASC
"""), {"uid": str(user_id)})
rows = result.fetchall()
if not rows:
return pd.DataFrame(columns=["ds", "y"])
df = pd.DataFrame(rows, columns=["ds", "y"])
df["ds"] = pd.to_datetime(df["ds"])
df["y"] = df["y"].astype(float)
# Resample to monthly end, keeping last value
df = df.set_index("ds").resample("ME").last().dropna().reset_index()
df.columns = ["ds", "y"]
return df
async def get_current_month_spending(db: AsyncSession, user_id: str) -> pd.DataFrame:
result = await db.execute(text("""
SELECT
COALESCE(t.category_id::text, 'uncategorised') AS category_id,
COALESCE(c.name, 'Uncategorised') AS category_name,
SUM(ABS(t.amount))::float AS spent
FROM transactions t
LEFT JOIN categories c ON c.id = t.category_id
WHERE t.user_id = CAST(:uid AS uuid)
AND t.type = 'expense'
AND t.deleted_at IS NULL
AND t.status != 'void'
AND DATE_TRUNC('month', t.date) = DATE_TRUNC('month', CURRENT_DATE)
GROUP BY t.category_id, c.name
"""), {"uid": str(user_id)})
rows = result.fetchall()
if not rows:
return pd.DataFrame(columns=["category_id", "category_name", "spent"])
df = pd.DataFrame(rows, columns=["category_id", "category_name", "spent"])
df["spent"] = df["spent"].astype(float)
return df
async def get_portfolio_monthly_returns(db: AsyncSession, user_id: str) -> pd.DataFrame:
"""Monthly close prices for each asset in user's portfolio."""
result = await db.execute(text("""
SELECT
a.symbol,
DATE_TRUNC('month', ap.date)::date AS month,
(ARRAY_AGG(ap.close ORDER BY ap.date DESC))[1]::float AS close
FROM investment_holdings h
JOIN assets a ON a.id = h.asset_id
JOIN asset_prices ap ON ap.asset_id = h.asset_id
WHERE h.user_id = CAST(:uid AS uuid)
AND h.deleted_at IS NULL
GROUP BY a.symbol, DATE_TRUNC('month', ap.date)
ORDER BY a.symbol, month ASC
"""), {"uid": str(user_id)})
rows = result.fetchall()
if not rows:
return pd.DataFrame(columns=["symbol", "month", "close"])
df = pd.DataFrame(rows, columns=["symbol", "month", "close"])
df["month"] = pd.to_datetime(df["month"])
df["close"] = df["close"].astype(float)
return df
async def get_daily_cash_flow(db: AsyncSession, user_id: str, days: int = 90) -> pd.DataFrame:
result = await db.execute(text("""
SELECT
t.date::date AS ds,
SUM(CASE WHEN t.amount > 0 THEN t.amount ELSE 0 END)::float AS inflow,
SUM(CASE WHEN t.amount < 0 THEN ABS(t.amount) ELSE 0 END)::float AS outflow
FROM transactions t
WHERE t.user_id = CAST(:uid AS uuid)
AND t.deleted_at IS NULL
AND t.status != 'void'
AND t.type IN ('income', 'expense')
            AND t.date >= CURRENT_DATE - CAST(:days AS integer)
GROUP BY t.date
ORDER BY t.date ASC
"""), {"uid": str(user_id), "days": days})
rows = result.fetchall()
if not rows:
return pd.DataFrame(columns=["ds", "inflow", "outflow"])
df = pd.DataFrame(rows, columns=["ds", "inflow", "outflow"])
df["ds"] = pd.to_datetime(df["ds"])
return df
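
These loaders return plain DataFrames, so exposing one through the API is a thin wrapper. Below is a minimal sketch of how a FastAPI route might call the current-month query; the module paths and the get_db / get_current_user dependencies are assumptions for illustration, not part of this commit.

from fastapi import APIRouter, Depends
from sqlalchemy.ext.asyncio import AsyncSession

# Assumed module layout; adjust to wherever the app actually defines these
from app.ml.data import get_current_month_spending
from app.api.deps import get_current_user, get_db

router = APIRouter(prefix="/ml", tags=["ml"])

@router.get("/spending/current-month")
async def current_month_spending(
    db: AsyncSession = Depends(get_db),
    user=Depends(get_current_user),
):
    df = await get_current_month_spending(db, str(user.id))
    # DataFrames are not JSON-serialisable directly; hand back plain records
    return df.to_dict(orient="records")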


@@ -0,0 +1,135 @@
from __future__ import annotations
from datetime import date
from dateutil.relativedelta import relativedelta
import numpy as np
import pandas as pd
DEFAULT_MU = 0.07 / 12 # 7% annual expected return, monthly
DEFAULT_SIGMA = 0.15 / (12 ** 0.5) # 15% annual vol, monthly
DT = 1.0  # one simulation step is one month; the mu and sigma above are already monthly
def _project_months(from_date: date, n: int) -> list[str]:
d = from_date.replace(day=1)
return [(d + relativedelta(months=i + 1)).strftime("%Y-%m") for i in range(n)]
def run_monte_carlo(
prices_df: pd.DataFrame,
holdings: list[dict],
years: int = 5,
n_sims: int = 1000,
annual_contribution: float = 0.0,
) -> dict:
"""
prices_df: columns [symbol, month, close]
holdings: [{"symbol": str, "quantity": float, "current_value": float}]
Returns percentile paths and summary stats.
"""
n_months = years * 12
today = date.today()
future_dates = _project_months(today, n_months)
monthly_contribution = annual_contribution / 12.0
symbols = [h["symbol"] for h in holdings]
current_values = np.array([float(h.get("current_value") or 0) for h in holdings])
total_value = float(current_values.sum())
if total_value <= 0:
return {
"dates": future_dates,
"percentiles": {},
"current_value": 0.0,
"expected_value": 0.0,
"probability_of_gain": 0.5,
"insufficient_data": True,
}
# Compute per-asset parameters from price history
n_assets = len(symbols)
mus = np.full(n_assets, DEFAULT_MU)
sigmas = np.full(n_assets, DEFAULT_SIGMA)
corr = np.eye(n_assets)
if not prices_df.empty:
for i, sym in enumerate(symbols):
sym_prices = prices_df[prices_df["symbol"] == sym].sort_values("month")
if len(sym_prices) >= 3:
closes = sym_prices["close"].values.astype(float)
log_rets = np.diff(np.log(closes[closes > 0]))
if len(log_rets) >= 2:
mus[i] = float(np.mean(log_rets))
sigmas[i] = float(np.std(log_rets))
# Build correlation matrix from overlapping return series
if n_assets > 1:
ret_series = {}
for sym in symbols:
sym_prices = prices_df[prices_df["symbol"] == sym].sort_values("month")
if len(sym_prices) >= 3:
closes = sym_prices["close"].values.astype(float)
log_rets = np.diff(np.log(closes[closes > 0]))
ret_series[sym] = log_rets
if len(ret_series) == n_assets:
min_len = min(len(v) for v in ret_series.values())
if min_len >= 3:
matrix = np.array([v[-min_len:] for v in ret_series.values()])
corr = np.corrcoef(matrix)
corr = np.clip(corr, -0.99, 0.99)
np.fill_diagonal(corr, 1.0)
    # Cholesky factor of the correlation matrix; volatility is applied separately in
    # the GBM step below, so factoring the full covariance would double-count sigma
    try:
        L = np.linalg.cholesky(corr)
    except np.linalg.LinAlgError:
        # Fall back to uncorrelated shocks
        L = np.eye(n_assets)
# GBM simulation
rng = np.random.default_rng(42)
portfolio_paths = np.zeros((n_sims, n_months))
for sim in range(n_sims):
asset_values = current_values.copy()
for t in range(n_months):
Z = rng.standard_normal(n_assets)
corr_Z = L @ Z
# GBM step for each asset
asset_values = asset_values * np.exp(
(mus - 0.5 * sigmas ** 2) * DT + sigmas * np.sqrt(DT) * corr_Z
)
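            # Contributions accrue as uninvested cash on top of the simulated asset values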
port_val = float(asset_values.sum()) + monthly_contribution * (t + 1)
portfolio_paths[sim, t] = max(0.0, port_val)
# Compute percentile paths
pcts = {
"p10": np.percentile(portfolio_paths, 10, axis=0),
"p25": np.percentile(portfolio_paths, 25, axis=0),
"p50": np.percentile(portfolio_paths, 50, axis=0),
"p75": np.percentile(portfolio_paths, 75, axis=0),
"p90": np.percentile(portfolio_paths, 90, axis=0),
}
final_values = portfolio_paths[:, -1]
prob_gain = float(np.mean(final_values > total_value))
expected_value = float(np.median(final_values))
return {
"dates": future_dates,
"percentiles": {
k: [{"date": d, "value": round(float(v), 2)} for d, v in zip(future_dates, arr)]
for k, arr in pcts.items()
},
"current_value": round(total_value, 2),
"expected_value": round(expected_value, 2),
"probability_of_gain": round(prob_gain, 3),
"insufficient_data": False,
}
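
A quick way to exercise the simulator in isolation: feed it a couple of made-up holdings and an empty price frame, which leaves the per-asset parameters at the 7% / 15% annual defaults. Symbols and values below are purely illustrative.

import pandas as pd

holdings = [
    {"symbol": "VWRL", "quantity": 100.0, "current_value": 9500.0},
    {"symbol": "VGOV", "quantity": 50.0, "current_value": 4200.0},
]
# No price history: mus/sigmas stay at DEFAULT_MU / DEFAULT_SIGMA, correlation stays identity
prices = pd.DataFrame(columns=["symbol", "month", "close"])

result = run_monte_carlo(prices, holdings, years=5, n_sims=1000, annual_contribution=2400.0)
print(result["current_value"], result["expected_value"], result["probability_of_gain"])
print(result["percentiles"]["p50"][-1])  # median projected value at the 5-year horizon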


@@ -0,0 +1,102 @@
from __future__ import annotations
import warnings
from datetime import date
from dateutil.relativedelta import relativedelta
import numpy as np
import pandas as pd
warnings.filterwarnings("ignore")
def _project_months(from_date: date, n: int) -> list[str]:
months = []
d = from_date.replace(day=1)
for i in range(1, n + 1):
months.append((d + relativedelta(months=i)).strftime("%Y-%m"))
return months
def project_net_worth(df: pd.DataFrame, years: int = 5) -> dict:
"""
df columns: ds (monthly datetime), y (net_worth float)
Returns history + 3-scenario projections.
"""
n_months = years * 12
today = date.today()
future_dates = _project_months(today, n_months)
history = [
{"date": row["ds"].strftime("%Y-%m"), "value": round(float(row["y"]), 2)}
for _, row in df.iterrows()
]
if df.empty or len(df) < 2:
        # Not enough history: project flat from the last known value (or zero)
last_val = float(df["y"].iloc[-1]) if not df.empty else 0.0
flat = [{"date": d, "value": round(last_val, 2)} for d in future_dates]
return {
"history": history,
"projections": {"conservative": flat, "base": flat, "optimistic": flat},
"insufficient_data": True,
}
try:
from statsmodels.tsa.holtwinters import ExponentialSmoothing
values = df["y"].tolist()
        if len(values) >= 24:
            # Additive seasonality needs at least two full yearly cycles of data
            model = ExponentialSmoothing(values, trend="add", seasonal="add", seasonal_periods=12)
        else:
            model = ExponentialSmoothing(values, trend="add", seasonal=None)
        fit = model.fit(optimized=True)
base_fcast = fit.forecast(n_months)
# Estimate monthly trend from the fit
monthly_trend = float(np.mean(np.diff(base_fcast[:12]))) if len(base_fcast) >= 12 else 0.0
last_val = float(values[-1])
# Scale trends for scenarios
def build_scenario(scale: float) -> list[dict]:
pts = []
v = last_val
for i, d in enumerate(future_dates):
v = float(base_fcast[i]) + (scale - 1.0) * monthly_trend * (i + 1)
pts.append({"date": d, "value": round(v, 2)})
return pts
return {
"history": history,
"projections": {
"conservative": build_scenario(0.5),
"base": [{"date": d, "value": round(float(v), 2)} for d, v in zip(future_dates, base_fcast)],
"optimistic": build_scenario(1.5),
},
"insufficient_data": False,
}
except Exception:
# Fallback: linear trend from last 2 values
trend = float(df["y"].iloc[-1]) - float(df["y"].iloc[-2])
last_val = float(df["y"].iloc[-1])
def linear_scenario(t_scale: float) -> list[dict]:
return [
{"date": d, "value": round(last_val + t_scale * trend * (i + 1), 2)}
for i, d in enumerate(future_dates)
]
return {
"history": history,
"projections": {
"conservative": linear_scenario(0.5),
"base": linear_scenario(1.0),
"optimistic": linear_scenario(1.5),
},
"insufficient_data": False,
}
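
For a rough check outside the API, the projector can be driven with a small synthetic history; 18 month-end snapshots with a steady upward drift is enough to exercise the Holt path. The figures are invented for the example, and the "ME" month-end alias requires pandas 2.2+ (matching the resample call in the queries module).

import pandas as pd

# Synthetic history: 18 month-end net worth snapshots rising by 450/month
history = pd.DataFrame({
    "ds": pd.date_range("2024-01-31", periods=18, freq="ME"),
    "y": [20000.0 + 450.0 * i for i in range(18)],
})

out = project_net_worth(history, years=5)
print(out["insufficient_data"])        # False
print(out["projections"]["base"][:3])  # first three projected months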


@@ -0,0 +1,91 @@
from __future__ import annotations
import warnings
from datetime import date
from dateutil.relativedelta import relativedelta
import numpy as np
import pandas as pd
warnings.filterwarnings("ignore")
MIN_POINTS = 3
FORECAST_MONTHS = 3
def _next_month_starts(from_date: date, n: int) -> list[str]:
months = []
d = (from_date.replace(day=1) + relativedelta(months=1))
for _ in range(n):
months.append(d.strftime("%Y-%m-%d"))
d += relativedelta(months=1)
return months
def _fit_holt(values: list[float], n: int) -> tuple[list[float], list[float], list[float]]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing
try:
        if len(values) >= 24:
            # Additive seasonality needs at least two full yearly cycles of data
            model = ExponentialSmoothing(values, trend="add", seasonal="add", seasonal_periods=12)
        elif len(values) >= 4:
            model = ExponentialSmoothing(values, trend="add", seasonal=None)
        else:
            model = ExponentialSmoothing(values, trend=None, seasonal=None)
        fit = model.fit(optimized=True)
forecast = fit.forecast(n)
sigma = float(np.std(fit.resid)) if len(fit.resid) > 1 else float(np.mean(values) * 0.15)
lower = np.maximum(0, forecast - 1.28 * sigma)
upper = forecast + 1.28 * sigma
return forecast.tolist(), lower.tolist(), upper.tolist()
except Exception:
avg = float(np.mean(values))
sigma = float(np.std(values)) if len(values) > 1 else avg * 0.15
return [avg] * n, [max(0, avg - 1.28 * sigma)] * n, [(avg + 1.28 * sigma)] * n
def forecast_spending(df: pd.DataFrame) -> list[dict]:
"""
df columns: category_id, category_name, ds (monthly), y (amount)
Returns list of category forecast dicts.
"""
if df.empty:
return []
today = date.today()
future_dates = _next_month_starts(today, FORECAST_MONTHS)
results = []
for (cat_id, cat_name), group in df.groupby(["category_id", "category_name"]):
group = group.sort_values("ds")
values = group["y"].tolist()
actuals = [
{"date": row["ds"].strftime("%Y-%m-%d"), "amount": row["y"]}
for _, row in group.iterrows()
]
if len(values) < MIN_POINTS:
avg = float(np.mean(values))
forecast_pts = [
{"date": d, "amount": round(avg, 2), "lower": round(avg * 0.7, 2), "upper": round(avg * 1.3, 2)}
for d in future_dates
]
else:
fcast, lower, upper = _fit_holt(values, FORECAST_MONTHS)
forecast_pts = [
{"date": d, "amount": round(max(0, f), 2), "lower": round(l, 2), "upper": round(u, 2)}
for d, f, l, u in zip(future_dates, fcast, lower, upper)
]
results.append({
"category_id": cat_id,
"category_name": cat_name,
"monthly_avg": round(float(np.mean(values)), 2),
"actuals": actuals[-6:], # last 6 months for display
"forecast": forecast_pts,
})
# Sort by monthly_avg descending (highest spend first)
results.sort(key=lambda x: x["monthly_avg"], reverse=True)
return results
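
A small smoke test with invented data shows the expected shape of the output: two categories, six months of history each, three forecast points per category, sorted with the largest spender first.

import pandas as pd

# Toy input covering two made-up categories over six months
rows = []
for i, ds in enumerate(pd.date_range("2025-01-01", periods=6, freq="MS")):
    rows.append({"category_id": "cat-groceries", "category_name": "Groceries", "ds": ds, "y": 420.0 + 10.0 * i})
    rows.append({"category_id": "cat-transport", "category_name": "Transport", "ds": ds, "y": 95.0})

df = pd.DataFrame(rows)
for cat in forecast_spending(df):
    print(cat["category_name"], cat["monthly_avg"], cat["forecast"][0])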