Initial commit: MyMidas personal finance tracker

Full-stack self-hosted finance app with FastAPI backend and React frontend. Features: - Accounts, transactions, budgets, investments with GBP base currency - CSV import with auto-detection for 10 UK bank formats - ML predictions: spending forecast, net worth projection, Monte Carlo - 7 selectable themes (Obsidian, Arctic, Midnight, Vault, Terminal, Synthwave, Ledger) - Receipt/document attachments on transactions (JPEG, PNG, WebP, PDF) - AES-256-GCM field encryption, RS256 JWT, TOTP 2FA, RLS, audit log - Encrypted nightly backups + key rotation script - Mobile-responsive layout with bottom nav Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 11:56:10 +00:00 · 2026-04-21 11:56:10 +00:00 · 61a7884ee5
commit 61a7884ee5
127 changed files with 13323 additions and 0 deletions
--- a/backend/app/services/csv_detector.py
+++ b/backend/app/services/csv_detector.py
@ -0,0 +1,237 @@
+"""
+Auto-detect CSV bank export formats and produce a column mapping.
+Supports: Monzo, Starling, Revolut, Barclays, Lloyds, Halifax, NatWest/RBS, HSBC, Santander, Nationwide.
+Falls back to a generic best-effort mapping for unknown formats.
+"""
+from __future__ import annotations
+
+import csv
+import io
+from dataclasses import dataclass, field
+from typing import Literal
+
+
+@dataclass
+class CsvMapping:
+    date: str
+    description: str
+    amount: str | None = None        # single signed amount column
+    debit: str | None = None         # separate debit column (positive value = money out)
+    credit: str | None = None        # separate credit column (positive value = money in)
+    balance: str | None = None
+    reference: str | None = None
+    detected_format: str | None = None
+
+    def is_split(self) -> bool:
+        return self.debit is not None and self.credit is not None
+
+
+KNOWN_FORMATS: list[dict] = [
+    {
+        "name": "Monzo",
+        "detect": lambda h: {"transaction id", "emoji"}.issubset(h),
+        "date": "Date",
+        "description": "Name",
+        "amount": "Amount",
+        "balance": None,
+        "reference": "Notes and #tags",
+    },
+    {
+        "name": "Starling",
+        "detect": lambda h: {"counter party", "spending category"}.issubset(h),
+        "date": "Date",
+        "description": "Counter Party",
+        "amount": "Amount (GBP)",
+        "balance": "Balance (GBP)",
+        "reference": "Reference",
+    },
+    {
+        "name": "Revolut",
+        "detect": lambda h: {"product", "started date", "completed date"}.issubset(h),
+        "date": "Started Date",
+        "description": "Description",
+        "amount": "Amount",
+        "balance": "Balance",
+        "reference": None,
+    },
+    {
+        "name": "Barclays",
+        "detect": lambda h: {"subcategory", "memo", "number"}.issubset(h),
+        "date": "Date",
+        "description": "Memo",
+        "amount": "Amount",
+        "balance": None,
+        "reference": "Subcategory",
+    },
+    {
+        "name": "Lloyds Bank",
+        "detect": lambda h: {"transaction date", "debit amount", "credit amount", "transaction description"}.issubset(h),
+        "date": "Transaction Date",
+        "description": "Transaction Description",
+        "debit": "Debit Amount",
+        "credit": "Credit Amount",
+        "balance": "Balance",
+        "reference": None,
+    },
+    {
+        "name": "Halifax",
+        "detect": lambda h: {"transaction date", "debit amount", "credit amount", "transaction description"}.issubset(h),
+        "date": "Transaction Date",
+        "description": "Transaction Description",
+        "debit": "Debit Amount",
+        "credit": "Credit Amount",
+        "balance": "Balance",
+        "reference": None,
+    },
+    {
+        "name": "NatWest / RBS",
+        "detect": lambda h: {"date", "type", "description", "value", "balance"}.issubset(h) and "value" in h,
+        "date": "Date",
+        "description": "Description",
+        "amount": "Value",
+        "balance": "Balance",
+        "reference": None,
+    },
+    {
+        "name": "HSBC",
+        "detect": lambda h: h == {"date", "description", "amount"} or h == {"date", "description", "debit", "credit", "balance"},
+        "date": "Date",
+        "description": "Description",
+        "amount": "Amount",
+        "balance": None,
+        "reference": None,
+    },
+    {
+        "name": "Santander",
+        "detect": lambda h: {"date", "description", "debit", "credit", "balance"}.issubset(h),
+        "date": "Date",
+        "description": "Description",
+        "debit": "Debit",
+        "credit": "Credit",
+        "balance": "Balance",
+        "reference": None,
+    },
+    {
+        "name": "Nationwide",
+        "detect": lambda h: {"date", "transaction", "payments out", "payments in", "balance"}.issubset(h),
+        "date": "Date",
+        "description": "Transaction",
+        "debit": "Payments Out",
+        "credit": "Payments In",
+        "balance": "Balance",
+        "reference": None,
+    },
+]
+
+
+def _normalise_headers(raw_headers: list[str]) -> dict[str, str]:
+    """Return {normalised_key: original_header}."""
+    return {h.strip().lower(): h.strip() for h in raw_headers if h}
+
+
+def detect_format(raw_headers: list[str]) -> CsvMapping:
+    norm = _normalise_headers(raw_headers)
+    norm_set = set(norm.keys())
+
+    for fmt in KNOWN_FORMATS:
+        if fmt["detect"](norm_set):
+            # Map logical names → actual header using case-insensitive lookup
+            def resolve(col: str | None) -> str | None:
+                if col is None:
+                    return None
+                return norm.get(col.strip().lower(), col)
+
+            if "debit" in fmt:
+                return CsvMapping(
+                    date=resolve(fmt["date"]) or fmt["date"],
+                    description=resolve(fmt["description"]) or fmt["description"],
+                    debit=resolve(fmt["debit"]),
+                    credit=resolve(fmt["credit"]),
+                    balance=resolve(fmt.get("balance")),
+                    reference=resolve(fmt.get("reference")),
+                    detected_format=fmt["name"],
+                )
+            else:
+                return CsvMapping(
+                    date=resolve(fmt["date"]) or fmt["date"],
+                    description=resolve(fmt["description"]) or fmt["description"],
+                    amount=resolve(fmt["amount"]),
+                    balance=resolve(fmt.get("balance")),
+                    reference=resolve(fmt.get("reference")),
+                    detected_format=fmt["name"],
+                )
+
+    # Generic fallback: guess by common column name patterns
+    return _generic_mapping(norm)
+
+
+def _generic_mapping(norm: dict[str, str]) -> CsvMapping:
+    def find(*candidates: str) -> str | None:
+        for c in candidates:
+            if c in norm:
+                return norm[c]
+        return None
+
+    date_col = find("date", "transaction date", "trans date", "value date", "posting date")
+    desc_col = find("description", "narrative", "details", "memo", "payee", "merchant", "name", "counter party")
+    amt_col = find("amount", "value", "net amount", "transaction amount")
+    debit_col = find("debit", "debit amount", "payments out", "money out", "withdrawal")
+    credit_col = find("credit", "credit amount", "payments in", "money in", "deposit")
+    bal_col = find("balance", "running balance")
+    ref_col = find("reference", "notes", "tags", "category")
+
+    if not date_col:
+        date_col = list(norm.values())[0] if norm else "date"
+    if not desc_col:
+        desc_col = list(norm.values())[1] if len(norm) > 1 else "description"
+
+    if debit_col and credit_col:
+        return CsvMapping(
+            date=date_col,
+            description=desc_col,
+            debit=debit_col,
+            credit=credit_col,
+            balance=bal_col,
+            reference=ref_col,
+            detected_format=None,
+        )
+
+    return CsvMapping(
+        date=date_col,
+        description=desc_col,
+        amount=amt_col or (list(norm.values())[2] if len(norm) > 2 else "amount"),
+        balance=bal_col,
+        reference=ref_col,
+        detected_format=None,
+    )
+
+
+def parse_csv_content(content: bytes) -> tuple[list[str], list[dict]]:
+    """Decode and return (headers, rows)."""
+    for enc in ("utf-8-sig", "utf-8", "latin-1"):
+        try:
+            text = content.decode(enc)
+            break
+        except UnicodeDecodeError:
+            continue
+    else:
+        raise ValueError("Cannot decode file — try saving as UTF-8")
+
+    # Some bank exports (Lloyds, Barclays) include preamble lines before the header
+    lines = text.splitlines()
+    header_idx = 0
+    for i, line in enumerate(lines):
+        if "," in line and len(line.split(",")) >= 2:
+            header_idx = i
+            break
+
+    cleaned = "\n".join(lines[header_idx:])
+    reader = csv.DictReader(io.StringIO(cleaned))
+    headers = [h.strip() for h in (reader.fieldnames or []) if h and h.strip()]
+    rows = []
+    for row in reader:
+        clean_row = {k.strip(): (v.strip() if v else "") for k, v in row.items() if k and k.strip()}
+        if any(clean_row.values()):
+            rows.append(clean_row)
+
+    return headers, rows