Initial commit: MyMidas personal finance tracker
Full-stack self-hosted finance app with FastAPI backend and React frontend. Features: - Accounts, transactions, budgets, investments with GBP base currency - CSV import with auto-detection for 10 UK bank formats - ML predictions: spending forecast, net worth projection, Monte Carlo - 7 selectable themes (Obsidian, Arctic, Midnight, Vault, Terminal, Synthwave, Ledger) - Receipt/document attachments on transactions (JPEG, PNG, WebP, PDF) - AES-256-GCM field encryption, RS256 JWT, TOTP 2FA, RLS, audit log - Encrypted nightly backups + key rotation script - Mobile-responsive layout with bottom nav Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
commit
61a7884ee5
127 changed files with 13323 additions and 0 deletions
237
backend/app/services/csv_detector.py
Normal file
237
backend/app/services/csv_detector.py
Normal file
|
|
@ -0,0 +1,237 @@
|
|||
"""
|
||||
Auto-detect CSV bank export formats and produce a column mapping.
|
||||
Supports: Monzo, Starling, Revolut, Barclays, Lloyds, Halifax, NatWest/RBS, HSBC, Santander, Nationwide.
|
||||
Falls back to a generic best-effort mapping for unknown formats.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import io
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Literal
|
||||
|
||||
|
||||
@dataclass
|
||||
class CsvMapping:
|
||||
date: str
|
||||
description: str
|
||||
amount: str | None = None # single signed amount column
|
||||
debit: str | None = None # separate debit column (positive value = money out)
|
||||
credit: str | None = None # separate credit column (positive value = money in)
|
||||
balance: str | None = None
|
||||
reference: str | None = None
|
||||
detected_format: str | None = None
|
||||
|
||||
def is_split(self) -> bool:
|
||||
return self.debit is not None and self.credit is not None
|
||||
|
||||
|
||||
# Known bank export layouts, tried in order; the first entry whose "detect"
# predicate accepts the set of lower-cased, stripped header names wins.
#
# Each entry provides:
#   name         label reported as the detected format
#   detect       predicate over the normalised header set
#   date         header of the date column
#   description  header of the description column
#   amount       header of a single signed amount column, OR
#   debit/credit headers of separate money-out / money-in columns
#   balance      header of the balance column (None if absent)
#   reference    header of the reference column (None if absent)
#
# Every column an entry names must be present in the header set its detector
# accepts, otherwise the resulting mapping points at a non-existent column.
KNOWN_FORMATS: list[dict] = [
    {
        "name": "Monzo",
        "detect": lambda h: {"transaction id", "emoji"}.issubset(h),
        "date": "Date",
        "description": "Name",
        "amount": "Amount",
        "balance": None,
        "reference": "Notes and #tags",
    },
    {
        "name": "Starling",
        "detect": lambda h: {"counter party", "spending category"}.issubset(h),
        "date": "Date",
        "description": "Counter Party",
        "amount": "Amount (GBP)",
        "balance": "Balance (GBP)",
        "reference": "Reference",
    },
    {
        "name": "Revolut",
        "detect": lambda h: {"product", "started date", "completed date"}.issubset(h),
        "date": "Started Date",
        "description": "Description",
        "amount": "Amount",
        "balance": "Balance",
        "reference": None,
    },
    {
        "name": "Barclays",
        "detect": lambda h: {"subcategory", "memo", "number"}.issubset(h),
        "date": "Date",
        "description": "Memo",
        "amount": "Amount",
        "balance": None,
        "reference": "Subcategory",
    },
    {
        "name": "Lloyds Bank",
        "detect": lambda h: {
            "transaction date",
            "debit amount",
            "credit amount",
            "transaction description",
        }.issubset(h),
        "date": "Transaction Date",
        "description": "Transaction Description",
        "debit": "Debit Amount",
        "credit": "Credit Amount",
        "balance": "Balance",
        "reference": None,
    },
    {
        # NOTE: this header signature is identical to "Lloyds Bank" above, so
        # with first-match detection this entry is never selected; such files
        # are reported as "Lloyds Bank". The column mapping is the same.
        "name": "Halifax",
        "detect": lambda h: {
            "transaction date",
            "debit amount",
            "credit amount",
            "transaction description",
        }.issubset(h),
        "date": "Transaction Date",
        "description": "Transaction Description",
        "debit": "Debit Amount",
        "credit": "Credit Amount",
        "balance": "Balance",
        "reference": None,
    },
    {
        "name": "NatWest / RBS",
        "detect": lambda h: {"date", "type", "description", "value", "balance"}.issubset(h),
        "date": "Date",
        "description": "Description",
        "amount": "Value",
        "balance": "Balance",
        "reference": None,
    },
    {
        # HSBC, single signed amount variant (exactly these three columns).
        "name": "HSBC",
        "detect": lambda h: h == {"date", "description", "amount"},
        "date": "Date",
        "description": "Description",
        "amount": "Amount",
        "balance": None,
        "reference": None,
    },
    {
        # HSBC, split debit/credit variant (exactly these five columns). This
        # must be a split mapping: the header set has no "Amount" column.
        # Listed before Santander, whose looser subset check would also match.
        "name": "HSBC",
        "detect": lambda h: h == {"date", "description", "debit", "credit", "balance"},
        "date": "Date",
        "description": "Description",
        "debit": "Debit",
        "credit": "Credit",
        "balance": "Balance",
        "reference": None,
    },
    {
        "name": "Santander",
        "detect": lambda h: {"date", "description", "debit", "credit", "balance"}.issubset(h),
        "date": "Date",
        "description": "Description",
        "debit": "Debit",
        "credit": "Credit",
        "balance": "Balance",
        "reference": None,
    },
    {
        "name": "Nationwide",
        "detect": lambda h: {"date", "transaction", "payments out", "payments in", "balance"}.issubset(h),
        "date": "Date",
        "description": "Transaction",
        "debit": "Payments Out",
        "credit": "Payments In",
        "balance": "Balance",
        "reference": None,
    },
]
|
||||
|
||||
|
||||
def _normalise_headers(raw_headers: list[str]) -> dict[str, str]:
|
||||
"""Return {normalised_key: original_header}."""
|
||||
return {h.strip().lower(): h.strip() for h in raw_headers if h}
|
||||
|
||||
|
||||
def detect_format(raw_headers: list[str]) -> CsvMapping:
    """Return the column mapping for a CSV with the given header row.

    The first entry of ``KNOWN_FORMATS`` whose detector accepts the
    normalised header set is used; its logical column names are translated
    to the headers as they appear in the file. When nothing matches, the
    generic best-effort mapping is returned instead.
    """
    by_key = _normalise_headers(raw_headers)
    present = set(by_key.keys())

    def actual(logical: str | None) -> str | None:
        # Case-insensitive lookup of the real header; an unknown name is
        # passed through unchanged.
        if logical is None:
            return None
        return by_key.get(logical.strip().lower(), logical)

    matched = next((fmt for fmt in KNOWN_FORMATS if fmt["detect"](present)), None)
    if matched is None:
        # Generic fallback: guess by common column name patterns
        return _generic_mapping(by_key)

    # A format declares either a debit/credit pair or a single amount column.
    money_fields = ("debit", "credit") if "debit" in matched else ("amount",)

    kwargs: dict = {
        "date": actual(matched["date"]) or matched["date"],
        "description": actual(matched["description"]) or matched["description"],
    }
    for field_name in money_fields:
        kwargs[field_name] = actual(matched[field_name])
    kwargs["balance"] = actual(matched.get("balance"))
    kwargs["reference"] = actual(matched.get("reference"))
    kwargs["detected_format"] = matched["name"]
    return CsvMapping(**kwargs)
|
||||
|
||||
|
||||
def _generic_mapping(norm: dict[str, str]) -> CsvMapping:
    """Guess a mapping for an unrecognised export from common column names.

    ``norm`` maps normalised header names to the headers as they appear in
    the file. Each logical field takes the first candidate name present.
    Date, description and amount fall back to the 1st, 2nd and 3rd header
    respectively (or to a literal default name) when no candidate matches.
    A debit/credit mapping is produced only if both columns are found.
    """
    originals = list(norm.values())

    def pick(*names: str) -> str | None:
        # First candidate present in the header lookup, else None.
        return next((norm[name] for name in names if name in norm), None)

    def nth(position: int, default: str) -> str:
        # Positional fallback: header at `position`, or `default` if too few.
        return originals[position] if len(originals) > position else default

    date_col = pick("date", "transaction date", "trans date", "value date", "posting date")
    desc_col = pick("description", "narrative", "details", "memo", "payee", "merchant", "name", "counter party")
    amt_col = pick("amount", "value", "net amount", "transaction amount")
    debit_col = pick("debit", "debit amount", "payments out", "money out", "withdrawal")
    credit_col = pick("credit", "credit amount", "payments in", "money in", "deposit")

    shared = {
        "date": date_col or nth(0, "date"),
        "description": desc_col or nth(1, "description"),
        "balance": pick("balance", "running balance"),
        "reference": pick("reference", "notes", "tags", "category"),
        "detected_format": None,
    }

    if debit_col and credit_col:
        return CsvMapping(debit=debit_col, credit=credit_col, **shared)

    return CsvMapping(amount=amt_col or nth(2, "amount"), **shared)
|
||||
|
||||
|
||||
def parse_csv_content(content: bytes) -> tuple[list[str], list[dict]]:
    """Decode a CSV bank export and return ``(headers, rows)``.

    Args:
        content: Raw bytes of the uploaded file.

    Returns:
        A tuple of the trimmed, non-empty header names and a list of row
        dicts keyed by trimmed header name. Cell values are trimmed, missing
        cells become ``""``, and rows with no non-empty value are dropped.

    The bytes are decoded as UTF-8 (a leading BOM is tolerated); anything
    that is not valid UTF-8 is decoded as latin-1, which accepts every byte
    sequence, so decoding never fails.
    """
    try:
        text = content.decode("utf-8-sig")
    except UnicodeDecodeError:
        text = content.decode("latin-1")

    # Split on real newlines only. str.splitlines() also breaks on characters
    # such as U+0085, which is what byte 0x85 becomes under latin-1, and that
    # would cut a data row in half.
    lines = text.replace("\r\n", "\n").replace("\r", "\n").split("\n")

    # Some bank exports (Lloyds, Barclays) include preamble lines before the
    # header. The header is taken to be the first line that parses into at
    # least two CSV cells; a quoted preamble cell containing a comma is not
    # mistaken for it. Falls back to the first line when nothing qualifies.
    header_idx = 0
    for i, line in enumerate(lines):
        if "," not in line:
            continue
        try:
            cells = next(csv.reader([line]), [])
        except csv.Error:
            continue
        if len(cells) >= 2:
            header_idx = i
            break

    cleaned = "\n".join(lines[header_idx:])
    reader = csv.DictReader(io.StringIO(cleaned))
    headers = [h.strip() for h in (reader.fieldnames or []) if h and h.strip()]

    rows = []
    for row in reader:
        # Keys that are None (surplus cells) or blank headers are discarded.
        clean_row = {
            k.strip(): (v.strip() if v else "")
            for k, v in row.items()
            if k and k.strip()
        }
        if any(clean_row.values()):
            rows.append(clean_row)

    return headers, rows
|
||||
Loading…
Add table
Add a link
Reference in a new issue