Full-stack self-hosted finance app with FastAPI backend and React frontend. Features: - Accounts, transactions, budgets, investments with GBP base currency - CSV import with auto-detection for 10 UK bank formats - ML predictions: spending forecast, net worth projection, Monte Carlo - 7 selectable themes (Obsidian, Arctic, Midnight, Vault, Terminal, Synthwave, Ledger) - Receipt/document attachments on transactions (JPEG, PNG, WebP, PDF) - AES-256-GCM field encryption, RS256 JWT, TOTP 2FA, RLS, audit log - Encrypted nightly backups + key rotation script - Mobile-responsive layout with bottom nav Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
237 lines
8 KiB
Python
237 lines
8 KiB
Python
"""
|
|
Auto-detect CSV bank export formats and produce a column mapping.
|
|
Supports: Monzo, Starling, Revolut, Barclays, Lloyds, Halifax, NatWest/RBS, HSBC, Santander, Nationwide.
|
|
Falls back to a generic best-effort mapping for unknown formats.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import csv
|
|
import io
|
|
from dataclasses import dataclass, field
|
|
from typing import Literal
|
|
|
|
|
|
@dataclass
class CsvMapping:
    """Names of the CSV columns that hold each logical transaction field.

    ``date`` and ``description`` are required. The transaction value is
    described either by ``amount`` (one signed column) or by the
    ``debit``/``credit`` pair; every optional field defaults to ``None``.
    """

    date: str
    description: str
    # Single signed amount column.
    amount: str | None = None
    # Separate debit column (positive value = money out).
    debit: str | None = None
    # Separate credit column (positive value = money in).
    credit: str | None = None
    balance: str | None = None
    reference: str | None = None
    # Name of the recognised bank format, or None when none was recognised.
    detected_format: str | None = None

    def is_split(self) -> bool:
        """Return True only when both a debit and a credit column are set."""
        return not (self.debit is None or self.credit is None)
|
|
|
|
|
|
# Registry of recognised bank export layouts. Entries are tried in order and
# the first one whose "detect" predicate accepts the header set wins.
#
# Keys of each entry:
#   name               - label for the format
#   detect             - predicate over the set of stripped, lower-cased headers
#   date, description  - logical header names (always present)
#   amount             - single signed amount column, OR
#   debit, credit      - separate money-out / money-in columns
#   balance, reference - optional columns; None when the export has none
KNOWN_FORMATS: list[dict] = [
    {
        "name": "Monzo",
        "detect": lambda h: {"transaction id", "emoji"}.issubset(h),
        "date": "Date",
        "description": "Name",
        "amount": "Amount",
        "balance": None,
        "reference": "Notes and #tags",
    },
    {
        "name": "Starling",
        "detect": lambda h: {"counter party", "spending category"}.issubset(h),
        "date": "Date",
        "description": "Counter Party",
        "amount": "Amount (GBP)",
        "balance": "Balance (GBP)",
        "reference": "Reference",
    },
    {
        "name": "Revolut",
        "detect": lambda h: {"product", "started date", "completed date"}.issubset(h),
        "date": "Started Date",
        "description": "Description",
        "amount": "Amount",
        "balance": "Balance",
        "reference": None,
    },
    {
        "name": "Barclays",
        "detect": lambda h: {"subcategory", "memo", "number"}.issubset(h),
        "date": "Date",
        "description": "Memo",
        "amount": "Amount",
        "balance": None,
        "reference": "Subcategory",
    },
    {
        "name": "Lloyds Bank",
        "detect": lambda h: {"transaction date", "debit amount", "credit amount", "transaction description"}.issubset(h),
        "date": "Transaction Date",
        "description": "Transaction Description",
        "debit": "Debit Amount",
        "credit": "Credit Amount",
        "balance": "Balance",
        "reference": None,
    },
    {
        # NOTE: this predicate is identical to the Lloyds Bank one above, so
        # the Lloyds entry always matches first and this entry is never
        # selected. The column mapping is the same either way.
        "name": "Halifax",
        "detect": lambda h: {"transaction date", "debit amount", "credit amount", "transaction description"}.issubset(h),
        "date": "Transaction Date",
        "description": "Transaction Description",
        "debit": "Debit Amount",
        "credit": "Credit Amount",
        "balance": "Balance",
        "reference": None,
    },
    {
        "name": "NatWest / RBS",
        "detect": lambda h: {"date", "type", "description", "value", "balance"}.issubset(h),
        "date": "Date",
        "description": "Description",
        "amount": "Value",
        "balance": "Balance",
        "reference": None,
    },
    {
        # HSBC, single signed amount layout (exact header match).
        "name": "HSBC",
        "detect": lambda h: h == {"date", "description", "amount"},
        "date": "Date",
        "description": "Description",
        "amount": "Amount",
        "balance": None,
        "reference": None,
    },
    {
        # HSBC, split debit/credit layout (exact header match). This needs its
        # own entry: the file has no "Amount" column, so the single-amount
        # mapping above would point at a column that does not exist.
        "name": "HSBC",
        "detect": lambda h: h == {"date", "description", "debit", "credit", "balance"},
        "date": "Date",
        "description": "Description",
        "debit": "Debit",
        "credit": "Credit",
        "balance": "Balance",
        "reference": None,
    },
    {
        "name": "Santander",
        "detect": lambda h: {"date", "description", "debit", "credit", "balance"}.issubset(h),
        "date": "Date",
        "description": "Description",
        "debit": "Debit",
        "credit": "Credit",
        "balance": "Balance",
        "reference": None,
    },
    {
        "name": "Nationwide",
        "detect": lambda h: {"date", "transaction", "payments out", "payments in", "balance"}.issubset(h),
        "date": "Date",
        "description": "Transaction",
        "debit": "Payments Out",
        "credit": "Payments In",
        "balance": "Balance",
        "reference": None,
    },
]
|
|
|
|
|
|
def _normalise_headers(raw_headers: list[str]) -> dict[str, str]:
|
|
"""Return {normalised_key: original_header}."""
|
|
return {h.strip().lower(): h.strip() for h in raw_headers if h}
|
|
|
|
|
|
def detect_format(raw_headers: list[str]) -> CsvMapping:
    """Build a column mapping for a CSV file from its header row.

    The headers are normalised and offered to each ``KNOWN_FORMATS`` entry in
    order; the first entry whose ``detect`` predicate accepts them supplies
    the mapping and its name becomes ``detected_format``. If no entry
    matches, the result of ``_generic_mapping`` is returned instead.
    """
    by_key = _normalise_headers(raw_headers)
    present = set(by_key)

    def actual(logical: str | None) -> str | None:
        # Map a logical name to the header as spelled in the file, using a
        # case-insensitive lookup; fall back to the logical name itself.
        if logical is None:
            return None
        return by_key.get(logical.strip().lower(), logical)

    matched = next((fmt for fmt in KNOWN_FORMATS if fmt["detect"](present)), None)
    if matched is None:
        # Generic fallback: guess by common column name patterns.
        return _generic_mapping(by_key)

    shared = {
        "date": actual(matched["date"]) or matched["date"],
        "description": actual(matched["description"]) or matched["description"],
        "balance": actual(matched.get("balance")),
        "reference": actual(matched.get("reference")),
        "detected_format": matched["name"],
    }
    if "debit" in matched:
        return CsvMapping(
            debit=actual(matched["debit"]),
            credit=actual(matched["credit"]),
            **shared,
        )
    return CsvMapping(amount=actual(matched["amount"]), **shared)
|
|
|
|
|
|
def _generic_mapping(norm: dict[str, str]) -> CsvMapping:
    """Guess a column mapping for a header set that matches no known format.

    Args:
        norm: ``{normalised_key: original_header}`` in file column order.

    Returns:
        A ``CsvMapping`` with ``detected_format=None``. ``debit``/``credit``
        are used only when both are found by name; otherwise a single
        ``amount`` column is used.

    Columns are first looked up by common names. A role that is still
    unfilled (date, description, amount) falls back to a positional guess:
    column 0, 1 and 2 respectively, or the literal names ``"date"``,
    ``"description"`` and ``"amount"`` when the file has too few columns.
    The positional guess avoids a column that already has another role
    whenever an unused column exists, so one column is not mapped twice.
    """
    headers = list(norm.values())

    def find(*candidates: str) -> str | None:
        # Return the original header for the first candidate key present.
        for candidate in candidates:
            if candidate in norm:
                return norm[candidate]
        return None

    date_col = find("date", "transaction date", "trans date", "value date", "posting date")
    desc_col = find("description", "narrative", "details", "memo", "payee", "merchant", "name", "counter party")
    amt_col = find("amount", "value", "net amount", "transaction amount")
    debit_col = find("debit", "debit amount", "payments out", "money out", "withdrawal")
    credit_col = find("credit", "credit amount", "payments in", "money in", "deposit")
    bal_col = find("balance", "running balance")
    ref_col = find("reference", "notes", "tags", "category")

    # Headers that already have a role and should not be guessed again.
    claimed = {
        col
        for col in (date_col, desc_col, amt_col, debit_col, credit_col, bal_col, ref_col)
        if col
    }

    def guess(position: int, default: str) -> str:
        # Prefer the column at `position`; if it is taken, use the first
        # unused column. When no unused column is left, keep the plain
        # positional guess (or `default` if the file has too few columns).
        positional = headers[position] if position < len(headers) else None
        if positional is not None and positional not in claimed:
            choice = positional
        else:
            choice = next((h for h in headers if h not in claimed), None)
        if choice is None:
            return positional if positional is not None else default
        claimed.add(choice)
        return choice

    if not date_col:
        date_col = guess(0, "date")
    if not desc_col:
        desc_col = guess(1, "description")

    if debit_col and credit_col:
        return CsvMapping(
            date=date_col,
            description=desc_col,
            debit=debit_col,
            credit=credit_col,
            balance=bal_col,
            reference=ref_col,
            detected_format=None,
        )

    return CsvMapping(
        date=date_col,
        description=desc_col,
        amount=amt_col or guess(2, "amount"),
        balance=bal_col,
        reference=ref_col,
        detected_format=None,
    )
|
|
|
|
|
|
def parse_csv_content(content: bytes) -> tuple[list[str], list[dict]]:
    """Decode a CSV export and return ``(headers, rows)``.

    Args:
        content: Raw bytes of the file.

    Returns:
        ``headers``: the stripped, non-empty column names in file order.
        ``rows``: one ``{header: value}`` dict per data row, with keys and
        values stripped, missing values as ``""``, and rows whose values are
        all empty dropped.

    Raises:
        ValueError: If none of the attempted encodings can decode the bytes.
    """
    # "utf-8-sig" also removes a leading BOM. "latin-1" accepts any byte
    # sequence, so in practice it is the catch-all and the ValueError below
    # is only a defensive guard.
    for enc in ("utf-8-sig", "utf-8", "latin-1"):
        try:
            text = content.decode(enc)
            break
        except UnicodeDecodeError:
            continue
    else:
        raise ValueError("Cannot decode file — try saving as UTF-8")

    # Split on real line endings only. str.splitlines() would also break on
    # characters such as U+0085 or U+2028, which can occur inside a field
    # (e.g. byte 0x85 decoded as latin-1) and would cut that row in two.
    lines = text.replace("\r\n", "\n").replace("\r", "\n").split("\n")

    # Some bank exports (Lloyds, Barclays) include preamble lines before the
    # header: treat the first line containing a comma as the header row.
    header_idx = next((i for i, line in enumerate(lines) if "," in line), 0)

    cleaned = "\n".join(lines[header_idx:])
    reader = csv.DictReader(io.StringIO(cleaned))
    headers = [h.strip() for h in (reader.fieldnames or []) if h and h.strip()]

    rows = []
    for row in reader:
        # Keys that are None (surplus cells) or blank are dropped; None
        # values (short rows) become "".
        clean_row = {
            k.strip(): (v.strip() if v else "")
            for k, v in row.items()
            if k and k.strip()
        }
        if any(clean_row.values()):
            rows.append(clean_row)

    return headers, rows
|