MyMidas/backend/app/api/v1/transactions.py
megaproxy 26e2a055db Add AI receipt scanning with OCR pipeline and debug toggle
- OCR pipeline: Tesseract (images) + pdfplumber (PDFs) → AI text prompt →
  rule-based regex fallback; works with any text model, not just vision models
- Scan Receipt toolbar button parses a photo and pre-fills the transaction form;
  receipt image is automatically attached to the created transaction
- AI settings page: provider, API key (AES-256-GCM encrypted), custom URL,
  model, and per-user debug toggle that gates the OCR/AI debug panel
- Fix CSRF cookie secure=False so HTTP deployments work; add 7-day max_age
- Fix attachment_refs missing from _to_response (attachments never appeared in UI)
- Fix multipart boundary lost when Content-Type was set manually in axios calls
- nginx: raise client_max_body_size to 15 MB, add 120s proxy timeout for OCR
- Migration 0005: add ai_debug boolean to users table
- Update README and CLAUDE.md with AI scanning docs and architecture notes

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-22 22:07:38 +00:00

619 lines
22 KiB
Python

import csv
import io
import mimetypes
import os
import uuid
from pathlib import Path
from typing import Annotated
from fastapi import APIRouter, Depends, File, Form, HTTPException, Query, UploadFile
from fastapi.responses import FileResponse
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import get_settings
from app.core.audit import write_audit
from app.dependencies import get_current_user, get_db
from app.schemas.transaction import TransactionCreate, TransactionFilter, TransactionUpdate
from app.services.transaction_service import (
TransactionError,
create_transaction,
delete_transaction,
get_transaction,
import_csv,
list_transactions,
update_transaction,
_to_response,
)
MAX_IMPORT_FILE_BYTES = 10 * 1024 * 1024 # 10 MB
MAX_IMPORT_ROWS = 50_000
ALLOWED_MIME_TYPES = {
"image/jpeg",
"image/png",
"image/webp",
"application/pdf",
}
ALLOWED_EXTENSIONS = {".jpg", ".jpeg", ".png", ".webp", ".pdf"}
router = APIRouter()
@router.get("")
async def get_transactions(
account_id: uuid.UUID | None = None,
category_id: uuid.UUID | None = None,
type: str | None = None,
status: str | None = None,
date_from: str | None = None,
date_to: str | None = None,
search: str | None = None,
is_recurring: bool | None = None,
page: int = Query(default=1, ge=1),
page_size: int = Query(default=50, ge=1, le=200),
db: AsyncSession = Depends(get_db),
user=Depends(get_current_user),
):
from datetime import date
filters = TransactionFilter(
account_id=account_id,
category_id=category_id,
type=type,
status=status,
date_from=date.fromisoformat(date_from) if date_from else None,
date_to=date.fromisoformat(date_to) if date_to else None,
search=search,
is_recurring=is_recurring,
page=page,
page_size=page_size,
)
return await list_transactions(db, user.id, filters)
@router.post("", status_code=201)
async def create(
body: TransactionCreate,
db: AsyncSession = Depends(get_db),
user=Depends(get_current_user),
):
try:
result = await create_transaction(db, user.id, body, user.base_currency)
await write_audit(db, user_id=user.id, action="transaction_create")
await db.commit()
return result
except TransactionError as e:
raise HTTPException(status_code=e.status_code, detail=e.detail)
@router.get("/{txn_id}")
async def get_one(
txn_id: uuid.UUID,
db: AsyncSession = Depends(get_db),
user=Depends(get_current_user),
):
try:
txn = await get_transaction(db, txn_id, user.id)
return _to_response(txn)
except TransactionError as e:
raise HTTPException(status_code=e.status_code, detail=e.detail)
@router.put("/{txn_id}")
async def update(
txn_id: uuid.UUID,
body: TransactionUpdate,
db: AsyncSession = Depends(get_db),
user=Depends(get_current_user),
):
try:
result = await update_transaction(db, txn_id, user.id, body, user.base_currency)
await write_audit(db, user_id=user.id, action="transaction_update", resource_type="transaction", resource_id=txn_id)
await db.commit()
return result
except TransactionError as e:
raise HTTPException(status_code=e.status_code, detail=e.detail)
@router.delete("/{txn_id}", status_code=204)
async def delete(
txn_id: uuid.UUID,
db: AsyncSession = Depends(get_db),
user=Depends(get_current_user),
):
try:
await delete_transaction(db, txn_id, user.id)
await write_audit(db, user_id=user.id, action="transaction_delete", resource_type="transaction", resource_id=txn_id)
await db.commit()
except TransactionError as e:
raise HTTPException(status_code=e.status_code, detail=e.detail)
@router.post("/{txn_id}/attachments")
async def upload_attachment(
txn_id: uuid.UUID,
file: UploadFile = File(...),
db: AsyncSession = Depends(get_db),
user=Depends(get_current_user),
):
settings = get_settings()
# Validate extension
filename = file.filename or "upload"
ext = Path(filename).suffix.lower()
if ext not in ALLOWED_EXTENSIONS:
raise HTTPException(status_code=400, detail="Unsupported file type. Allowed: JPG, PNG, WebP, PDF")
# Verify transaction ownership
try:
txn = await get_transaction(db, txn_id, user.id)
except TransactionError as e:
raise HTTPException(status_code=e.status_code, detail=e.detail)
current_refs: list = txn.get("attachment_refs", []) if isinstance(txn, dict) else []
# Fetch raw model for JSONB mutation
from sqlalchemy import select
from app.db.models.transaction import Transaction as TxnModel
result = await db.execute(
select(TxnModel).where(TxnModel.id == txn_id, TxnModel.user_id == user.id)
)
txn_row = result.scalar_one_or_none()
if not txn_row:
raise HTTPException(status_code=404, detail="Transaction not found")
current_refs = list(txn_row.attachment_refs or [])
if len(current_refs) >= settings.max_attachments_per_txn:
raise HTTPException(status_code=400, detail=f"Maximum {settings.max_attachments_per_txn} attachments per transaction")
# Read and size-check
content = await file.read(settings.max_attachment_bytes + 1)
if len(content) > settings.max_attachment_bytes:
raise HTTPException(status_code=413, detail="File too large (max 10 MB)")
# Sniff MIME from content
import magic # python-magic
detected_mime = magic.from_buffer(content[:2048], mime=True)
if detected_mime not in ALLOWED_MIME_TYPES:
raise HTTPException(status_code=400, detail="File content does not match an allowed type (JPEG, PNG, WebP, PDF)")
# Store file
attachment_id = str(uuid.uuid4())
user_upload_dir = Path(settings.upload_dir) / str(user.id)
user_upload_dir.mkdir(parents=True, exist_ok=True)
stored_name = f"{attachment_id}{ext}"
stored_path = user_upload_dir / stored_name
stored_path.write_bytes(content)
# Update attachment_refs
ref = {
"id": attachment_id,
"filename": filename,
"mime_type": detected_mime,
"size": len(content),
"stored_name": stored_name,
}
from sqlalchemy import update as sql_update
import copy
new_refs = copy.copy(current_refs)
new_refs.append(ref)
await db.execute(
sql_update(TxnModel)
.where(TxnModel.id == txn_id)
.values(attachment_refs=new_refs)
)
await write_audit(db, user_id=user.id, action="transaction_update", resource_type="transaction", resource_id=txn_id, metadata={"attachment_added": attachment_id})
await db.commit()
return ref
@router.get("/{txn_id}/attachments/{attachment_id}")
async def download_attachment(
txn_id: uuid.UUID,
attachment_id: str,
db: AsyncSession = Depends(get_db),
user=Depends(get_current_user),
):
settings = get_settings()
from sqlalchemy import select
from app.db.models.transaction import Transaction as TxnModel
result = await db.execute(
select(TxnModel).where(TxnModel.id == txn_id, TxnModel.user_id == user.id)
)
txn_row = result.scalar_one_or_none()
if not txn_row:
raise HTTPException(status_code=404, detail="Transaction not found")
ref = next((r for r in (txn_row.attachment_refs or []) if r["id"] == attachment_id), None)
if not ref:
raise HTTPException(status_code=404, detail="Attachment not found")
path = Path(settings.upload_dir) / str(user.id) / ref["stored_name"]
if not path.exists():
raise HTTPException(status_code=404, detail="Attachment file missing")
return FileResponse(
path=str(path),
media_type=ref["mime_type"],
filename=ref["filename"],
)
@router.delete("/{txn_id}/attachments/{attachment_id}", status_code=204)
async def delete_attachment(
txn_id: uuid.UUID,
attachment_id: str,
db: AsyncSession = Depends(get_db),
user=Depends(get_current_user),
):
settings = get_settings()
from sqlalchemy import select, update as sql_update
from app.db.models.transaction import Transaction as TxnModel
result = await db.execute(
select(TxnModel).where(TxnModel.id == txn_id, TxnModel.user_id == user.id)
)
txn_row = result.scalar_one_or_none()
if not txn_row:
raise HTTPException(status_code=404, detail="Transaction not found")
refs = list(txn_row.attachment_refs or [])
ref = next((r for r in refs if r["id"] == attachment_id), None)
if not ref:
raise HTTPException(status_code=404, detail="Attachment not found")
# Delete file
path = Path(settings.upload_dir) / str(user.id) / ref["stored_name"]
try:
path.unlink(missing_ok=True)
except OSError:
pass
new_refs = [r for r in refs if r["id"] != attachment_id]
await db.execute(
sql_update(TxnModel)
.where(TxnModel.id == txn_id)
.values(attachment_refs=new_refs)
)
await write_audit(db, user_id=user.id, action="transaction_update", resource_type="transaction", resource_id=txn_id, metadata={"attachment_deleted": attachment_id})
await db.commit()
_RECEIPT_TEXT_PROMPT = (
"You are a receipt parser. Below is the raw text extracted from a receipt via OCR.\n\n"
"Receipt text:\n{ocr_text}\n\n"
"Extract the information and return ONLY a JSON object with exactly these keys "
"(use null for any field you cannot determine):\n"
'{{"merchant": "store name", "amount": 0.00, "currency": "GBP", '
'"date": "YYYY-MM-DD", "description": "brief description", '
'"category": "one of: Food & Drink, Transport, Shopping, Entertainment, Health, Travel, Bills & Utilities, Other"}}\n'
"Return ONLY the JSON object. No markdown, no explanation, no code fences."
)
_EMPTY_RESULT: dict = {
"merchant": None, "amount": None, "currency": None,
"date": None, "description": None, "category": None,
"raw": None, "ocr_text": None,
}
def _extract_ocr_text(file_bytes: bytes, mime_type: str) -> str:
"""Extract text from an image or PDF. Returns empty string on failure."""
if mime_type == "application/pdf":
import io
import pdfplumber
try:
with pdfplumber.open(io.BytesIO(file_bytes)) as pdf:
pages_text = [page.extract_text() or "" for page in pdf.pages[:4]]
text = "\n".join(pages_text).strip()
if text:
return text
except Exception:
pass
# Scanned PDF — convert first page to image then OCR
try:
from pdf2image import convert_from_bytes
import pytesseract
images = convert_from_bytes(file_bytes, first_page=1, last_page=1, dpi=200)
if images:
return pytesseract.image_to_string(images[0])
except Exception:
pass
return ""
else:
import io
import pytesseract
from PIL import Image
try:
img = Image.open(io.BytesIO(file_bytes))
return pytesseract.image_to_string(img)
except Exception:
return ""
def _rule_based_parse(ocr_text: str) -> dict:
"""Extract receipt fields from OCR text using regex. Best-effort."""
import re
from datetime import datetime
lines = [ln.strip() for ln in ocr_text.splitlines() if ln.strip()]
# Merchant: skip very short lines and lines that look like addresses/phone numbers
merchant = None
for ln in lines[:5]:
if len(ln) > 2 and not re.match(r"^[\d\s\-\+\(\)]+$", ln) and not re.match(r"^\d+\s+\w+", ln):
merchant = ln
break
# Currency from symbols
currency = None
if "£" in ocr_text:
currency = "GBP"
elif "" in ocr_text:
currency = "EUR"
elif "$" in ocr_text:
currency = "USD"
# Amount: prefer lines containing total/amount keywords, then fall back to largest number
amount = None
total_line_pat = re.compile(
r"(?:total|amount\s*due|grand\s*total|balance\s*due|subtotal|net\s*total)"
r"[^\d£$€]*([£$€]?\s*\d{1,6}[.,]\d{2})\b",
re.IGNORECASE,
)
all_amount_pat = re.compile(r"[£$€]?\s*(\d{1,6}[.,]\d{2})\b")
for m in total_line_pat.finditer(ocr_text):
raw = re.sub(r"[£$€\s]", "", m.group(1)).replace(",", ".")
try:
amount = float(raw)
break
except ValueError:
pass
if amount is None:
candidates = []
for m in all_amount_pat.finditer(ocr_text):
try:
candidates.append(float(m.group(1).replace(",", ".")))
except ValueError:
pass
if candidates:
amount = max(candidates)
# Date: try common formats
date = None
date_patterns = [
(r"\b(\d{4}[-/]\d{2}[-/]\d{2})\b", ["%Y-%m-%d", "%Y/%m/%d"]),
(r"\b(\d{2}[-/]\d{2}[-/]\d{4})\b", ["%d-%m-%Y", "%d/%m/%Y", "%m/%d/%Y"]),
(r"\b(\d{1,2}\s+(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+\d{4})\b", ["%d %B %Y", "%d %b %Y"]),
(r"\b((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+\d{1,2},?\s+\d{4})\b", ["%B %d, %Y", "%b %d, %Y"]),
]
for pattern, fmts in date_patterns:
m = re.search(pattern, ocr_text, re.IGNORECASE)
if m:
raw_date = m.group(1).rstrip(".")
for fmt in fmts:
try:
date = datetime.strptime(raw_date, fmt).strftime("%Y-%m-%d")
break
except ValueError:
pass
if date:
break
description = merchant # simple default
return {
"merchant": merchant,
"amount": amount,
"currency": currency,
"date": date,
"description": description,
"category": None,
"raw": None,
"ocr_text": ocr_text,
}
def _strip_code_fence(text: str) -> str:
if text.startswith("```"):
parts = text.split("```")
text = parts[1] if len(parts) > 1 else text
if text.startswith("json"):
text = text[4:]
return text.strip()
async def _call_ai_parse(file_bytes: bytes, mime_type: str, user_row) -> dict:
"""
Parse a receipt: OCR text extraction → AI (text prompt) → rule-based fallback.
AI is optional; rules always run as fallback if AI is unconfigured or fails.
"""
import json
import httpx
from app.core.security import decrypt_field
# Step 1: extract text via OCR / PDF text layer
ocr_text = _extract_ocr_text(file_bytes, mime_type)
has_ai = bool(user_row and user_row.ai_provider and user_row.ai_api_key_enc)
# Step 2: attempt AI parse if configured
if has_ai and ocr_text.strip():
api_key = decrypt_field(user_row.ai_api_key_enc)
custom_base_url = (user_row.ai_base_url or "").rstrip("/")
custom_model = (user_row.ai_model or "").strip()
prompt = _RECEIPT_TEXT_PROMPT.format(ocr_text=ocr_text)
try:
if user_row.ai_provider == "anthropic":
base_url = custom_base_url or "https://api.anthropic.com"
model = custom_model or "claude-haiku-4-5-20251001"
async with httpx.AsyncClient(timeout=60) as client:
resp = await client.post(
f"{base_url}/v1/messages",
headers={"x-api-key": api_key, "anthropic-version": "2023-06-01", "content-type": "application/json"},
json={"model": model, "max_tokens": 512, "messages": [{"role": "user", "content": prompt}]},
)
resp.raise_for_status()
raw = resp.json()["content"][0]["text"].strip()
elif user_row.ai_provider == "openai":
base_url = custom_base_url or "https://api.openai.com"
model = custom_model or "gpt-4o-mini"
async with httpx.AsyncClient(timeout=60) as client:
resp = await client.post(
f"{base_url}/v1/chat/completions",
headers={"Authorization": f"Bearer {api_key}", "content-type": "application/json"},
json={"model": model, "max_tokens": 512, "messages": [{"role": "user", "content": prompt}]},
)
resp.raise_for_status()
raw = resp.json()["choices"][0]["message"]["content"].strip()
else:
raw = None
if raw:
cleaned = _strip_code_fence(raw)
try:
parsed = json.loads(cleaned)
return {
"merchant": parsed.get("merchant"),
"amount": parsed.get("amount"),
"currency": parsed.get("currency"),
"date": parsed.get("date"),
"description": parsed.get("description"),
"category": parsed.get("category"),
"raw": raw,
"ocr_text": ocr_text,
}
except json.JSONDecodeError:
# AI returned something non-JSON — fall through to rules, keep raw for debug
pass
except (httpx.HTTPStatusError, httpx.RequestError):
pass # fall through to rule-based
# Step 3: rule-based fallback (also used when AI is not configured)
if ocr_text.strip():
return _rule_based_parse(ocr_text)
# Nothing worked
if has_ai:
raise HTTPException(status_code=400, detail="Could not extract any text from the file. Try a clearer image.")
raise HTTPException(status_code=400, detail="No AI configured and OCR extracted no text. Add an API key in Settings → AI or try a clearer image.")
@router.post("/parse-receipt")
async def parse_receipt_upload(
file: UploadFile = File(...),
db: AsyncSession = Depends(get_db),
user=Depends(get_current_user),
):
"""Upload a receipt image and parse it with AI — no existing transaction required."""
from app.db.models.user import User as UserModel
settings = get_settings()
filename = file.filename or "upload"
ext = Path(filename).suffix.lower()
if ext not in ALLOWED_EXTENSIONS:
raise HTTPException(status_code=400, detail="Unsupported file type. Allowed: JPG, PNG, WebP, PDF")
content = await file.read(settings.max_attachment_bytes + 1)
if len(content) > settings.max_attachment_bytes:
raise HTTPException(status_code=413, detail="File too large (max 10 MB)")
import magic
mime_type = magic.from_buffer(content[:2048], mime=True)
if mime_type not in ALLOWED_MIME_TYPES:
raise HTTPException(status_code=400, detail="File content does not match an allowed type")
user_row = await db.get(UserModel, user.id)
return await _call_ai_parse(content, mime_type, user_row)
@router.post("/{txn_id}/attachments/{attachment_id}/parse")
async def parse_attachment(
txn_id: uuid.UUID,
attachment_id: str,
db: AsyncSession = Depends(get_db),
user=Depends(get_current_user),
):
"""Parse an already-uploaded attachment with AI."""
from sqlalchemy import select
from app.db.models.transaction import Transaction as TxnModel
from app.db.models.user import User as UserModel
settings = get_settings()
user_row = await db.get(UserModel, user.id)
result = await db.execute(select(TxnModel).where(TxnModel.id == txn_id, TxnModel.user_id == user.id))
txn_row = result.scalar_one_or_none()
if not txn_row:
raise HTTPException(status_code=404, detail="Transaction not found")
ref = next((r for r in (txn_row.attachment_refs or []) if r["id"] == attachment_id), None)
if not ref:
raise HTTPException(status_code=404, detail="Attachment not found")
path = Path(settings.upload_dir) / str(user.id) / ref["stored_name"]
if not path.exists():
raise HTTPException(status_code=404, detail="Attachment file missing")
return await _call_ai_parse(path.read_bytes(), ref["mime_type"], user_row)
@router.post("/import")
async def import_transactions(
file: UploadFile = File(...),
account_id: uuid.UUID = Form(...),
date_col: str = Form(default="date"),
description_col: str = Form(default="description"),
amount_col: str = Form(default="amount"),
db: AsyncSession = Depends(get_db),
user=Depends(get_current_user),
):
if not file.filename or not file.filename.lower().endswith(".csv"):
raise HTTPException(status_code=400, detail="Only CSV files are supported")
content = await file.read(MAX_IMPORT_FILE_BYTES + 1)
if len(content) > MAX_IMPORT_FILE_BYTES:
raise HTTPException(status_code=413, detail="File too large (max 10 MB)")
try:
text = content.decode("utf-8-sig") # handle BOM
except UnicodeDecodeError:
text = content.decode("latin-1")
reader = csv.DictReader(io.StringIO(text))
rows = []
for row in reader:
if len(rows) >= MAX_IMPORT_ROWS:
raise HTTPException(status_code=400, detail=f"File contains too many rows (max {MAX_IMPORT_ROWS:,})")
mapped = {}
# Flexible column mapping
for key, col in [("date", date_col), ("description", description_col), ("amount", amount_col)]:
val = row.get(col) or row.get(col.lower()) or row.get(col.upper())
if val is not None:
mapped[key] = val.strip()
if "date" in mapped and "amount" in mapped:
mapped.setdefault("description", "Imported transaction")
rows.append(mapped)
if not rows:
raise HTTPException(status_code=400, detail="No valid rows found. Check column names.")
result = await import_csv(db, user.id, account_id, rows, user.base_currency)
await write_audit(db, user_id=user.id, action="import_data", metadata=result)
await db.commit()
return result
@router.get("/import/template")
async def import_template():
from fastapi.responses import Response
csv_content = "date,description,amount,merchant,notes\n2026-01-15,Tesco Groceries,-45.67,Tesco,\n2026-01-14,Salary,2500.00,Employer,January salary\n"
return Response(
content=csv_content,
media_type="text/csv",
headers={"Content-Disposition": "attachment; filename=import_template.csv"},
)