- OCR pipeline: Tesseract (images) + pdfplumber (PDFs) → AI text prompt → rule-based regex fallback; works with any text model, not just vision models - Scan Receipt toolbar button parses a photo and pre-fills the transaction form; receipt image is automatically attached to the created transaction - AI settings page: provider, API key (AES-256-GCM encrypted), custom URL, model, and per-user debug toggle that gates the OCR/AI debug panel - Fix CSRF cookie secure=False so HTTP deployments work; add 7-day max_age - Fix attachment_refs missing from _to_response (attachments never appeared in UI) - Fix multipart boundary lost when Content-Type was set manually in axios calls - nginx: raise client_max_body_size to 15 MB, add 120s proxy timeout for OCR - Migration 0005: add ai_debug boolean to users table - Update README and CLAUDE.md with AI scanning docs and architecture notes Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
619 lines
22 KiB
Python
619 lines
22 KiB
Python
import csv
|
|
import io
|
|
import mimetypes
|
|
import os
|
|
import uuid
|
|
from pathlib import Path
|
|
from typing import Annotated
|
|
|
|
from fastapi import APIRouter, Depends, File, Form, HTTPException, Query, UploadFile
|
|
from fastapi.responses import FileResponse
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from app.config import get_settings
|
|
from app.core.audit import write_audit
|
|
from app.dependencies import get_current_user, get_db
|
|
from app.schemas.transaction import TransactionCreate, TransactionFilter, TransactionUpdate
|
|
from app.services.transaction_service import (
|
|
TransactionError,
|
|
create_transaction,
|
|
delete_transaction,
|
|
get_transaction,
|
|
import_csv,
|
|
list_transactions,
|
|
update_transaction,
|
|
_to_response,
|
|
)
|
|
|
|
# CSV import limits (enforced in import_transactions)
MAX_IMPORT_FILE_BYTES = 10 * 1024 * 1024  # 10 MB
MAX_IMPORT_ROWS = 50_000

# Attachment validation: the extension is a cheap first-pass filter; the
# authoritative check sniffs the real MIME type from the file bytes.
ALLOWED_MIME_TYPES = {
    "image/jpeg",
    "image/png",
    "image/webp",
    "application/pdf",
}
ALLOWED_EXTENSIONS = {".jpg", ".jpeg", ".png", ".webp", ".pdf"}

router = APIRouter()
|
|
|
|
|
|
@router.get("")
async def get_transactions(
    account_id: uuid.UUID | None = None,
    category_id: uuid.UUID | None = None,
    type: str | None = None,
    status: str | None = None,
    date_from: str | None = None,
    date_to: str | None = None,
    search: str | None = None,
    is_recurring: bool | None = None,
    page: int = Query(default=1, ge=1),
    page_size: int = Query(default=50, ge=1, le=200),
    db: AsyncSession = Depends(get_db),
    user=Depends(get_current_user),
):
    """List the current user's transactions with optional filters and pagination.

    ``date_from``/``date_to`` are ISO (YYYY-MM-DD) strings. Previously a
    malformed date raised an unhandled ValueError (HTTP 500); it is now
    reported as a 400 client error.
    """
    from datetime import date

    def _parse_iso_date(value: str | None):
        # Surface malformed dates as a client error instead of a 500.
        if not value:
            return None
        try:
            return date.fromisoformat(value)
        except ValueError:
            raise HTTPException(status_code=400, detail=f"Invalid date '{value}' (expected YYYY-MM-DD)")

    filters = TransactionFilter(
        account_id=account_id,
        category_id=category_id,
        type=type,
        status=status,
        date_from=_parse_iso_date(date_from),
        date_to=_parse_iso_date(date_to),
        search=search,
        is_recurring=is_recurring,
        page=page,
        page_size=page_size,
    )
    return await list_transactions(db, user.id, filters)
|
|
|
|
|
|
@router.post("", status_code=201)
async def create(
    body: TransactionCreate,
    db: AsyncSession = Depends(get_db),
    user=Depends(get_current_user),
):
    """Create a transaction for the current user, write an audit entry, commit."""
    try:
        created = await create_transaction(db, user.id, body, user.base_currency)
        await write_audit(db, user_id=user.id, action="transaction_create")
        await db.commit()
        return created
    except TransactionError as exc:
        # Service-layer errors carry their own HTTP status and message.
        raise HTTPException(status_code=exc.status_code, detail=exc.detail)
|
|
|
|
|
|
@router.get("/{txn_id}")
async def get_one(
    txn_id: uuid.UUID,
    db: AsyncSession = Depends(get_db),
    user=Depends(get_current_user),
):
    """Fetch a single transaction owned by the current user."""
    try:
        record = await get_transaction(db, txn_id, user.id)
        return _to_response(record)
    except TransactionError as exc:
        raise HTTPException(status_code=exc.status_code, detail=exc.detail)
|
|
|
|
|
|
@router.put("/{txn_id}")
async def update(
    txn_id: uuid.UUID,
    body: TransactionUpdate,
    db: AsyncSession = Depends(get_db),
    user=Depends(get_current_user),
):
    """Update a transaction owned by the current user; audited, then committed."""
    try:
        updated = await update_transaction(db, txn_id, user.id, body, user.base_currency)
        await write_audit(db, user_id=user.id, action="transaction_update", resource_type="transaction", resource_id=txn_id)
        await db.commit()
        return updated
    except TransactionError as exc:
        raise HTTPException(status_code=exc.status_code, detail=exc.detail)
|
|
|
|
|
|
@router.delete("/{txn_id}", status_code=204)
async def delete(
    txn_id: uuid.UUID,
    db: AsyncSession = Depends(get_db),
    user=Depends(get_current_user),
):
    """Delete a transaction owned by the current user (204 on success)."""
    try:
        await delete_transaction(db, txn_id, user.id)
        await write_audit(db, user_id=user.id, action="transaction_delete", resource_type="transaction", resource_id=txn_id)
        await db.commit()
    except TransactionError as exc:
        raise HTTPException(status_code=exc.status_code, detail=exc.detail)
|
|
|
|
|
|
@router.post("/{txn_id}/attachments")
async def upload_attachment(
    txn_id: uuid.UUID,
    file: UploadFile = File(...),
    db: AsyncSession = Depends(get_db),
    user=Depends(get_current_user),
):
    """Attach a file (JPEG/PNG/WebP/PDF) to one of the current user's transactions.

    Validation order: extension → ownership → attachment-count cap → size cap →
    content-sniffed MIME type. The file is stored under uploads/<user_id>/ and a
    reference dict is appended to the transaction's attachment_refs column.
    Returns the new reference dict.

    Fixes: removed a dead ``current_refs`` assignment built from the service-layer
    response dict (it was immediately overwritten from the raw row) and a
    redundant ``copy.copy`` of an already-fresh list.
    """
    settings = get_settings()

    # Validate extension — cheap first-pass filter; real check is the MIME sniff
    filename = file.filename or "upload"
    ext = Path(filename).suffix.lower()
    if ext not in ALLOWED_EXTENSIONS:
        raise HTTPException(status_code=400, detail="Unsupported file type. Allowed: JPG, PNG, WebP, PDF")

    # Verify transaction ownership (service raises with the appropriate status)
    try:
        await get_transaction(db, txn_id, user.id)
    except TransactionError as e:
        raise HTTPException(status_code=e.status_code, detail=e.detail)

    # Fetch raw model for JSONB mutation
    from sqlalchemy import select
    from app.db.models.transaction import Transaction as TxnModel
    result = await db.execute(
        select(TxnModel).where(TxnModel.id == txn_id, TxnModel.user_id == user.id)
    )
    txn_row = result.scalar_one_or_none()
    if not txn_row:
        raise HTTPException(status_code=404, detail="Transaction not found")

    current_refs = list(txn_row.attachment_refs or [])
    if len(current_refs) >= settings.max_attachments_per_txn:
        raise HTTPException(status_code=400, detail=f"Maximum {settings.max_attachments_per_txn} attachments per transaction")

    # Read one extra byte so an exactly-at-limit file still passes the check
    content = await file.read(settings.max_attachment_bytes + 1)
    if len(content) > settings.max_attachment_bytes:
        raise HTTPException(status_code=413, detail="File too large (max 10 MB)")

    # Sniff MIME from content — never trust the client-supplied Content-Type
    import magic  # python-magic
    detected_mime = magic.from_buffer(content[:2048], mime=True)
    if detected_mime not in ALLOWED_MIME_TYPES:
        raise HTTPException(status_code=400, detail="File content does not match an allowed type (JPEG, PNG, WebP, PDF)")

    # Store under a per-user directory with a UUID-based name (no user-controlled paths)
    attachment_id = str(uuid.uuid4())
    user_upload_dir = Path(settings.upload_dir) / str(user.id)
    user_upload_dir.mkdir(parents=True, exist_ok=True)
    stored_name = f"{attachment_id}{ext}"
    stored_path = user_upload_dir / stored_name
    stored_path.write_bytes(content)

    # Append the reference via a NEW list so the JSONB column change is detected
    ref = {
        "id": attachment_id,
        "filename": filename,
        "mime_type": detected_mime,
        "size": len(content),
        "stored_name": stored_name,
    }
    from sqlalchemy import update as sql_update
    new_refs = current_refs + [ref]
    await db.execute(
        sql_update(TxnModel)
        .where(TxnModel.id == txn_id)
        .values(attachment_refs=new_refs)
    )
    await write_audit(db, user_id=user.id, action="transaction_update", resource_type="transaction", resource_id=txn_id, metadata={"attachment_added": attachment_id})
    await db.commit()
    return ref
|
|
|
|
|
|
@router.get("/{txn_id}/attachments/{attachment_id}")
async def download_attachment(
    txn_id: uuid.UUID,
    attachment_id: str,
    db: AsyncSession = Depends(get_db),
    user=Depends(get_current_user),
):
    """Stream an attachment back to its owner with its stored MIME type and name."""
    from sqlalchemy import select
    from app.db.models.transaction import Transaction as TxnModel

    settings = get_settings()

    row = (
        await db.execute(
            select(TxnModel).where(TxnModel.id == txn_id, TxnModel.user_id == user.id)
        )
    ).scalar_one_or_none()
    if row is None:
        raise HTTPException(status_code=404, detail="Transaction not found")

    matching = [r for r in (row.attachment_refs or []) if r["id"] == attachment_id]
    if not matching:
        raise HTTPException(status_code=404, detail="Attachment not found")
    ref = matching[0]

    file_path = Path(settings.upload_dir) / str(user.id) / ref["stored_name"]
    if not file_path.exists():
        raise HTTPException(status_code=404, detail="Attachment file missing")

    return FileResponse(
        path=str(file_path),
        media_type=ref["mime_type"],
        filename=ref["filename"],
    )
|
|
|
|
|
|
@router.delete("/{txn_id}/attachments/{attachment_id}", status_code=204)
async def delete_attachment(
    txn_id: uuid.UUID,
    attachment_id: str,
    db: AsyncSession = Depends(get_db),
    user=Depends(get_current_user),
):
    """Remove an attachment: delete the stored file (best-effort) and its DB reference."""
    from sqlalchemy import select, update as sql_update
    from app.db.models.transaction import Transaction as TxnModel

    settings = get_settings()

    row = (
        await db.execute(
            select(TxnModel).where(TxnModel.id == txn_id, TxnModel.user_id == user.id)
        )
    ).scalar_one_or_none()
    if row is None:
        raise HTTPException(status_code=404, detail="Transaction not found")

    refs = list(row.attachment_refs or [])
    target = next((r for r in refs if r["id"] == attachment_id), None)
    if not target:
        raise HTTPException(status_code=404, detail="Attachment not found")

    # Best-effort file removal — a missing or undeletable file must not block
    # removal of the database reference.
    file_path = Path(settings.upload_dir) / str(user.id) / target["stored_name"]
    try:
        file_path.unlink(missing_ok=True)
    except OSError:
        pass

    remaining = [r for r in refs if r["id"] != attachment_id]
    await db.execute(
        sql_update(TxnModel)
        .where(TxnModel.id == txn_id)
        .values(attachment_refs=remaining)
    )
    await write_audit(db, user_id=user.id, action="transaction_update", resource_type="transaction", resource_id=txn_id, metadata={"attachment_deleted": attachment_id})
    await db.commit()
|
|
|
|
|
|
# Prompt sent to the user's configured text model; {ocr_text} is filled in via
# str.format, so the doubled braces below yield literal JSON braces.
_RECEIPT_TEXT_PROMPT = (
    "You are a receipt parser. Below is the raw text extracted from a receipt via OCR.\n\n"
    "Receipt text:\n{ocr_text}\n\n"
    "Extract the information and return ONLY a JSON object with exactly these keys "
    "(use null for any field you cannot determine):\n"
    '{{"merchant": "store name", "amount": 0.00, "currency": "GBP", '
    '"date": "YYYY-MM-DD", "description": "brief description", '
    '"category": "one of: Food & Drink, Transport, Shopping, Entertainment, Health, Travel, Bills & Utilities, Other"}}\n'
    "Return ONLY the JSON object. No markdown, no explanation, no code fences."
)

# Canonical shape of an all-empty parse result; documents the keys every parser
# path returns.  NOTE(review): not referenced within this file — presumably
# consumed by callers elsewhere; confirm before removing.
_EMPTY_RESULT: dict = {
    "merchant": None, "amount": None, "currency": None,
    "date": None, "description": None, "category": None,
    "raw": None, "ocr_text": None,
}
|
|
|
|
|
|
def _extract_ocr_text(file_bytes: bytes, mime_type: str) -> str:
    """Extract text from an image or PDF. Returns empty string on failure.

    PDFs: try the embedded text layer first (pdfplumber, first 4 pages); if
    that yields nothing — typical for scanned PDFs — rasterize page 1 at
    200 dpi and OCR it with Tesseract. Images: OCR directly.

    Third-party OCR/PDF libraries are imported lazily so the module loads
    even when they are absent. Removed two redundant function-local
    ``import io`` statements (io is already imported at module level).
    """
    if mime_type == "application/pdf":
        import pdfplumber

        try:
            with pdfplumber.open(io.BytesIO(file_bytes)) as pdf:
                pages_text = [page.extract_text() or "" for page in pdf.pages[:4]]
                text = "\n".join(pages_text).strip()
                if text:
                    return text
        except Exception:
            # Corrupt/odd PDF — fall through to the raster+OCR path below.
            pass
        # Scanned PDF — convert first page to image then OCR
        try:
            from pdf2image import convert_from_bytes
            import pytesseract
            images = convert_from_bytes(file_bytes, first_page=1, last_page=1, dpi=200)
            if images:
                return pytesseract.image_to_string(images[0])
        except Exception:
            pass
        return ""
    else:
        import pytesseract
        from PIL import Image
        try:
            img = Image.open(io.BytesIO(file_bytes))
            return pytesseract.image_to_string(img)
        except Exception:
            return ""
|
|
|
|
|
|
def _rule_based_parse(ocr_text: str) -> dict:
|
|
"""Extract receipt fields from OCR text using regex. Best-effort."""
|
|
import re
|
|
from datetime import datetime
|
|
|
|
lines = [ln.strip() for ln in ocr_text.splitlines() if ln.strip()]
|
|
|
|
# Merchant: skip very short lines and lines that look like addresses/phone numbers
|
|
merchant = None
|
|
for ln in lines[:5]:
|
|
if len(ln) > 2 and not re.match(r"^[\d\s\-\+\(\)]+$", ln) and not re.match(r"^\d+\s+\w+", ln):
|
|
merchant = ln
|
|
break
|
|
|
|
# Currency from symbols
|
|
currency = None
|
|
if "£" in ocr_text:
|
|
currency = "GBP"
|
|
elif "€" in ocr_text:
|
|
currency = "EUR"
|
|
elif "$" in ocr_text:
|
|
currency = "USD"
|
|
|
|
# Amount: prefer lines containing total/amount keywords, then fall back to largest number
|
|
amount = None
|
|
total_line_pat = re.compile(
|
|
r"(?:total|amount\s*due|grand\s*total|balance\s*due|subtotal|net\s*total)"
|
|
r"[^\d£$€]*([£$€]?\s*\d{1,6}[.,]\d{2})\b",
|
|
re.IGNORECASE,
|
|
)
|
|
all_amount_pat = re.compile(r"[£$€]?\s*(\d{1,6}[.,]\d{2})\b")
|
|
|
|
for m in total_line_pat.finditer(ocr_text):
|
|
raw = re.sub(r"[£$€\s]", "", m.group(1)).replace(",", ".")
|
|
try:
|
|
amount = float(raw)
|
|
break
|
|
except ValueError:
|
|
pass
|
|
|
|
if amount is None:
|
|
candidates = []
|
|
for m in all_amount_pat.finditer(ocr_text):
|
|
try:
|
|
candidates.append(float(m.group(1).replace(",", ".")))
|
|
except ValueError:
|
|
pass
|
|
if candidates:
|
|
amount = max(candidates)
|
|
|
|
# Date: try common formats
|
|
date = None
|
|
date_patterns = [
|
|
(r"\b(\d{4}[-/]\d{2}[-/]\d{2})\b", ["%Y-%m-%d", "%Y/%m/%d"]),
|
|
(r"\b(\d{2}[-/]\d{2}[-/]\d{4})\b", ["%d-%m-%Y", "%d/%m/%Y", "%m/%d/%Y"]),
|
|
(r"\b(\d{1,2}\s+(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+\d{4})\b", ["%d %B %Y", "%d %b %Y"]),
|
|
(r"\b((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+\d{1,2},?\s+\d{4})\b", ["%B %d, %Y", "%b %d, %Y"]),
|
|
]
|
|
for pattern, fmts in date_patterns:
|
|
m = re.search(pattern, ocr_text, re.IGNORECASE)
|
|
if m:
|
|
raw_date = m.group(1).rstrip(".")
|
|
for fmt in fmts:
|
|
try:
|
|
date = datetime.strptime(raw_date, fmt).strftime("%Y-%m-%d")
|
|
break
|
|
except ValueError:
|
|
pass
|
|
if date:
|
|
break
|
|
|
|
description = merchant # simple default
|
|
|
|
return {
|
|
"merchant": merchant,
|
|
"amount": amount,
|
|
"currency": currency,
|
|
"date": date,
|
|
"description": description,
|
|
"category": None,
|
|
"raw": None,
|
|
"ocr_text": ocr_text,
|
|
}
|
|
|
|
|
|
def _strip_code_fence(text: str) -> str:
|
|
if text.startswith("```"):
|
|
parts = text.split("```")
|
|
text = parts[1] if len(parts) > 1 else text
|
|
if text.startswith("json"):
|
|
text = text[4:]
|
|
return text.strip()
|
|
|
|
|
|
async def _call_ai_parse(file_bytes: bytes, mime_type: str, user_row) -> dict:
    """
    Parse a receipt: OCR text extraction → AI (text prompt) → rule-based fallback.

    AI is optional; the rule-based parser always runs as a fallback when AI is
    unconfigured, the request fails, or the reply is not valid JSON. When the
    AI replied with non-JSON, that raw reply is now preserved in the fallback
    result's "raw" key (previously the code's comment promised this but the
    value was discarded), so the debug panel can display it.

    Raises:
        HTTPException(400) when no text could be extracted from the file.
    """
    import json
    import httpx
    from app.core.security import decrypt_field

    # Step 1: extract text via OCR / PDF text layer
    ocr_text = _extract_ocr_text(file_bytes, mime_type)

    has_ai = bool(user_row and user_row.ai_provider and user_row.ai_api_key_enc)
    failed_ai_raw: str | None = None  # non-JSON AI reply, kept for debugging

    # Step 2: attempt AI parse if configured
    if has_ai and ocr_text.strip():
        api_key = decrypt_field(user_row.ai_api_key_enc)
        custom_base_url = (user_row.ai_base_url or "").rstrip("/")
        custom_model = (user_row.ai_model or "").strip()
        prompt = _RECEIPT_TEXT_PROMPT.format(ocr_text=ocr_text)

        try:
            if user_row.ai_provider == "anthropic":
                base_url = custom_base_url or "https://api.anthropic.com"
                model = custom_model or "claude-haiku-4-5-20251001"
                async with httpx.AsyncClient(timeout=60) as client:
                    resp = await client.post(
                        f"{base_url}/v1/messages",
                        headers={"x-api-key": api_key, "anthropic-version": "2023-06-01", "content-type": "application/json"},
                        json={"model": model, "max_tokens": 512, "messages": [{"role": "user", "content": prompt}]},
                    )
                    resp.raise_for_status()
                    raw = resp.json()["content"][0]["text"].strip()

            elif user_row.ai_provider == "openai":
                base_url = custom_base_url or "https://api.openai.com"
                model = custom_model or "gpt-4o-mini"
                async with httpx.AsyncClient(timeout=60) as client:
                    resp = await client.post(
                        f"{base_url}/v1/chat/completions",
                        headers={"Authorization": f"Bearer {api_key}", "content-type": "application/json"},
                        json={"model": model, "max_tokens": 512, "messages": [{"role": "user", "content": prompt}]},
                    )
                    resp.raise_for_status()
                    raw = resp.json()["choices"][0]["message"]["content"].strip()

            else:
                # Unknown provider value — skip AI entirely.
                raw = None

            if raw:
                cleaned = _strip_code_fence(raw)
                try:
                    parsed = json.loads(cleaned)
                    return {
                        "merchant": parsed.get("merchant"),
                        "amount": parsed.get("amount"),
                        "currency": parsed.get("currency"),
                        "date": parsed.get("date"),
                        "description": parsed.get("description"),
                        "category": parsed.get("category"),
                        "raw": raw,
                        "ocr_text": ocr_text,
                    }
                except json.JSONDecodeError:
                    # AI returned non-JSON — fall through to rules, but keep
                    # the raw reply so the debug panel can surface it.
                    failed_ai_raw = raw

        except (httpx.HTTPStatusError, httpx.RequestError):
            pass  # network/API failure — fall through to rule-based

    # Step 3: rule-based fallback (also used when AI is not configured)
    if ocr_text.strip():
        result = _rule_based_parse(ocr_text)
        if failed_ai_raw is not None:
            result["raw"] = failed_ai_raw
        return result

    # Nothing worked — no OCR text at all
    if has_ai:
        raise HTTPException(status_code=400, detail="Could not extract any text from the file. Try a clearer image.")
    raise HTTPException(status_code=400, detail="No AI configured and OCR extracted no text. Add an API key in Settings → AI or try a clearer image.")
|
|
|
|
|
|
@router.post("/parse-receipt")
async def parse_receipt_upload(
    file: UploadFile = File(...),
    db: AsyncSession = Depends(get_db),
    user=Depends(get_current_user),
):
    """Upload a receipt image and parse it with AI — no existing transaction required."""
    from app.db.models.user import User as UserModel

    settings = get_settings()

    # Extension gate first, then size, then content-sniffed MIME — same order
    # as the attachment upload endpoint.
    ext = Path(file.filename or "upload").suffix.lower()
    if ext not in ALLOWED_EXTENSIONS:
        raise HTTPException(status_code=400, detail="Unsupported file type. Allowed: JPG, PNG, WebP, PDF")

    content = await file.read(settings.max_attachment_bytes + 1)
    if len(content) > settings.max_attachment_bytes:
        raise HTTPException(status_code=413, detail="File too large (max 10 MB)")

    import magic
    mime_type = magic.from_buffer(content[:2048], mime=True)
    if mime_type not in ALLOWED_MIME_TYPES:
        raise HTTPException(status_code=400, detail="File content does not match an allowed type")

    user_row = await db.get(UserModel, user.id)
    return await _call_ai_parse(content, mime_type, user_row)
|
|
|
|
|
|
@router.post("/{txn_id}/attachments/{attachment_id}/parse")
async def parse_attachment(
    txn_id: uuid.UUID,
    attachment_id: str,
    db: AsyncSession = Depends(get_db),
    user=Depends(get_current_user),
):
    """Parse an already-uploaded attachment with AI."""
    from sqlalchemy import select
    from app.db.models.transaction import Transaction as TxnModel
    from app.db.models.user import User as UserModel

    settings = get_settings()
    user_row = await db.get(UserModel, user.id)

    txn_row = (
        await db.execute(select(TxnModel).where(TxnModel.id == txn_id, TxnModel.user_id == user.id))
    ).scalar_one_or_none()
    if txn_row is None:
        raise HTTPException(status_code=404, detail="Transaction not found")

    # Locate the attachment reference on the transaction
    ref = None
    for candidate in txn_row.attachment_refs or []:
        if candidate["id"] == attachment_id:
            ref = candidate
            break
    if not ref:
        raise HTTPException(status_code=404, detail="Attachment not found")

    stored = Path(settings.upload_dir) / str(user.id) / ref["stored_name"]
    if not stored.exists():
        raise HTTPException(status_code=404, detail="Attachment file missing")

    return await _call_ai_parse(stored.read_bytes(), ref["mime_type"], user_row)
|
|
|
|
|
|
@router.post("/import")
async def import_transactions(
    file: UploadFile = File(...),
    account_id: uuid.UUID = Form(...),
    date_col: str = Form(default="date"),
    description_col: str = Form(default="description"),
    amount_col: str = Form(default="amount"),
    db: AsyncSession = Depends(get_db),
    user=Depends(get_current_user),
):
    """Import transactions from an uploaded CSV with configurable column names."""
    if not file.filename or not file.filename.lower().endswith(".csv"):
        raise HTTPException(status_code=400, detail="Only CSV files are supported")

    content = await file.read(MAX_IMPORT_FILE_BYTES + 1)
    if len(content) > MAX_IMPORT_FILE_BYTES:
        raise HTTPException(status_code=413, detail="File too large (max 10 MB)")
    try:
        text = content.decode("utf-8-sig")  # handle BOM
    except UnicodeDecodeError:
        text = content.decode("latin-1")

    column_map = [("date", date_col), ("description", description_col), ("amount", amount_col)]
    rows = []
    for row in csv.DictReader(io.StringIO(text)):
        if len(rows) >= MAX_IMPORT_ROWS:
            raise HTTPException(status_code=400, detail=f"File contains too many rows (max {MAX_IMPORT_ROWS:,})")
        # Flexible column mapping: exact name, then lower-case, then upper-case
        mapped = {}
        for key, col in column_map:
            val = row.get(col) or row.get(col.lower()) or row.get(col.upper())
            if val is not None:
                mapped[key] = val.strip()
        # Only date and amount are mandatory; description gets a default
        if "date" in mapped and "amount" in mapped:
            mapped.setdefault("description", "Imported transaction")
            rows.append(mapped)

    if not rows:
        raise HTTPException(status_code=400, detail="No valid rows found. Check column names.")

    result = await import_csv(db, user.id, account_id, rows, user.base_currency)
    await write_audit(db, user_id=user.id, action="import_data", metadata=result)
    await db.commit()
    return result
|
|
|
|
|
|
@router.get("/import/template")
async def import_template():
    """Serve a sample CSV showing the expected import column layout."""
    from fastapi.responses import Response

    header = "date,description,amount,merchant,notes\n"
    examples = (
        "2026-01-15,Tesco Groceries,-45.67,Tesco,\n"
        "2026-01-14,Salary,2500.00,Employer,January salary\n"
    )
    return Response(
        content=header + examples,
        media_type="text/csv",
        headers={"Content-Disposition": "attachment; filename=import_template.csv"},
    )
|