From 26e2a055db3690331af6719c33391c6efb0745c7 Mon Sep 17 00:00:00 2001
From: megaproxy
Date: Wed, 22 Apr 2026 22:07:38 +0000
Subject: [PATCH] Add AI receipt scanning with OCR pipeline and debug toggle
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
- OCR pipeline: Tesseract (images) + pdfplumber (PDFs) → AI text prompt →
rule-based regex fallback; works with any text model, not just vision models
- Scan Receipt toolbar button parses a photo and pre-fills the transaction form;
receipt image is automatically attached to the created transaction
- AI settings page: provider, API key (AES-256-GCM encrypted), custom URL,
model, and per-user debug toggle that gates the OCR/AI debug panel
- Fix CSRF cookie secure=False so HTTP deployments work; add 7-day max_age
- Fix attachment_refs missing from _to_response (attachments never appeared in UI)
- Fix multipart boundary lost when Content-Type was set manually in axios calls
- nginx: raise client_max_body_size to 15 MB, add 120s proxy timeout for OCR
- Migration 0005: add ai_debug boolean to users table
- Update README and CLAUDE.md with AI scanning docs and architecture notes
Co-Authored-By: Claude Sonnet 4.6
---
CLAUDE.md | 17 +-
README.md | 30 ++
backend/Dockerfile | 3 +
backend/alembic/versions/0005_ai_debug.py | 21 ++
backend/app/api/v1/settings.py | 5 +
backend/app/api/v1/transactions.py | 277 +++++++++++++-----
backend/app/core/middleware.py | 6 +-
backend/app/db/models/user.py | 1 +
backend/app/services/transaction_service.py | 1 +
backend/pyproject.toml | 3 +
frontend/nginx.conf | 5 +
frontend/src/api/settings.ts | 8 +-
frontend/src/api/transactions.ts | 5 +-
frontend/src/pages/settings/SettingsPage.tsx | 25 ++
.../transactions/TransactionFormModal.tsx | 33 ++-
.../pages/transactions/TransactionList.tsx | 56 ++--
16 files changed, 397 insertions(+), 99 deletions(-)
create mode 100644 backend/alembic/versions/0005_ai_debug.py
diff --git a/CLAUDE.md b/CLAUDE.md
index a7edd5d..599de25 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -70,7 +70,7 @@ backend/app/
dependencies.py — get_db, get_redis, get_current_user
api/
router.py — Central router; investments/reports/budgets have no prefix (paths self-contained)
- v1/ — One file per domain (auth, accounts, transactions, budgets, reports, investments, predictions)
+ v1/ — One file per domain (auth, accounts, transactions, budgets, reports, investments, predictions, settings)
db/models/ — SQLAlchemy 2.0 Mapped models
schemas/ — Pydantic request/response models (separate Create/Update/Response per domain)
services/ — Business logic; each service owns one domain
@@ -88,6 +88,21 @@ backend/app/
- Import deduplication via SHA-256 `import_hash` on transactions
- Every mutation writes to `AuditLog` (append-only; app role has no UPDATE/DELETE on that table)
- Soft deletes: `deleted_at` timestamp; all queries must filter `WHERE deleted_at IS NULL`
+- `_to_response()` in `transaction_service.py` must include all fields returned to the frontend — omitting a field here makes it invisible to the UI even if it's in the DB
+
+### AI / receipt parsing (`api/v1/settings.py`, `api/v1/transactions.py`)
+- User AI config (provider, encrypted API key, base URL, model, debug flag) lives on the `users` table; managed via `GET/PUT/DELETE /settings/ai`
+- `ai_api_key_enc` is AES-256-GCM encrypted with `encrypt_field`/`decrypt_field`
+- Receipt parsing pipeline in `_call_ai_parse()`: OCR text extraction (`_extract_ocr_text`) → AI text prompt → rule-based fallback (`_rule_based_parse`)
+ - Images: pytesseract; PDFs: pdfplumber (text layer) → pdf2image + tesseract (scanned fallback)
+ - AI receives OCR text, not the image — works with any text model, not just vision models
+ - `_RECEIPT_TEXT_PROMPT` uses `.format(ocr_text=...)` — escape literal braces in the JSON example with `{{` and `}}`
+- `POST /transactions/parse-receipt` — scan without an existing transaction (used by "Scan Receipt" toolbar button)
+- `POST /transactions/{id}/attachments/{att_id}/parse` — parse an already-uploaded attachment
+- `ai_debug` boolean on user controls whether the OCR/AI debug panel shows in the transaction form; check `aiSettings?.debug` on the frontend via the `["ai-settings"]` query key
+
+### CSRF cookie
+- Set with `secure=False` and `max_age=604800` (7 days) intentionally — the CSRF token is a public value readable by JS; `Secure` would break HTTP deployments. Session/auth cookies remain properly secured.
### Frontend layout
```
diff --git a/README.md b/README.md
index 24cd41b..7632e44 100644
--- a/README.md
+++ b/README.md
@@ -14,6 +14,7 @@ Runs entirely on your own hardware via Docker Compose. Designed for LAN access w
- Transfer detection between accounts
- Recurring transaction rules (rrule)
- Receipt and document attachments on transactions (JPEG, PNG, WebP, PDF — up to 10 MB each)
+- **AI receipt scanning** — photograph a receipt to auto-extract merchant, amount, date, and description into a new transaction; receipt is automatically attached
- CSV import with **auto-detection** for 10 UK bank formats: Monzo, Starling, Revolut, Barclays, Lloyds, NatWest, HSBC, Santander, Nationwide, and generic fallback
- SHA-256 deduplication prevents re-importing the same transactions
@@ -101,6 +102,7 @@ Ten independent security layers:
| Database | PostgreSQL 16 with pgcrypto and RLS |
| Cache / Sessions | Redis 7 |
| ML | Prophet, statsmodels, NumPy, SciPy |
+| OCR | Tesseract 5, pdfplumber, pdf2image |
| Background jobs | APScheduler (in-process) |
| Containerisation | Docker Compose |
@@ -172,6 +174,34 @@ Forward your domain to `http://:4000`. The frontend nginx serves the React
---
+## AI Receipt Scanning
+
+Receipt scanning uses OCR (Tesseract) to extract text from the image first, then optionally passes that text to an AI model to parse it into structured fields. This means **any text-capable LLM works** — you're not limited to vision models.
+
+If no AI is configured, or if the AI call fails, a rule-based parser runs on the OCR text as a fallback (finds totals, dates, and merchant names via regex).
+
+### Setup
+
+Go to **Settings → AI** and fill in:
+
+| Field | Description |
+|-------|-------------|
+| Provider | `Anthropic` or `OpenAI-compatible` |
+| API Key | Your key (stored AES-256-GCM encrypted on your server) |
+| Custom API URL | Optional — for Open WebUI, LM Studio, Ollama, etc. |
+| Model | Optional — defaults to `claude-haiku-4-5-20251001` or `gpt-4o-mini` |
+| Debug mode | Shows OCR text and raw AI response in the scan form when enabled |
+
+For **Open WebUI**: set the provider to `OpenAI-compatible`, enter `http://your-server:port` as the URL (MyMidas appends `/v1/chat/completions`), and enter the model name exactly as shown in Open WebUI's interface.
+
+Use the **Test connection** button to verify your settings before scanning.
+
+### Usage
+
+Click **Scan Receipt** in the transactions toolbar and select a photo or PDF. The form opens pre-filled with the extracted fields — review them and save. The receipt file is automatically attached to the created transaction.
+
+---
+
## Backups
Encrypted backups run automatically every night at 3 AM (GPG AES-256 symmetric encryption). Backups are stored in `./data/backups/` and retained for 30 days.
diff --git a/backend/Dockerfile b/backend/Dockerfile
index ff79601..8a5d243 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -5,6 +5,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
gnupg \
gzip \
gosu \
+ tesseract-ocr \
+ tesseract-ocr-eng \
+ poppler-utils \
&& rm -rf /var/lib/apt/lists/*
RUN pip install --no-cache-dir uv
WORKDIR /app
diff --git a/backend/alembic/versions/0005_ai_debug.py b/backend/alembic/versions/0005_ai_debug.py
new file mode 100644
index 0000000..f3986c8
--- /dev/null
+++ b/backend/alembic/versions/0005_ai_debug.py
@@ -0,0 +1,21 @@
+"""add ai_debug flag to users
+
+Revision ID: 0005
+Revises: 0004
+Create Date: 2026-04-22
+"""
+from alembic import op
+import sqlalchemy as sa
+
+revision = "0005"
+down_revision = "0004"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+ op.add_column("users", sa.Column("ai_debug", sa.Boolean(), nullable=False, server_default="false"))  # server_default supplies a value for existing rows so NOT NULL can be applied
+
+
+def downgrade() -> None:
+ op.drop_column("users", "ai_debug")  # reverses upgrade(); stored per-user debug flags are discarded
diff --git a/backend/app/api/v1/settings.py b/backend/app/api/v1/settings.py
index df12884..aca8a75 100644
--- a/backend/app/api/v1/settings.py
+++ b/backend/app/api/v1/settings.py
@@ -22,6 +22,7 @@ class AiSettingsResponse(BaseModel):
has_api_key: bool
base_url: str | None
model: str | None
+ debug: bool
class AiSettingsSave(BaseModel):
@@ -29,6 +30,7 @@ class AiSettingsSave(BaseModel):
api_key: str = ""
base_url: str = ""
model: str = ""
+ debug: bool = False
@router.get("/ai", response_model=AiSettingsResponse)
@@ -38,6 +40,7 @@ async def get_ai_settings(user: User = Depends(get_current_user)):
has_api_key=bool(user.ai_api_key_enc),
base_url=user.ai_base_url,
model=user.ai_model,
+ debug=user.ai_debug,
)
@@ -54,6 +57,7 @@ async def save_ai_settings(
"ai_provider": body.provider,
"ai_base_url": body.base_url.rstrip("/") or None,
"ai_model": body.model.strip() or None,
+ "ai_debug": body.debug,
}
if body.api_key.strip():
@@ -68,6 +72,7 @@ async def save_ai_settings(
has_api_key=True,
base_url=values["ai_base_url"],
model=values["ai_model"],
+ debug=body.debug,
)
diff --git a/backend/app/api/v1/transactions.py b/backend/app/api/v1/transactions.py
index 1de9ada..5a931b4 100644
--- a/backend/app/api/v1/transactions.py
+++ b/backend/app/api/v1/transactions.py
@@ -278,93 +278,230 @@ async def delete_attachment(
await db.commit()
-_RECEIPT_PROMPT = (
- "You are a receipt parser. Extract information from this receipt and return ONLY a JSON object "
- "with exactly these keys (use null for any field you cannot determine):\n"
- '{"merchant": "store name", "amount": 0.00, "currency": "GBP", '
+_RECEIPT_TEXT_PROMPT = (  # str.format template: {ocr_text} is substituted, so literal JSON braces are doubled
+ "You are a receipt parser. Below is the raw text extracted from a receipt via OCR.\n\n"
+ "Receipt text:\n{ocr_text}\n\n"
+ "Extract the information and return ONLY a JSON object with exactly these keys "
+ "(use null for any field you cannot determine):\n"
+ '{{"merchant": "store name", "amount": 0.00, "currency": "GBP", '
'"date": "YYYY-MM-DD", "description": "brief description", '
- '"category": "one of: Food & Drink, Transport, Shopping, Entertainment, Health, Travel, Bills & Utilities, Other"}\n'
+ '"category": "one of: Food & Drink, Transport, Shopping, Entertainment, Health, Travel, Bills & Utilities, Other"}}\n'
"Return ONLY the JSON object. No markdown, no explanation, no code fences."
)
+_EMPTY_RESULT: dict = {  # every parse-result key, all unset; NOTE(review): not referenced in the visible hunks — confirm it is used
+ "merchant": None, "amount": None, "currency": None,
+ "date": None, "description": None, "category": None,
+ "raw": None, "ocr_text": None,
+}
+
+
+def _extract_ocr_text(file_bytes: bytes, mime_type: str) -> str:
+    """Extract text from an image or PDF. Returns empty string on failure.
+    PDFs: text layer of the first 4 pages, else OCR of page 1; otherwise image OCR."""
+    import io
+    import logging
+    log = logging.getLogger(__name__)
+    if mime_type == "application/pdf":
+        try:
+            import pdfplumber
+            with pdfplumber.open(io.BytesIO(file_bytes)) as pdf:
+                text = "\n".join(page.extract_text() or "" for page in pdf.pages[:4]).strip()
+            if text:
+                return text
+        except Exception:
+            log.debug("PDF text-layer extraction failed; falling back to OCR", exc_info=True)
+        try:  # Scanned PDF — convert first page to image then OCR
+            from pdf2image import convert_from_bytes
+            import pytesseract
+            images = convert_from_bytes(file_bytes, first_page=1, last_page=1, dpi=200)
+            return pytesseract.image_to_string(images[0]) if images else ""
+        except Exception:
+            log.warning("Scanned-PDF OCR failed", exc_info=True)
+            return ""
+    try:
+        import pytesseract
+        from PIL import Image, ImageOps
+        with Image.open(io.BytesIO(file_bytes)) as img:
+            # Phone photos record rotation in EXIF, which Tesseract ignores.
+            return pytesseract.image_to_string(ImageOps.exif_transpose(img))
+    except Exception:
+        log.warning("Image OCR failed", exc_info=True)
+        return ""
+
+
+def _rule_based_parse(ocr_text: str) -> dict:
+    """Extract receipt fields from OCR text using regex. Best-effort.
+
+    Fields that cannot be found are None; ``category`` and ``raw`` are always None
+    and the input is echoed back as ``ocr_text``.
+    """
+    import re
+    from datetime import datetime
+
+    lines = [ln.strip() for ln in ocr_text.splitlines() if ln.strip()]
+
+    # Merchant: skip very short lines and lines that look like addresses/phone numbers
+    merchant = None
+    for ln in lines[:5]:
+        if len(ln) > 2 and not re.match(r"^[\d\s\-\+\(\)]+$", ln) and not re.match(r"^\d+\s+\w+", ln):
+            merchant = ln
+            break
+
+    # Currency from symbols, in order of preference
+    symbols = (("£", "GBP"), ("€", "EUR"), ("$", "USD"))
+    currency = next((code for sym, code in symbols if sym in ocr_text), None)
+
+    # Amounts: optional thousands groups, then a 2-digit decimal part ("1,234.56", "12,50")
+    number = r"(?:\d{1,3}(?:[.,]\d{3})+|\d{1,6})[.,]\d{2}\b"
+    total_line_pat = re.compile(
+        r"(\b(?:sub|net)[\s\-]*)?(?:total|amount\s*due|balance\s*due)"
+        r"[^\d£$€]*[£$€]?\s*(" + number + ")",
+        re.IGNORECASE,
+    )
+    all_amount_pat = re.compile(r"[£$€]?\s*(" + number + ")")
+
+    def to_amount(text: str) -> float:
+        # The pattern guarantees text ends in "<sep>dd"; earlier separators are thousands marks.
+        return float(re.sub(r"[.,]", "", text[:-3]) + "." + text[-2:])
+
+    # Prefer the largest labelled total, ignoring subtotal/net-total lines unless
+    # nothing else is labelled; finally fall back to the largest amount anywhere.
+    labelled = [(bool(m.group(1)), to_amount(m.group(2))) for m in total_line_pat.finditer(ocr_text)]
+    finals = [value for partial, value in labelled if not partial]
+    candidates = (
+        finals
+        or [value for _, value in labelled]
+        or [to_amount(m.group(1)) for m in all_amount_pat.finditer(ocr_text)]
+    )
+    amount = max(candidates) if candidates else None
+
+    # Date: try common formats (month names may be abbreviated, dotted or spelled out)
+    month = (
+        r"(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?"
+        r"|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\.?"
+    )
+    date = None
+    date_patterns = [
+        (r"\b(\d{4}[-/]\d{2}[-/]\d{2})\b", ["%Y-%m-%d", "%Y/%m/%d"]),
+        (r"\b(\d{2}[-/]\d{2}[-/]\d{4})\b", ["%d-%m-%Y", "%d/%m/%Y", "%m/%d/%Y"]),
+        (r"\b(\d{1,2}\s+" + month + r"\s+\d{4})\b", ["%d %b %Y"]),
+        (r"\b(" + month + r"\s+\d{1,2},?\s+\d{4})\b", ["%b %d %Y"]),
+    ]
+    for pattern, fmts in date_patterns:
+        m = re.search(pattern, ocr_text, re.IGNORECASE)
+        if not m:
+            continue
+        # Normalise "12 Sept. 2024" / "January 5, 2024" to "12 Sep 2024" / "Jan 5 2024"
+        raw_date = re.sub(r"([A-Za-z]{3})[A-Za-z]*\.?", r"\1", m.group(1)).replace(",", "")
+        for fmt in fmts:
+            try:
+                date = datetime.strptime(raw_date, fmt).strftime("%Y-%m-%d")
+                break
+            except ValueError:
+                continue
+        if date:
+            break
+
+    return {
+        "merchant": merchant,
+        "amount": amount,
+        "currency": currency,
+        "date": date,
+        "description": merchant,  # simple default
+        "category": None,
+        "raw": None,
+        "ocr_text": ocr_text,
+    }
+
+
+def _strip_code_fence(text: str) -> str:
+    """Drop a surrounding Markdown code fence and optional ``json`` tag from ``text``."""
+    text = text.strip()
+    if text.startswith("```"):
+        text = text.split("```")[1].lstrip()  # a leading fence guarantees >= 2 parts
+    text = text[4:] if text[:4].lower() == "json" else text
+    return text.strip()
+
async def _call_ai_parse(file_bytes: bytes, mime_type: str, user_row) -> dict:
-    """Call the configured AI provider and return parsed receipt fields."""
-    import base64
+    """Parse a receipt: OCR text extraction → AI (text prompt) → rule-based fallback.
+    Any AI failure falls back to the rules; raises HTTPException(400) only if OCR found no text."""
+    import asyncio
+    import logging
     import json
     import httpx
     from app.core.security import decrypt_field
-    if not user_row.ai_provider or not user_row.ai_api_key_enc:
-        raise HTTPException(status_code=400, detail="No AI provider configured. Add your API key in Settings → AI.")
+    # Step 1: extract text via OCR / PDF text layer (worker thread: Tesseract would block the event loop)
+    ocr_text = await asyncio.to_thread(_extract_ocr_text, file_bytes, mime_type)
-    api_key = decrypt_field(user_row.ai_api_key_enc)
-    b64 = base64.standard_b64encode(file_bytes).decode()
-    custom_base_url = (user_row.ai_base_url or "").rstrip("/")
-    custom_model = (user_row.ai_model or "").strip()
+    has_ai = bool(user_row and user_row.ai_provider and user_row.ai_api_key_enc)
+
+    # Step 2: attempt AI parse if configured
+    if has_ai and ocr_text.strip():
+        api_key = decrypt_field(user_row.ai_api_key_enc)
+        custom_base_url = (user_row.ai_base_url or "").rstrip("/")
+        custom_model = (user_row.ai_model or "").strip()
+        prompt = _RECEIPT_TEXT_PROMPT.format(ocr_text=ocr_text)
+
+        try:
+            if user_row.ai_provider == "anthropic":
+                base_url = custom_base_url or "https://api.anthropic.com"
+                model = custom_model or "claude-haiku-4-5-20251001"
+                async with httpx.AsyncClient(timeout=60) as client:
+                    resp = await client.post(
+                        f"{base_url}/v1/messages",
+                        headers={"x-api-key": api_key, "anthropic-version": "2023-06-01", "content-type": "application/json"},
+                        json={"model": model, "max_tokens": 512, "messages": [{"role": "user", "content": prompt}]},
+                    )
+                resp.raise_for_status()
+                raw = resp.json()["content"][0]["text"].strip()
+
+            elif user_row.ai_provider == "openai":
+                base_url = custom_base_url or "https://api.openai.com"
+                model = custom_model or "gpt-4o-mini"
+                async with httpx.AsyncClient(timeout=60) as client:
+                    resp = await client.post(
+                        f"{base_url}/v1/chat/completions",
+                        headers={"Authorization": f"Bearer {api_key}", "content-type": "application/json"},
+                        json={"model": model, "max_tokens": 512, "messages": [{"role": "user", "content": prompt}]},
+                    )
+                resp.raise_for_status()
+                raw = resp.json()["choices"][0]["message"]["content"].strip()
-    try:
-        if user_row.ai_provider == "anthropic":
-            base_url = custom_base_url or "https://api.anthropic.com"
-            model = custom_model or "claude-haiku-4-5-20251001"
-            if mime_type == "application/pdf":
-                content_block = {"type": "document", "source": {"type": "base64", "media_type": "application/pdf", "data": b64}}
             else:
-                content_block = {"type": "image", "source": {"type": "base64", "media_type": mime_type, "data": b64}}
-            async with httpx.AsyncClient(timeout=60) as client:
-                resp = await client.post(
-                    f"{base_url}/v1/messages",
-                    headers={"x-api-key": api_key, "anthropic-version": "2023-06-01", "content-type": "application/json"},
-                    json={"model": model, "max_tokens": 512, "messages": [{"role": "user", "content": [content_block, {"type": "text", "text": _RECEIPT_PROMPT}]}]},
-                )
-            resp.raise_for_status()
-            text = resp.json()["content"][0]["text"].strip()
+                raw = None
-        elif user_row.ai_provider == "openai":
-            base_url = custom_base_url or "https://api.openai.com"
-            model = custom_model or "gpt-4o-mini"
-            if mime_type == "application/pdf" and not custom_base_url:
-                raise HTTPException(status_code=400, detail="PDF parsing is not supported with the OpenAI provider. Use an image format or switch to Anthropic.")
-            async with httpx.AsyncClient(timeout=60) as client:
-                resp = await client.post(
-                    f"{base_url}/v1/chat/completions",
-                    headers={"Authorization": f"Bearer {api_key}", "content-type": "application/json"},
-                    json={"model": model, "max_tokens": 512, "messages": [{"role": "user", "content": [
-                        {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{b64}"}},
-                        {"type": "text", "text": _RECEIPT_PROMPT},
-                    ]}]},
-                )
-            resp.raise_for_status()
-            text = resp.json()["choices"][0]["message"]["content"].strip()
+            if raw:
+                cleaned = _strip_code_fence(raw)
+                try:
+                    parsed = json.loads(cleaned)
+                    return {
+                        "merchant": parsed.get("merchant"),
+                        "amount": parsed.get("amount"),
+                        "currency": parsed.get("currency"),
+                        "date": parsed.get("date"),
+                        "description": parsed.get("description"),
+                        "category": parsed.get("category"),
+                        "raw": raw,
+                        "ocr_text": ocr_text,
+                    }
+                except (json.JSONDecodeError, AttributeError):
+                    # AI returned non-JSON or a non-object (list/null) — fall through to the rule-based parser
+                    pass
-        else:
-            raise HTTPException(status_code=400, detail="Unknown provider")
+        except (httpx.HTTPStatusError, httpx.RequestError, ValueError, LookupError, TypeError, AttributeError):
+            logging.getLogger(__name__).warning("AI receipt parse failed; using rule-based fallback", exc_info=True)
-    except httpx.HTTPStatusError as e:
-        raise HTTPException(status_code=502, detail=f"AI provider error: {e.response.status_code}")
-    except httpx.RequestError:
-        raise HTTPException(status_code=502, detail="Could not reach AI provider")
+    # Step 3: rule-based fallback (also used when AI is not configured)
+    if ocr_text.strip():
+        return _rule_based_parse(ocr_text)
-    if text.startswith("```"):
-        text = text.split("```")[1]
-    if text.startswith("json"):
-        text = text[4:]
-    text = text.strip()
-
-    try:
-        parsed = json.loads(text)
-    except json.JSONDecodeError:
-        raise HTTPException(status_code=502, detail="AI returned an unexpected response. Try again.")
-
-    return {
-        "merchant": parsed.get("merchant"),
-        "amount": parsed.get("amount"),
-        "currency": parsed.get("currency"),
-        "date": parsed.get("date"),
-        "description": parsed.get("description"),
-        "category": parsed.get("category"),
-        "raw": text,
-    }
+    # Nothing worked
+    if has_ai:
+        raise HTTPException(status_code=400, detail="Could not extract any text from the file. Try a clearer image.")
+    raise HTTPException(status_code=400, detail="No AI configured and OCR extracted no text. Add an API key in Settings → AI or try a clearer image.")
@router.post("/parse-receipt")
diff --git a/backend/app/core/middleware.py b/backend/app/core/middleware.py
index 7e80cb4..4a4dfea 100644
--- a/backend/app/core/middleware.py
+++ b/backend/app/core/middleware.py
@@ -7,7 +7,6 @@ from fastapi import Request, Response
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.responses import JSONResponse
-from app.config import get_settings
SAFE_METHODS = {"GET", "HEAD", "OPTIONS"}
@@ -57,7 +56,8 @@ class CSRFMiddleware(BaseHTTPMiddleware):
"csrf_token", token,
httponly=False, # must be readable by JS
samesite="lax",
- secure=not get_settings().is_development,
+ secure=False, # CSRF token is public by design; Secure would break HTTP deployments
+ max_age=604800, # 7 days — survive browser restarts
)
return response
@@ -65,7 +65,7 @@ class CSRFMiddleware(BaseHTTPMiddleware):
response = await call_next(request)
if not existing_csrf:
token = str(uuid.uuid4())
- response.set_cookie("csrf_token", token, httponly=False, samesite="lax", secure=not get_settings().is_development)
+ response.set_cookie("csrf_token", token, httponly=False, samesite="lax", secure=False, max_age=604800)
return response
if request.url.path in {"/api/v1/auth/login", "/api/v1/auth/login/totp"}:
diff --git a/backend/app/db/models/user.py b/backend/app/db/models/user.py
index b5c6897..adf2b9d 100644
--- a/backend/app/db/models/user.py
+++ b/backend/app/db/models/user.py
@@ -33,6 +33,7 @@ class User(Base):
ai_api_key_enc: Mapped[bytes | None] = mapped_column(LargeBinary, nullable=True)
ai_base_url: Mapped[str | None] = mapped_column(Text, nullable=True)
ai_model: Mapped[str | None] = mapped_column(Text, nullable=True)
+ ai_debug: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
accounts: Mapped[list["Account"]] = relationship(back_populates="user", lazy="noload") # type: ignore[name-defined]
sessions: Mapped[list["Session"]] = relationship(back_populates="user", lazy="noload") # type: ignore[name-defined]
diff --git a/backend/app/services/transaction_service.py b/backend/app/services/transaction_service.py
index 9408c66..ad2af19 100644
--- a/backend/app/services/transaction_service.py
+++ b/backend/app/services/transaction_service.py
@@ -47,6 +47,7 @@ def _to_response(t: Transaction) -> dict:
"notes": _dec(t.notes_enc),
"tags": t.tags or [],
"is_recurring": t.is_recurring,
+ "attachment_refs": t.attachment_refs or [],
"created_at": t.created_at,
"updated_at": t.updated_at,
}
diff --git a/backend/pyproject.toml b/backend/pyproject.toml
index afd4a55..aae74a9 100644
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@@ -31,6 +31,9 @@ dependencies = [
"structlog>=24.0",
"pillow>=11.0",
"python-magic>=0.4",
+ "pytesseract>=0.3",
+ "pdfplumber>=0.11",
+ "pdf2image>=1.17",
"psycopg2-binary>=2.9",
]
diff --git a/frontend/nginx.conf b/frontend/nginx.conf
index 5ec54d3..684db12 100644
--- a/frontend/nginx.conf
+++ b/frontend/nginx.conf
@@ -3,6 +3,9 @@ server {
root /usr/share/nginx/html;
index index.html;
+ # Allow uploads up to 15 MB (receipt images can be several MB)
+ client_max_body_size 15m;
+
# Proxy API calls to the backend container
location /api/ {
proxy_pass http://backend:8000;
@@ -10,6 +13,8 @@ server {
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
+ proxy_read_timeout 120s; # OCR plus the AI call (60 s client timeout) can outlast nginx's 60 s default
+ proxy_send_timeout 120s;
}
# All other routes → index.html (React SPA)
diff --git a/frontend/src/api/settings.ts b/frontend/src/api/settings.ts
index 1c23fd1..0c285c0 100644
--- a/frontend/src/api/settings.ts
+++ b/frontend/src/api/settings.ts
@@ -5,6 +5,7 @@ export interface AiSettings {
has_api_key: boolean;
base_url: string | null;
model: string | null;
+ debug: boolean;
}
export interface AiSettingsSave {
@@ -12,6 +13,7 @@ export interface AiSettingsSave {
api_key?: string;
base_url?: string;
model?: string;
+ debug?: boolean;
}
export interface ParsedReceipt {
@@ -22,6 +24,7 @@ export interface ParsedReceipt {
description: string | null;
category: string | null;
raw: string | null;
+ ocr_text: string | null;
}
export async function getAiSettings(): Promise {
@@ -51,8 +54,7 @@ export async function parseReceipt(txnId: string, attachmentId: string): Promise
export async function parseReceiptFile(file: File): Promise {
const form = new FormData();
form.append("file", file);
- const { data } = await api.post("/transactions/parse-receipt", form, {
- headers: { "Content-Type": "multipart/form-data" },
- });
+ // Do NOT set Content-Type manually — axios sets it with the multipart boundary automatically
+ const { data } = await api.post("/transactions/parse-receipt", form);
return data;
}
diff --git a/frontend/src/api/transactions.ts b/frontend/src/api/transactions.ts
index e33c45d..b8ac710 100644
--- a/frontend/src/api/transactions.ts
+++ b/frontend/src/api/transactions.ts
@@ -113,9 +113,8 @@ export async function importCsv(
export async function uploadAttachment(txnId: string, file: File): Promise {
const form = new FormData();
form.append("file", file);
- const res = await api.post(`/transactions/${txnId}/attachments`, form, {
- headers: { "Content-Type": "multipart/form-data" },
- });
+ // Do NOT set Content-Type manually — axios sets it with the multipart boundary automatically
+ const res = await api.post(`/transactions/${txnId}/attachments`, form);
return res.data;
}
diff --git a/frontend/src/pages/settings/SettingsPage.tsx b/frontend/src/pages/settings/SettingsPage.tsx
index 3844b2b..1ea76ad 100644
--- a/frontend/src/pages/settings/SettingsPage.tsx
+++ b/frontend/src/pages/settings/SettingsPage.tsx
@@ -660,6 +660,7 @@ function AiSection() {
const [apiKey, setApiKey] = useState("");
const [baseUrl, setBaseUrl] = useState("");
const [model, setModel] = useState("");
+ const [debug, setDebug] = useState(false);
const [showKey, setShowKey] = useState(false);
const [success, setSuccess] = useState("");
@@ -670,6 +671,7 @@ function AiSection() {
if (d.provider) setProvider(d.provider);
if (d.base_url) setBaseUrl(d.base_url);
if (d.model) setModel(d.model);
+ setDebug(d.debug);
return d;
},
});
@@ -680,6 +682,7 @@ function AiSection() {
api_key: apiKey,
base_url: baseUrl,
model,
+ debug,
}),
onSuccess: () => {
qc.invalidateQueries({ queryKey: ["ai-settings"] });
@@ -800,6 +803,28 @@ function AiSection() {
+
+
+
Debug mode
+
Show OCR text and raw AI responses when scanning receipts
+
+
+
+