From 26e2a055db3690331af6719c33391c6efb0745c7 Mon Sep 17 00:00:00 2001 From: megaproxy Date: Wed, 22 Apr 2026 22:07:38 +0000 Subject: [PATCH] Add AI receipt scanning with OCR pipeline and debug toggle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - OCR pipeline: Tesseract (images) + pdfplumber (PDFs) → AI text prompt → rule-based regex fallback; works with any text model, not just vision models - Scan Receipt toolbar button parses a photo and pre-fills the transaction form; receipt image is automatically attached to the created transaction - AI settings page: provider, API key (AES-256-GCM encrypted), custom URL, model, and per-user debug toggle that gates the OCR/AI debug panel - Fix CSRF cookie secure=False so HTTP deployments work; add 7-day max_age - Fix attachment_refs missing from _to_response (attachments never appeared in UI) - Fix multipart boundary lost when Content-Type was set manually in axios calls - nginx: raise client_max_body_size to 15 MB, add 120s proxy timeout for OCR - Migration 0005: add ai_debug boolean to users table - Update README and CLAUDE.md with AI scanning docs and architecture notes Co-Authored-By: Claude Sonnet 4.6 --- CLAUDE.md | 17 +- README.md | 30 ++ backend/Dockerfile | 3 + backend/alembic/versions/0005_ai_debug.py | 21 ++ backend/app/api/v1/settings.py | 5 + backend/app/api/v1/transactions.py | 277 +++++++++++++----- backend/app/core/middleware.py | 6 +- backend/app/db/models/user.py | 1 + backend/app/services/transaction_service.py | 1 + backend/pyproject.toml | 3 + frontend/nginx.conf | 5 + frontend/src/api/settings.ts | 8 +- frontend/src/api/transactions.ts | 5 +- frontend/src/pages/settings/SettingsPage.tsx | 25 ++ .../transactions/TransactionFormModal.tsx | 33 ++- .../pages/transactions/TransactionList.tsx | 56 ++-- 16 files changed, 397 insertions(+), 99 deletions(-) create mode 100644 backend/alembic/versions/0005_ai_debug.py diff --git a/CLAUDE.md b/CLAUDE.md index 
a7edd5d..599de25 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -70,7 +70,7 @@ backend/app/ dependencies.py — get_db, get_redis, get_current_user api/ router.py — Central router; investments/reports/budgets have no prefix (paths self-contained) - v1/ — One file per domain (auth, accounts, transactions, budgets, reports, investments, predictions) + v1/ — One file per domain (auth, accounts, transactions, budgets, reports, investments, predictions, settings) db/models/ — SQLAlchemy 2.0 Mapped models schemas/ — Pydantic request/response models (separate Create/Update/Response per domain) services/ — Business logic; each service owns one domain @@ -88,6 +88,21 @@ backend/app/ - Import deduplication via SHA-256 `import_hash` on transactions - Every mutation writes to `AuditLog` (append-only; app role has no UPDATE/DELETE on that table) - Soft deletes: `deleted_at` timestamp; all queries must filter `WHERE deleted_at IS NULL` +- `_to_response()` in `transaction_service.py` must include all fields returned to the frontend — omitting a field here makes it invisible to the UI even if it's in the DB + +### AI / receipt parsing (`api/v1/settings.py`, `api/v1/transactions.py`) +- User AI config (provider, encrypted API key, base URL, model, debug flag) lives on the `users` table; managed via `GET/PUT/DELETE /settings/ai` +- `ai_api_key_enc` is AES-256-GCM encrypted with `encrypt_field`/`decrypt_field` +- Receipt parsing pipeline in `_call_ai_parse()`: OCR text extraction (`_extract_ocr_text`) → AI text prompt → rule-based fallback (`_rule_based_parse`) + - Images: pytesseract; PDFs: pdfplumber (text layer) → pdf2image + tesseract (scanned fallback) + - AI receives OCR text, not the image — works with any text model, not just vision models + - `_RECEIPT_TEXT_PROMPT` uses `.format(ocr_text=...)` — escape literal braces in the JSON example with `{{` and `}}` +- `POST /transactions/parse-receipt` — scan without an existing transaction (used by "Scan Receipt" toolbar button) +- `POST 
/transactions/{id}/attachments/{att_id}/parse` — parse an already-uploaded attachment +- `ai_debug` boolean on user controls whether the OCR/AI debug panel shows in the transaction form; check `aiSettings?.debug` on the frontend via the `["ai-settings"]` query key + +### CSRF cookie +- Set with `secure=False` and `max_age=604800` (7 days) intentionally — the CSRF token is a public value readable by JS; `Secure` would break HTTP deployments. Session/auth cookies remain properly secured. ### Frontend layout ``` diff --git a/README.md b/README.md index 24cd41b..7632e44 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ Runs entirely on your own hardware via Docker Compose. Designed for LAN access w - Transfer detection between accounts - Recurring transaction rules (rrule) - Receipt and document attachments on transactions (JPEG, PNG, WebP, PDF — up to 10 MB each) +- **AI receipt scanning** — photograph a receipt to auto-extract merchant, amount, date, and description into a new transaction; receipt is automatically attached - CSV import with **auto-detection** for 10 UK bank formats: Monzo, Starling, Revolut, Barclays, Lloyds, NatWest, HSBC, Santander, Nationwide, and generic fallback - SHA-256 deduplication prevents re-importing the same transactions @@ -101,6 +102,7 @@ Ten independent security layers: | Database | PostgreSQL 16 with pgcrypto and RLS | | Cache / Sessions | Redis 7 | | ML | Prophet, statsmodels, NumPy, SciPy | +| OCR | Tesseract 5, pdfplumber, pdf2image | | Background jobs | APScheduler (in-process) | | Containerisation | Docker Compose | @@ -172,6 +174,34 @@ Forward your domain to `http://:4000`. The frontend nginx serves the React --- +## AI Receipt Scanning + +Receipt scanning uses OCR (Tesseract) to extract text from the image first, then optionally passes that text to an AI model to parse it into structured fields. This means **any text-capable LLM works** — you're not limited to vision models. 
+ +If no AI is configured, or if the AI call fails, a rule-based parser runs on the OCR text as a fallback (finds totals, dates, and merchant names via regex). + +### Setup + +Go to **Settings → AI** and fill in: + +| Field | Description | +|-------|-------------| +| Provider | `Anthropic` or `OpenAI-compatible` | +| API Key | Your key (stored AES-256-GCM encrypted on your server) | +| Custom API URL | Optional — for Open WebUI, LM Studio, Ollama, etc. | +| Model | Optional — defaults to `claude-haiku-4-5-20251001` or `gpt-4o-mini` | +| Debug mode | Shows OCR text and raw AI response in the scan form when enabled | + +For **Open WebUI**: set the provider to `OpenAI-compatible`, enter `http://your-server:port` as the URL (MyMidas appends `/v1/chat/completions`), and enter the model name exactly as shown in Open WebUI's interface. + +Use the **Test connection** button to verify your settings before scanning. + +### Usage + +Click **Scan Receipt** in the transactions toolbar, then select a photo or PDF. The form opens pre-filled with extracted fields — review and save. The receipt file is automatically attached to the created transaction. + +--- + ## Backups Encrypted backups run automatically every night at 3 AM (GPG AES-256 symmetric encryption). Backups are stored in `./data/backups/` and retained for 30 days.
diff --git a/backend/Dockerfile b/backend/Dockerfile index ff79601..8a5d243 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -5,6 +5,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ gnupg \ gzip \ gosu \ + tesseract-ocr \ + tesseract-ocr-eng \ + poppler-utils \ && rm -rf /var/lib/apt/lists/* RUN pip install --no-cache-dir uv WORKDIR /app diff --git a/backend/alembic/versions/0005_ai_debug.py b/backend/alembic/versions/0005_ai_debug.py new file mode 100644 index 0000000..f3986c8 --- /dev/null +++ b/backend/alembic/versions/0005_ai_debug.py @@ -0,0 +1,21 @@ +"""add ai_debug flag to users + +Revision ID: 0005 +Revises: 0004 +Create Date: 2026-04-22 +""" +from alembic import op +import sqlalchemy as sa + +revision = "0005" +down_revision = "0004" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.add_column("users", sa.Column("ai_debug", sa.Boolean(), nullable=False, server_default="false")) + + +def downgrade() -> None: + op.drop_column("users", "ai_debug") diff --git a/backend/app/api/v1/settings.py b/backend/app/api/v1/settings.py index df12884..aca8a75 100644 --- a/backend/app/api/v1/settings.py +++ b/backend/app/api/v1/settings.py @@ -22,6 +22,7 @@ class AiSettingsResponse(BaseModel): has_api_key: bool base_url: str | None model: str | None + debug: bool class AiSettingsSave(BaseModel): @@ -29,6 +30,7 @@ class AiSettingsSave(BaseModel): api_key: str = "" base_url: str = "" model: str = "" + debug: bool = False @router.get("/ai", response_model=AiSettingsResponse) @@ -38,6 +40,7 @@ async def get_ai_settings(user: User = Depends(get_current_user)): has_api_key=bool(user.ai_api_key_enc), base_url=user.ai_base_url, model=user.ai_model, + debug=user.ai_debug, ) @@ -54,6 +57,7 @@ async def save_ai_settings( "ai_provider": body.provider, "ai_base_url": body.base_url.rstrip("/") or None, "ai_model": body.model.strip() or None, + "ai_debug": body.debug, } if body.api_key.strip(): @@ -68,6 +72,7 @@ async def 
save_ai_settings( has_api_key=True, base_url=values["ai_base_url"], model=values["ai_model"], + debug=body.debug, ) diff --git a/backend/app/api/v1/transactions.py b/backend/app/api/v1/transactions.py index 1de9ada..5a931b4 100644 --- a/backend/app/api/v1/transactions.py +++ b/backend/app/api/v1/transactions.py @@ -278,93 +278,230 @@ async def delete_attachment( await db.commit() -_RECEIPT_PROMPT = ( - "You are a receipt parser. Extract information from this receipt and return ONLY a JSON object " - "with exactly these keys (use null for any field you cannot determine):\n" - '{"merchant": "store name", "amount": 0.00, "currency": "GBP", ' +_RECEIPT_TEXT_PROMPT = ( + "You are a receipt parser. Below is the raw text extracted from a receipt via OCR.\n\n" + "Receipt text:\n{ocr_text}\n\n" + "Extract the information and return ONLY a JSON object with exactly these keys " + "(use null for any field you cannot determine):\n" + '{{"merchant": "store name", "amount": 0.00, "currency": "GBP", ' '"date": "YYYY-MM-DD", "description": "brief description", ' - '"category": "one of: Food & Drink, Transport, Shopping, Entertainment, Health, Travel, Bills & Utilities, Other"}\n' + '"category": "one of: Food & Drink, Transport, Shopping, Entertainment, Health, Travel, Bills & Utilities, Other"}}\n' "Return ONLY the JSON object. No markdown, no explanation, no code fences." ) +_EMPTY_RESULT: dict = { + "merchant": None, "amount": None, "currency": None, + "date": None, "description": None, "category": None, + "raw": None, "ocr_text": None, +} + + +def _extract_ocr_text(file_bytes: bytes, mime_type: str) -> str: + """Extract text from an image or PDF. 
Returns empty string on failure.""" + if mime_type == "application/pdf": + import io + import pdfplumber + try: + with pdfplumber.open(io.BytesIO(file_bytes)) as pdf: + pages_text = [page.extract_text() or "" for page in pdf.pages[:4]] + text = "\n".join(pages_text).strip() + if text: + return text + except Exception: + pass + # Scanned PDF — convert first page to image then OCR + try: + from pdf2image import convert_from_bytes + import pytesseract + images = convert_from_bytes(file_bytes, first_page=1, last_page=1, dpi=200) + if images: + return pytesseract.image_to_string(images[0]) + except Exception: + pass + return "" + else: + import io + import pytesseract + from PIL import Image + try: + img = Image.open(io.BytesIO(file_bytes)) + return pytesseract.image_to_string(img) + except Exception: + return "" + + +def _rule_based_parse(ocr_text: str) -> dict: + """Extract receipt fields from OCR text using regex. Best-effort.""" + import re + from datetime import datetime + + lines = [ln.strip() for ln in ocr_text.splitlines() if ln.strip()] + + # Merchant: skip very short lines and lines that look like addresses/phone numbers + merchant = None + for ln in lines[:5]: + if len(ln) > 2 and not re.match(r"^[\d\s\-\+\(\)]+$", ln) and not re.match(r"^\d+\s+\w+", ln): + merchant = ln + break + + # Currency from symbols + currency = None + if "£" in ocr_text: + currency = "GBP" + elif "€" in ocr_text: + currency = "EUR" + elif "$" in ocr_text: + currency = "USD" + + # Amount: prefer lines containing total/amount keywords, then fall back to largest number + amount = None + total_line_pat = re.compile( + r"(?:total|amount\s*due|grand\s*total|balance\s*due|subtotal|net\s*total)" + r"[^\d£$€]*([£$€]?\s*\d{1,6}[.,]\d{2})\b", + re.IGNORECASE, + ) + all_amount_pat = re.compile(r"[£$€]?\s*(\d{1,6}[.,]\d{2})\b") + + for m in total_line_pat.finditer(ocr_text): + raw = re.sub(r"[£$€\s]", "", m.group(1)).replace(",", ".") + try: + amount = float(raw) + break + except ValueError: + 
pass + + if amount is None: + candidates = [] + for m in all_amount_pat.finditer(ocr_text): + try: + candidates.append(float(m.group(1).replace(",", "."))) + except ValueError: + pass + if candidates: + amount = max(candidates) + + # Date: try common formats + date = None + date_patterns = [ + (r"\b(\d{4}[-/]\d{2}[-/]\d{2})\b", ["%Y-%m-%d", "%Y/%m/%d"]), + (r"\b(\d{2}[-/]\d{2}[-/]\d{4})\b", ["%d-%m-%Y", "%d/%m/%Y", "%m/%d/%Y"]), + (r"\b(\d{1,2}\s+(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+\d{4})\b", ["%d %B %Y", "%d %b %Y"]), + (r"\b((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+\d{1,2},?\s+\d{4})\b", ["%B %d, %Y", "%b %d, %Y"]), + ] + for pattern, fmts in date_patterns: + m = re.search(pattern, ocr_text, re.IGNORECASE) + if m: + raw_date = m.group(1).rstrip(".") + for fmt in fmts: + try: + date = datetime.strptime(raw_date, fmt).strftime("%Y-%m-%d") + break + except ValueError: + pass + if date: + break + + description = merchant # simple default + + return { + "merchant": merchant, + "amount": amount, + "currency": currency, + "date": date, + "description": description, + "category": None, + "raw": None, + "ocr_text": ocr_text, + } + + +def _strip_code_fence(text: str) -> str: + if text.startswith("```"): + parts = text.split("```") + text = parts[1] if len(parts) > 1 else text + if text.startswith("json"): + text = text[4:] + return text.strip() + async def _call_ai_parse(file_bytes: bytes, mime_type: str, user_row) -> dict: - """Call the configured AI provider and return parsed receipt fields.""" - import base64 + """ + Parse a receipt: OCR text extraction → AI (text prompt) → rule-based fallback. + AI is optional; rules always run as fallback if AI is unconfigured or fails. + """ import json import httpx from app.core.security import decrypt_field - if not user_row.ai_provider or not user_row.ai_api_key_enc: - raise HTTPException(status_code=400, detail="No AI provider configured. 
Add your API key in Settings → AI.") + # Step 1: extract text via OCR / PDF text layer + ocr_text = _extract_ocr_text(file_bytes, mime_type) - api_key = decrypt_field(user_row.ai_api_key_enc) - b64 = base64.standard_b64encode(file_bytes).decode() - custom_base_url = (user_row.ai_base_url or "").rstrip("/") - custom_model = (user_row.ai_model or "").strip() + has_ai = bool(user_row and user_row.ai_provider and user_row.ai_api_key_enc) + + # Step 2: attempt AI parse if configured + if has_ai and ocr_text.strip(): + api_key = decrypt_field(user_row.ai_api_key_enc) + custom_base_url = (user_row.ai_base_url or "").rstrip("/") + custom_model = (user_row.ai_model or "").strip() + prompt = _RECEIPT_TEXT_PROMPT.format(ocr_text=ocr_text) + + try: + if user_row.ai_provider == "anthropic": + base_url = custom_base_url or "https://api.anthropic.com" + model = custom_model or "claude-haiku-4-5-20251001" + async with httpx.AsyncClient(timeout=60) as client: + resp = await client.post( + f"{base_url}/v1/messages", + headers={"x-api-key": api_key, "anthropic-version": "2023-06-01", "content-type": "application/json"}, + json={"model": model, "max_tokens": 512, "messages": [{"role": "user", "content": prompt}]}, + ) + resp.raise_for_status() + raw = resp.json()["content"][0]["text"].strip() + + elif user_row.ai_provider == "openai": + base_url = custom_base_url or "https://api.openai.com" + model = custom_model or "gpt-4o-mini" + async with httpx.AsyncClient(timeout=60) as client: + resp = await client.post( + f"{base_url}/v1/chat/completions", + headers={"Authorization": f"Bearer {api_key}", "content-type": "application/json"}, + json={"model": model, "max_tokens": 512, "messages": [{"role": "user", "content": prompt}]}, + ) + resp.raise_for_status() + raw = resp.json()["choices"][0]["message"]["content"].strip() - try: - if user_row.ai_provider == "anthropic": - base_url = custom_base_url or "https://api.anthropic.com" - model = custom_model or "claude-haiku-4-5-20251001" - if 
mime_type == "application/pdf": - content_block = {"type": "document", "source": {"type": "base64", "media_type": "application/pdf", "data": b64}} else: - content_block = {"type": "image", "source": {"type": "base64", "media_type": mime_type, "data": b64}} - async with httpx.AsyncClient(timeout=60) as client: - resp = await client.post( - f"{base_url}/v1/messages", - headers={"x-api-key": api_key, "anthropic-version": "2023-06-01", "content-type": "application/json"}, - json={"model": model, "max_tokens": 512, "messages": [{"role": "user", "content": [content_block, {"type": "text", "text": _RECEIPT_PROMPT}]}]}, - ) - resp.raise_for_status() - text = resp.json()["content"][0]["text"].strip() + raw = None - elif user_row.ai_provider == "openai": - base_url = custom_base_url or "https://api.openai.com" - model = custom_model or "gpt-4o-mini" - if mime_type == "application/pdf" and not custom_base_url: - raise HTTPException(status_code=400, detail="PDF parsing is not supported with the OpenAI provider. 
Use an image format or switch to Anthropic.") - async with httpx.AsyncClient(timeout=60) as client: - resp = await client.post( - f"{base_url}/v1/chat/completions", - headers={"Authorization": f"Bearer {api_key}", "content-type": "application/json"}, - json={"model": model, "max_tokens": 512, "messages": [{"role": "user", "content": [ - {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{b64}"}}, - {"type": "text", "text": _RECEIPT_PROMPT}, - ]}]}, - ) - resp.raise_for_status() - text = resp.json()["choices"][0]["message"]["content"].strip() + if raw: + cleaned = _strip_code_fence(raw) + try: + parsed = json.loads(cleaned) + return { + "merchant": parsed.get("merchant"), + "amount": parsed.get("amount"), + "currency": parsed.get("currency"), + "date": parsed.get("date"), + "description": parsed.get("description"), + "category": parsed.get("category"), + "raw": raw, + "ocr_text": ocr_text, + } + except json.JSONDecodeError: + # AI returned something non-JSON — fall through to rules, keep raw for debug + pass - else: - raise HTTPException(status_code=400, detail="Unknown provider") + except (httpx.HTTPStatusError, httpx.RequestError): + pass # fall through to rule-based - except httpx.HTTPStatusError as e: - raise HTTPException(status_code=502, detail=f"AI provider error: {e.response.status_code}") - except httpx.RequestError: - raise HTTPException(status_code=502, detail="Could not reach AI provider") + # Step 3: rule-based fallback (also used when AI is not configured) + if ocr_text.strip(): + return _rule_based_parse(ocr_text) - if text.startswith("```"): - text = text.split("```")[1] - if text.startswith("json"): - text = text[4:] - text = text.strip() - - try: - parsed = json.loads(text) - except json.JSONDecodeError: - raise HTTPException(status_code=502, detail="AI returned an unexpected response. 
Try again.") - - return { - "merchant": parsed.get("merchant"), - "amount": parsed.get("amount"), - "currency": parsed.get("currency"), - "date": parsed.get("date"), - "description": parsed.get("description"), - "category": parsed.get("category"), - "raw": text, - } + # Nothing worked + if has_ai: + raise HTTPException(status_code=400, detail="Could not extract any text from the file. Try a clearer image.") + raise HTTPException(status_code=400, detail="No AI configured and OCR extracted no text. Add an API key in Settings → AI or try a clearer image.") @router.post("/parse-receipt") diff --git a/backend/app/core/middleware.py b/backend/app/core/middleware.py index 7e80cb4..4a4dfea 100644 --- a/backend/app/core/middleware.py +++ b/backend/app/core/middleware.py @@ -7,7 +7,6 @@ from fastapi import Request, Response from starlette.middleware.base import BaseHTTPMiddleware from starlette.responses import JSONResponse -from app.config import get_settings SAFE_METHODS = {"GET", "HEAD", "OPTIONS"} @@ -57,7 +56,8 @@ class CSRFMiddleware(BaseHTTPMiddleware): "csrf_token", token, httponly=False, # must be readable by JS samesite="lax", - secure=not get_settings().is_development, + secure=False, # CSRF token is public by design; Secure would break HTTP deployments + max_age=604800, # 7 days — survive browser restarts ) return response @@ -65,7 +65,7 @@ class CSRFMiddleware(BaseHTTPMiddleware): response = await call_next(request) if not existing_csrf: token = str(uuid.uuid4()) - response.set_cookie("csrf_token", token, httponly=False, samesite="lax", secure=not get_settings().is_development) + response.set_cookie("csrf_token", token, httponly=False, samesite="lax", secure=False, max_age=604800) return response if request.url.path in {"/api/v1/auth/login", "/api/v1/auth/login/totp"}: diff --git a/backend/app/db/models/user.py b/backend/app/db/models/user.py index b5c6897..adf2b9d 100644 --- a/backend/app/db/models/user.py +++ b/backend/app/db/models/user.py @@ -33,6 +33,7 @@ 
class User(Base): ai_api_key_enc: Mapped[bytes | None] = mapped_column(LargeBinary, nullable=True) ai_base_url: Mapped[str | None] = mapped_column(Text, nullable=True) ai_model: Mapped[str | None] = mapped_column(Text, nullable=True) + ai_debug: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False) accounts: Mapped[list["Account"]] = relationship(back_populates="user", lazy="noload") # type: ignore[name-defined] sessions: Mapped[list["Session"]] = relationship(back_populates="user", lazy="noload") # type: ignore[name-defined] diff --git a/backend/app/services/transaction_service.py b/backend/app/services/transaction_service.py index 9408c66..ad2af19 100644 --- a/backend/app/services/transaction_service.py +++ b/backend/app/services/transaction_service.py @@ -47,6 +47,7 @@ def _to_response(t: Transaction) -> dict: "notes": _dec(t.notes_enc), "tags": t.tags or [], "is_recurring": t.is_recurring, + "attachment_refs": t.attachment_refs or [], "created_at": t.created_at, "updated_at": t.updated_at, } diff --git a/backend/pyproject.toml b/backend/pyproject.toml index afd4a55..aae74a9 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -31,6 +31,9 @@ dependencies = [ "structlog>=24.0", "pillow>=11.0", "python-magic>=0.4", + "pytesseract>=0.3", + "pdfplumber>=0.11", + "pdf2image>=1.17", "psycopg2-binary>=2.9", ] diff --git a/frontend/nginx.conf b/frontend/nginx.conf index 5ec54d3..684db12 100644 --- a/frontend/nginx.conf +++ b/frontend/nginx.conf @@ -3,6 +3,9 @@ server { root /usr/share/nginx/html; index index.html; + # Allow uploads up to 15 MB (receipt images can be several MB) + client_max_body_size 15m; + # Proxy API calls to the backend container location /api/ { proxy_pass http://backend:8000; @@ -10,6 +13,8 @@ server { proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; + proxy_read_timeout 120s; # OCR + AI can take a few seconds + 
proxy_send_timeout 120s; } # All other routes → index.html (React SPA) diff --git a/frontend/src/api/settings.ts b/frontend/src/api/settings.ts index 1c23fd1..0c285c0 100644 --- a/frontend/src/api/settings.ts +++ b/frontend/src/api/settings.ts @@ -5,6 +5,7 @@ export interface AiSettings { has_api_key: boolean; base_url: string | null; model: string | null; + debug: boolean; } export interface AiSettingsSave { @@ -12,6 +13,7 @@ export interface AiSettingsSave { api_key?: string; base_url?: string; model?: string; + debug?: boolean; } export interface ParsedReceipt { @@ -22,6 +24,7 @@ export interface ParsedReceipt { description: string | null; category: string | null; raw: string | null; + ocr_text: string | null; } export async function getAiSettings(): Promise { @@ -51,8 +54,7 @@ export async function parseReceipt(txnId: string, attachmentId: string): Promise export async function parseReceiptFile(file: File): Promise { const form = new FormData(); form.append("file", file); - const { data } = await api.post("/transactions/parse-receipt", form, { - headers: { "Content-Type": "multipart/form-data" }, - }); + // Do NOT set Content-Type manually — axios sets it with the multipart boundary automatically + const { data } = await api.post("/transactions/parse-receipt", form); return data; } diff --git a/frontend/src/api/transactions.ts b/frontend/src/api/transactions.ts index e33c45d..b8ac710 100644 --- a/frontend/src/api/transactions.ts +++ b/frontend/src/api/transactions.ts @@ -113,9 +113,8 @@ export async function importCsv( export async function uploadAttachment(txnId: string, file: File): Promise { const form = new FormData(); form.append("file", file); - const res = await api.post(`/transactions/${txnId}/attachments`, form, { - headers: { "Content-Type": "multipart/form-data" }, - }); + // Do NOT set Content-Type manually — axios sets it with the multipart boundary automatically + const res = await api.post(`/transactions/${txnId}/attachments`, form); return 
res.data; } diff --git a/frontend/src/pages/settings/SettingsPage.tsx b/frontend/src/pages/settings/SettingsPage.tsx index 3844b2b..1ea76ad 100644 --- a/frontend/src/pages/settings/SettingsPage.tsx +++ b/frontend/src/pages/settings/SettingsPage.tsx @@ -660,6 +660,7 @@ function AiSection() { const [apiKey, setApiKey] = useState(""); const [baseUrl, setBaseUrl] = useState(""); const [model, setModel] = useState(""); + const [debug, setDebug] = useState(false); const [showKey, setShowKey] = useState(false); const [success, setSuccess] = useState(""); @@ -670,6 +671,7 @@ function AiSection() { if (d.provider) setProvider(d.provider); if (d.base_url) setBaseUrl(d.base_url); if (d.model) setModel(d.model); + setDebug(d.debug); return d; }, }); @@ -680,6 +682,7 @@ function AiSection() { api_key: apiKey, base_url: baseUrl, model, + debug, }), onSuccess: () => { qc.invalidateQueries({ queryKey: ["ai-settings"] }); @@ -800,6 +803,28 @@ function AiSection() {

+
+
+

Debug mode

+

Show OCR text and raw AI responses when scanning receipts

+
+ +
+
- {parsedFromReceipt && ( + {parsedFromReceipt && !showAiDebug && (
Fields pre-filled from receipt — review before saving
)} + {parsedFromReceipt && showAiDebug && ( +
+

+ AI scan result — review before saving +

+
+ Merchant{initialValues?.merchant ?? not detected} + Amount{initialValues?.amount != null ? initialValues.amount : not detected} + Date{initialValues?.date ?? not detected} + Currency{initialValues?.currency ?? not detected} + Description{initialValues?.description ?? not detected} +
+ {initialValues?.ocr_text && ( +
+ OCR extracted text +
{initialValues.ocr_text}
+
+ )} + {initialValues?.raw && ( +
+ Raw AI response +
{initialValues.raw}
+
+ )} +
+ )} {/* Type */}
diff --git a/frontend/src/pages/transactions/TransactionList.tsx b/frontend/src/pages/transactions/TransactionList.tsx index f381779..f2d044b 100644 --- a/frontend/src/pages/transactions/TransactionList.tsx +++ b/frontend/src/pages/transactions/TransactionList.tsx @@ -1,9 +1,9 @@ import { useRef, useState } from "react"; import { useQuery, useMutation, useQueryClient } from "@tanstack/react-query"; -import { getTransactions, deleteTransaction, createTransaction, getCategories } from "@/api/transactions"; +import { getTransactions, deleteTransaction, createTransaction, getCategories, uploadAttachment } from "@/api/transactions"; import type { Transaction } from "@/api/transactions"; import { getAccounts } from "@/api/accounts"; -import { parseReceiptFile } from "@/api/settings"; +import { parseReceiptFile, getAiSettings } from "@/api/settings"; import type { ParsedReceipt } from "@/api/settings"; import { formatCurrency } from "@/utils/currency"; import { cn } from "@/utils/cn"; @@ -52,6 +52,7 @@ export default function TransactionList() { const [showForm, setShowForm] = useState(false); const [selectedTxn, setSelectedTxn] = useState(null); const [receiptParsed, setReceiptParsed] = useState(null); + const receiptFileRef = useRef(null); const [scanError, setScanError] = useState(null); const [scanning, setScanning] = useState(false); const receiptInputRef = useRef(null); @@ -80,6 +81,7 @@ export default function TransactionList() { const { data: accounts = [] } = useQuery({ queryKey: ["accounts"], queryFn: getAccounts }); const { data: categories = [] } = useQuery({ queryKey: ["categories"], queryFn: getCategories }); + const { data: aiSettings } = useQuery({ queryKey: ["ai-settings"], queryFn: getAiSettings }); const deleteMutation = useMutation({ mutationFn: deleteTransaction, @@ -89,30 +91,47 @@ export default function TransactionList() { }, }); - const createMutation = useMutation({ - mutationFn: createTransaction, - onSuccess: () => { + const createMutation = 
useMutation({ mutationFn: createTransaction }); + + async function handleCreateTransaction(data: any) { + try { + const txn = await createMutation.mutateAsync(data); + if (receiptFileRef.current) { + try { + await uploadAttachment(txn.id, receiptFileRef.current); + } catch (e: any) { + setScanError(`Transaction saved but receipt attachment failed: ${e?.response?.data?.detail ?? e?.message ?? "unknown error"}`); + } + receiptFileRef.current = null; + } qc.invalidateQueries({ queryKey: ["transactions"] }); qc.invalidateQueries({ queryKey: ["accounts"] }); setShowForm(false); setReceiptParsed(null); - }, - }); + } catch { + // Transaction creation failed — createMutation.error has the detail, form stays open + } + } async function handleReceiptFile(file: File) { setScanning(true); setScanError(null); try { const parsed = await parseReceiptFile(file); - const hasAnyField = parsed.merchant || parsed.amount || parsed.description || parsed.date; - if (!hasAnyField && parsed.raw) { - setScanError(`AI couldn't extract any fields. Raw response: "${parsed.raw}"`); - } else { - setReceiptParsed(parsed); - setShowForm(true); - } + // Always open the form — the modal shows a debug panel with what was/wasn't detected + setReceiptParsed(parsed); + receiptFileRef.current = file; + setShowForm(true); } catch (e: any) { - setScanError(e?.response?.data?.detail ?? "Could not parse receipt. Check your AI settings."); + const detail = e?.response?.data?.detail; + const status = e?.response?.status; + if (detail) { + setScanError(typeof detail === "string" ? detail : `HTTP ${status}: ${JSON.stringify(detail)}`); + } else if (status) { + setScanError(`Server error ${status} — check backend logs (docker compose logs backend).`); + } else { + setScanError(`Network error — backend may be unreachable. ${e?.message ?? 
""}`); + } } finally { setScanning(false); if (receiptInputRef.current) receiptInputRef.current.value = ""; @@ -351,8 +370,8 @@ export default function TransactionList() { { setShowForm(false); setReceiptParsed(null); }} - onSubmit={(data) => createMutation.mutate(data)} + onClose={() => { setShowForm(false); setReceiptParsed(null); receiptFileRef.current = null; }} + onSubmit={handleCreateTransaction} isLoading={createMutation.isPending} initialValues={receiptParsed ? { description: receiptParsed.description ?? undefined, @@ -360,8 +379,11 @@ export default function TransactionList() { amount: receiptParsed.amount ?? undefined, date: receiptParsed.date ?? undefined, currency: receiptParsed.currency ?? undefined, + raw: receiptParsed.raw, + ocr_text: receiptParsed.ocr_text, } : undefined} parsedFromReceipt={!!receiptParsed} + showAiDebug={aiSettings?.debug ?? false} /> )}