Add AI receipt scanning with OCR pipeline and debug toggle
- OCR pipeline: Tesseract (images) + pdfplumber (PDFs) → AI text prompt → rule-based regex fallback; works with any text model, not just vision models
- Scan Receipt toolbar button parses a photo and pre-fills the transaction form; receipt image is automatically attached to the created transaction
- AI settings page: provider, API key (AES-256-GCM encrypted), custom URL, model, and per-user debug toggle that gates the OCR/AI debug panel
- Fix CSRF cookie secure=False so HTTP deployments work; add 7-day max_age
- Fix attachment_refs missing from _to_response (attachments never appeared in UI)
- Fix multipart boundary lost when Content-Type was set manually in axios calls
- nginx: raise client_max_body_size to 15 MB, add 120s proxy timeout for OCR
- Migration 0005: add ai_debug boolean to users table
- Update README and CLAUDE.md with AI scanning docs and architecture notes

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
a7c54ca61c
commit
26e2a055db
16 changed files with 397 additions and 99 deletions
|
|
@ -278,93 +278,230 @@ async def delete_attachment(
|
|||
await db.commit()
|
||||
|
||||
|
||||
_RECEIPT_PROMPT = (
|
||||
"You are a receipt parser. Extract information from this receipt and return ONLY a JSON object "
|
||||
"with exactly these keys (use null for any field you cannot determine):\n"
|
||||
'{"merchant": "store name", "amount": 0.00, "currency": "GBP", '
|
||||
_RECEIPT_TEXT_PROMPT = (
|
||||
"You are a receipt parser. Below is the raw text extracted from a receipt via OCR.\n\n"
|
||||
"Receipt text:\n{ocr_text}\n\n"
|
||||
"Extract the information and return ONLY a JSON object with exactly these keys "
|
||||
"(use null for any field you cannot determine):\n"
|
||||
'{{"merchant": "store name", "amount": 0.00, "currency": "GBP", '
|
||||
'"date": "YYYY-MM-DD", "description": "brief description", '
|
||||
'"category": "one of: Food & Drink, Transport, Shopping, Entertainment, Health, Travel, Bills & Utilities, Other"}\n'
|
||||
'"category": "one of: Food & Drink, Transport, Shopping, Entertainment, Health, Travel, Bills & Utilities, Other"}}\n'
|
||||
"Return ONLY the JSON object. No markdown, no explanation, no code fences."
|
||||
)
|
||||
|
||||
_EMPTY_RESULT: dict = {
|
||||
"merchant": None, "amount": None, "currency": None,
|
||||
"date": None, "description": None, "category": None,
|
||||
"raw": None, "ocr_text": None,
|
||||
}
|
||||
|
||||
|
||||
def _extract_ocr_text(file_bytes: bytes, mime_type: str) -> str:
|
||||
"""Extract text from an image or PDF. Returns empty string on failure."""
|
||||
if mime_type == "application/pdf":
|
||||
import io
|
||||
import pdfplumber
|
||||
try:
|
||||
with pdfplumber.open(io.BytesIO(file_bytes)) as pdf:
|
||||
pages_text = [page.extract_text() or "" for page in pdf.pages[:4]]
|
||||
text = "\n".join(pages_text).strip()
|
||||
if text:
|
||||
return text
|
||||
except Exception:
|
||||
pass
|
||||
# Scanned PDF — convert first page to image then OCR
|
||||
try:
|
||||
from pdf2image import convert_from_bytes
|
||||
import pytesseract
|
||||
images = convert_from_bytes(file_bytes, first_page=1, last_page=1, dpi=200)
|
||||
if images:
|
||||
return pytesseract.image_to_string(images[0])
|
||||
except Exception:
|
||||
pass
|
||||
return ""
|
||||
else:
|
||||
import io
|
||||
import pytesseract
|
||||
from PIL import Image
|
||||
try:
|
||||
img = Image.open(io.BytesIO(file_bytes))
|
||||
return pytesseract.image_to_string(img)
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
def _rule_based_parse(ocr_text: str) -> dict:
|
||||
"""Extract receipt fields from OCR text using regex. Best-effort."""
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
lines = [ln.strip() for ln in ocr_text.splitlines() if ln.strip()]
|
||||
|
||||
# Merchant: skip very short lines and lines that look like addresses/phone numbers
|
||||
merchant = None
|
||||
for ln in lines[:5]:
|
||||
if len(ln) > 2 and not re.match(r"^[\d\s\-\+\(\)]+$", ln) and not re.match(r"^\d+\s+\w+", ln):
|
||||
merchant = ln
|
||||
break
|
||||
|
||||
# Currency from symbols
|
||||
currency = None
|
||||
if "£" in ocr_text:
|
||||
currency = "GBP"
|
||||
elif "€" in ocr_text:
|
||||
currency = "EUR"
|
||||
elif "$" in ocr_text:
|
||||
currency = "USD"
|
||||
|
||||
# Amount: prefer lines containing total/amount keywords, then fall back to largest number
|
||||
amount = None
|
||||
total_line_pat = re.compile(
|
||||
r"(?:total|amount\s*due|grand\s*total|balance\s*due|subtotal|net\s*total)"
|
||||
r"[^\d£$€]*([£$€]?\s*\d{1,6}[.,]\d{2})\b",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
all_amount_pat = re.compile(r"[£$€]?\s*(\d{1,6}[.,]\d{2})\b")
|
||||
|
||||
for m in total_line_pat.finditer(ocr_text):
|
||||
raw = re.sub(r"[£$€\s]", "", m.group(1)).replace(",", ".")
|
||||
try:
|
||||
amount = float(raw)
|
||||
break
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
if amount is None:
|
||||
candidates = []
|
||||
for m in all_amount_pat.finditer(ocr_text):
|
||||
try:
|
||||
candidates.append(float(m.group(1).replace(",", ".")))
|
||||
except ValueError:
|
||||
pass
|
||||
if candidates:
|
||||
amount = max(candidates)
|
||||
|
||||
# Date: try common formats
|
||||
date = None
|
||||
date_patterns = [
|
||||
(r"\b(\d{4}[-/]\d{2}[-/]\d{2})\b", ["%Y-%m-%d", "%Y/%m/%d"]),
|
||||
(r"\b(\d{2}[-/]\d{2}[-/]\d{4})\b", ["%d-%m-%Y", "%d/%m/%Y", "%m/%d/%Y"]),
|
||||
(r"\b(\d{1,2}\s+(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+\d{4})\b", ["%d %B %Y", "%d %b %Y"]),
|
||||
(r"\b((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+\d{1,2},?\s+\d{4})\b", ["%B %d, %Y", "%b %d, %Y"]),
|
||||
]
|
||||
for pattern, fmts in date_patterns:
|
||||
m = re.search(pattern, ocr_text, re.IGNORECASE)
|
||||
if m:
|
||||
raw_date = m.group(1).rstrip(".")
|
||||
for fmt in fmts:
|
||||
try:
|
||||
date = datetime.strptime(raw_date, fmt).strftime("%Y-%m-%d")
|
||||
break
|
||||
except ValueError:
|
||||
pass
|
||||
if date:
|
||||
break
|
||||
|
||||
description = merchant # simple default
|
||||
|
||||
return {
|
||||
"merchant": merchant,
|
||||
"amount": amount,
|
||||
"currency": currency,
|
||||
"date": date,
|
||||
"description": description,
|
||||
"category": None,
|
||||
"raw": None,
|
||||
"ocr_text": ocr_text,
|
||||
}
|
||||
|
||||
|
||||
def _strip_code_fence(text: str) -> str:
|
||||
if text.startswith("```"):
|
||||
parts = text.split("```")
|
||||
text = parts[1] if len(parts) > 1 else text
|
||||
if text.startswith("json"):
|
||||
text = text[4:]
|
||||
return text.strip()
|
||||
|
||||
|
||||
async def _call_ai_parse(file_bytes: bytes, mime_type: str, user_row) -> dict:
|
||||
"""Call the configured AI provider and return parsed receipt fields."""
|
||||
import base64
|
||||
"""
|
||||
Parse a receipt: OCR text extraction → AI (text prompt) → rule-based fallback.
|
||||
AI is optional; rules always run as fallback if AI is unconfigured or fails.
|
||||
"""
|
||||
import json
|
||||
import httpx
|
||||
from app.core.security import decrypt_field
|
||||
|
||||
if not user_row.ai_provider or not user_row.ai_api_key_enc:
|
||||
raise HTTPException(status_code=400, detail="No AI provider configured. Add your API key in Settings → AI.")
|
||||
# Step 1: extract text via OCR / PDF text layer
|
||||
ocr_text = _extract_ocr_text(file_bytes, mime_type)
|
||||
|
||||
api_key = decrypt_field(user_row.ai_api_key_enc)
|
||||
b64 = base64.standard_b64encode(file_bytes).decode()
|
||||
custom_base_url = (user_row.ai_base_url or "").rstrip("/")
|
||||
custom_model = (user_row.ai_model or "").strip()
|
||||
has_ai = bool(user_row and user_row.ai_provider and user_row.ai_api_key_enc)
|
||||
|
||||
# Step 2: attempt AI parse if configured
|
||||
if has_ai and ocr_text.strip():
|
||||
api_key = decrypt_field(user_row.ai_api_key_enc)
|
||||
custom_base_url = (user_row.ai_base_url or "").rstrip("/")
|
||||
custom_model = (user_row.ai_model or "").strip()
|
||||
prompt = _RECEIPT_TEXT_PROMPT.format(ocr_text=ocr_text)
|
||||
|
||||
try:
|
||||
if user_row.ai_provider == "anthropic":
|
||||
base_url = custom_base_url or "https://api.anthropic.com"
|
||||
model = custom_model or "claude-haiku-4-5-20251001"
|
||||
async with httpx.AsyncClient(timeout=60) as client:
|
||||
resp = await client.post(
|
||||
f"{base_url}/v1/messages",
|
||||
headers={"x-api-key": api_key, "anthropic-version": "2023-06-01", "content-type": "application/json"},
|
||||
json={"model": model, "max_tokens": 512, "messages": [{"role": "user", "content": prompt}]},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
raw = resp.json()["content"][0]["text"].strip()
|
||||
|
||||
elif user_row.ai_provider == "openai":
|
||||
base_url = custom_base_url or "https://api.openai.com"
|
||||
model = custom_model or "gpt-4o-mini"
|
||||
async with httpx.AsyncClient(timeout=60) as client:
|
||||
resp = await client.post(
|
||||
f"{base_url}/v1/chat/completions",
|
||||
headers={"Authorization": f"Bearer {api_key}", "content-type": "application/json"},
|
||||
json={"model": model, "max_tokens": 512, "messages": [{"role": "user", "content": prompt}]},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
raw = resp.json()["choices"][0]["message"]["content"].strip()
|
||||
|
||||
try:
|
||||
if user_row.ai_provider == "anthropic":
|
||||
base_url = custom_base_url or "https://api.anthropic.com"
|
||||
model = custom_model or "claude-haiku-4-5-20251001"
|
||||
if mime_type == "application/pdf":
|
||||
content_block = {"type": "document", "source": {"type": "base64", "media_type": "application/pdf", "data": b64}}
|
||||
else:
|
||||
content_block = {"type": "image", "source": {"type": "base64", "media_type": mime_type, "data": b64}}
|
||||
async with httpx.AsyncClient(timeout=60) as client:
|
||||
resp = await client.post(
|
||||
f"{base_url}/v1/messages",
|
||||
headers={"x-api-key": api_key, "anthropic-version": "2023-06-01", "content-type": "application/json"},
|
||||
json={"model": model, "max_tokens": 512, "messages": [{"role": "user", "content": [content_block, {"type": "text", "text": _RECEIPT_PROMPT}]}]},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
text = resp.json()["content"][0]["text"].strip()
|
||||
raw = None
|
||||
|
||||
elif user_row.ai_provider == "openai":
|
||||
base_url = custom_base_url or "https://api.openai.com"
|
||||
model = custom_model or "gpt-4o-mini"
|
||||
if mime_type == "application/pdf" and not custom_base_url:
|
||||
raise HTTPException(status_code=400, detail="PDF parsing is not supported with the OpenAI provider. Use an image format or switch to Anthropic.")
|
||||
async with httpx.AsyncClient(timeout=60) as client:
|
||||
resp = await client.post(
|
||||
f"{base_url}/v1/chat/completions",
|
||||
headers={"Authorization": f"Bearer {api_key}", "content-type": "application/json"},
|
||||
json={"model": model, "max_tokens": 512, "messages": [{"role": "user", "content": [
|
||||
{"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{b64}"}},
|
||||
{"type": "text", "text": _RECEIPT_PROMPT},
|
||||
]}]},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
text = resp.json()["choices"][0]["message"]["content"].strip()
|
||||
if raw:
|
||||
cleaned = _strip_code_fence(raw)
|
||||
try:
|
||||
parsed = json.loads(cleaned)
|
||||
return {
|
||||
"merchant": parsed.get("merchant"),
|
||||
"amount": parsed.get("amount"),
|
||||
"currency": parsed.get("currency"),
|
||||
"date": parsed.get("date"),
|
||||
"description": parsed.get("description"),
|
||||
"category": parsed.get("category"),
|
||||
"raw": raw,
|
||||
"ocr_text": ocr_text,
|
||||
}
|
||||
except json.JSONDecodeError:
|
||||
# AI returned something non-JSON — fall through to rules, keep raw for debug
|
||||
pass
|
||||
|
||||
else:
|
||||
raise HTTPException(status_code=400, detail="Unknown provider")
|
||||
except (httpx.HTTPStatusError, httpx.RequestError):
|
||||
pass # fall through to rule-based
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
raise HTTPException(status_code=502, detail=f"AI provider error: {e.response.status_code}")
|
||||
except httpx.RequestError:
|
||||
raise HTTPException(status_code=502, detail="Could not reach AI provider")
|
||||
# Step 3: rule-based fallback (also used when AI is not configured)
|
||||
if ocr_text.strip():
|
||||
return _rule_based_parse(ocr_text)
|
||||
|
||||
if text.startswith("```"):
|
||||
text = text.split("```")[1]
|
||||
if text.startswith("json"):
|
||||
text = text[4:]
|
||||
text = text.strip()
|
||||
|
||||
try:
|
||||
parsed = json.loads(text)
|
||||
except json.JSONDecodeError:
|
||||
raise HTTPException(status_code=502, detail="AI returned an unexpected response. Try again.")
|
||||
|
||||
return {
|
||||
"merchant": parsed.get("merchant"),
|
||||
"amount": parsed.get("amount"),
|
||||
"currency": parsed.get("currency"),
|
||||
"date": parsed.get("date"),
|
||||
"description": parsed.get("description"),
|
||||
"category": parsed.get("category"),
|
||||
"raw": text,
|
||||
}
|
||||
# Nothing worked
|
||||
if has_ai:
|
||||
raise HTTPException(status_code=400, detail="Could not extract any text from the file. Try a clearer image.")
|
||||
raise HTTPException(status_code=400, detail="No AI configured and OCR extracted no text. Add an API key in Settings → AI or try a clearer image.")
|
||||
|
||||
|
||||
@router.post("/parse-receipt")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue