from fastapi import FastAPI, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from PIL import Image, ImageOps, ImageEnhance, ImageFilter
import pytesseract, io, re, cv2, numpy as np, math, os, time

# FastAPI application object; the route decorators further down register the OCR endpoints on it.
app = FastAPI(title="TudoCerto OCR Service", version="1.6.0")
# CORS is fully open: every origin, HTTP method and request header is accepted.
# NOTE(review): confirm that a wildcard origin is intended outside development.
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])

# OCR engines: PaddleOCR is the preferred engine for small thermal/matrix receipts.
# Tesseract remains as fallback so the container still works if Paddle models are unavailable.
# OCR_ENGINES is the availability map reported by the /health endpoint.
OCR_ENGINES = {"paddle": False, "tesseract": True}
# Shared PaddleOCR instance; stays None when the import or the construction below fails.
PADDLE_OCR = None
try:
    # Imported lazily inside the try so a missing paddleocr package does not break module import.
    from paddleocr import PaddleOCR
    PADDLE_OCR = PaddleOCR(
        use_angle_cls=True,
        # Recognition language, overridable through the PADDLE_OCR_LANG environment variable.
        lang=os.getenv("PADDLE_OCR_LANG", "pt"),
        show_log=False,
        det_db_box_thresh=0.35,
        det_db_unclip_ratio=1.8,
        rec_batch_num=8,
        # GPU is used only when PADDLE_USE_GPU is set to "true" (case-insensitive).
        use_gpu=os.getenv("PADDLE_USE_GPU", "false").lower() == "true",
    )
    OCR_ENGINES["paddle"] = True
except Exception as _paddle_error:
    # Any failure (import error, constructor error) is reported on stdout and the
    # service continues with Tesseract only.
    print("paddleocr-unavailable-fallback-tesseract", str(_paddle_error))

# Regex fragment for a monetary value: optional "R$" prefix, 1-6 digits, a dot or comma,
# then exactly two digits. There is no trailing boundary, so it also matches the
# leading part of a longer number such as "0,350".
MONEY = r"(?:R\$\s*)?\d{1,6}(?:[\.,]\d{2})"
# Lower-case substrings counted by header_score()/score_text_quality() to recognise
# the column-header row of the item table.
HEADER_HINTS = [
    "desc", "descr", "descrição", "descricao", "produto", "item", "itens", "mercadoria",
    "cod", "cód", "codigo", "código", "ean", "gtin", "qtd", "qt", "quant", "un", "unit", "vl", "vlr", "valor", "total", "tot"
]
# Lower-case substrings that mark lines belonging to the totals/payment/fiscal footer;
# lines containing any of them are not treated as item descriptions.
END_HINTS = [
    "subtotal", "sub total", "total", "valor total", "troco", "desconto", "acrescimo", "acréscimo",
    "forma pagamento", "pagamento", "cartao", "cartão", "dinheiro", "pix", "credito", "crédito", "debito", "débito",
    "tribut", "imposto", "icms", "pis", "cofins", "cupom", "chave", "protocolo", "autoriz", "consumidor", "cpf", "cnpj", "sat", "nfce", "nfc-e", "danfe", "qr-code", "qrcode", "operador", "caixa", "consulta", "sefaz"
]
# Lower-case substrings that disqualify a top-of-receipt line from being the merchant name.
MERCHANT_NOISE = ["cnpj", "cpf", "endereco", "endereço", "telefone", "extrato", "cupom", "sat", "nfce", "danfe", "documento", "qrcode", "qr-code"]
# Regex alternation of unit-of-measure tokens; interpolated into larger patterns
# that are compiled with re.I.
UNITS = r"KG|KILO|UN|UND|UNID|PC|PCT|LT|L|G|ML|CX|FD|BDJ|FR|MT|M"

# Common OCR fixes for thermal/dot-matrix receipts. normalize_line() applies them
# as plain substring replacements, in dictionary order.
OCR_REPLACEMENTS = {
    "O,": "0,", "o,": "0,", "S,": "5,", "l,": "1,", "I,": "1,", "B,": "8,",
    " R$ ": " ", "RS ": " ", "R5 ": " ", "§": "5", "€": "C", "¢": "c",
    " QT ": " QTD ", " QTE ": " QTD ", " QTD.": " QTD ", " VL ": " VLR ", " VLR.": " VLR ",
}

def br_float(s):
    """Parse a Brazilian-formatted number into a float rounded to 3 decimals.

    Every character other than digits, commas and dots is discarded first, so
    currency prefixes such as "R$" are tolerated. When both separators are
    present the dot is treated as a thousands separator and the comma as the
    decimal mark ("1.234,56" -> 1234.56); a lone comma is the decimal mark.

    Returns 0.0 for falsy input, for input without digits/separators, and for
    anything that still cannot be parsed (e.g. "1,2,3").
    """
    if not s:
        return 0.0
    cleaned = re.sub(r"[^0-9,\.]", "", str(s))
    if not cleaned:
        return 0.0
    if "," in cleaned and "." in cleaned:
        # 1.234,56 -> 1234.56
        cleaned = cleaned.replace(".", "").replace(",", ".")
    elif "," in cleaned:
        cleaned = cleaned.replace(",", ".")
    try:
        return round(float(cleaned), 3)
    except ValueError:
        # Only a malformed numeric string is expected here; do not swallow
        # unrelated exceptions such as KeyboardInterrupt.
        return 0.0

def normalize_line(line):
    """Return *line* cleaned of common OCR artefacts.

    Steps, in order: apply the OCR_REPLACEMENTS substring fixes, turn vertical
    bar characters and tabs into spaces, collapse whitespace runs and trim, and
    finally close gaps inside prices ("12 , 90" / "12. 90" -> "12,90" / "12.90").
    None and other falsy values are treated as an empty string.
    """
    text = str(line or "")
    for wrong, right in OCR_REPLACEMENTS.items():
        text = text.replace(wrong, right)
    text = re.sub(r"[|¦‖]", " ", text)
    text = re.sub(r"[\t]+", " ", text)
    text = re.sub(r"\s+", " ", text).strip()
    # Re-join prices whose separator was read with surrounding spaces.
    return re.sub(r"(\d)\s*([\.,])\s*(\d{2})(?!\d)", r"\1\2\3", text)

def score_text_quality(text: str) -> float:
    """Heuristic score of how receipt-like an OCR reading is (higher is better).

    Combines: number of price matches in the whole text (x4), number of lines
    holding both a price and at least two letters (x7), the best per-line count
    of HEADER_HINTS substrings (x12), letters/20 capped at 20, and the number
    of non-empty normalized lines capped at 30. Empty text scores 0.
    """
    if not text:
        return 0
    letter_re = r"[A-Za-zÀ-ÿ]"
    normalized = (normalize_line(raw) for raw in text.splitlines())
    lines = [ln for ln in normalized if ln]
    price_hits = len(re.findall(MONEY, text))
    letter_total = len(re.findall(letter_re, text))
    best_header = max(
        (sum(1 for hint in HEADER_HINTS if hint in ln.lower()) for ln in lines),
        default=0,
    )
    item_like = sum(
        1
        for ln in lines
        if re.search(MONEY, ln) and len(re.findall(letter_re, ln)) >= 2
    )
    return price_hits*4 + item_like*7 + best_header*12 + min(letter_total/20, 20) + min(len(lines), 30)

def rotate_bound(gray, angle):
    """Rotate *gray* by *angle* degrees around its centre without clipping.

    The output canvas is enlarged to the bounding box of the rotated image and
    the affine matrix is shifted so the content stays centred. Pixels outside
    the source are filled by replicating the border.
    """
    height, width = gray.shape[:2]
    center_x, center_y = width // 2, height // 2
    matrix = cv2.getRotationMatrix2D((center_x, center_y), angle, 1.0)
    abs_cos = abs(matrix[0, 0])
    abs_sin = abs(matrix[0, 1])
    # Size of the axis-aligned box that contains the rotated image.
    new_width = int((height * abs_sin) + (width * abs_cos))
    new_height = int((height * abs_cos) + (width * abs_sin))
    # Translate so the original centre lands on the new canvas centre.
    matrix[0, 2] += (new_width / 2) - center_x
    matrix[1, 2] += (new_height / 2) - center_y
    return cv2.warpAffine(
        gray,
        matrix,
        (new_width, new_height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )

def deskew(gray):
    """Try to straighten a slightly tilted receipt; return *gray* unchanged on failure.

    The image is inverted and Otsu-thresholded, the minimum-area rectangle
    around all foreground pixels is computed, and its angle is normalized to a
    small correction around zero. The rotation is applied only when the
    correction is between 0.2 and 8 degrees (exclusive); anything else is
    treated as "already straight" or "not trustworthy".

    Any exception raised by the OpenCV/NumPy calls is swallowed on purpose:
    deskewing is best-effort and must never abort the OCR pipeline.
    """
    try:
        inv = cv2.bitwise_not(gray)
        _, th = cv2.threshold(inv, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        coords = np.column_stack(np.where(th > 0))
        if coords.size == 0:
            return gray
        angle = cv2.minAreaRect(coords)[-1]
        # minAreaRect reports the angle in [-90, 0) on older OpenCV releases and
        # in (0, 90] on newer ones. The same rectangle is described by angles
        # that differ by 90 degrees, so fold both ranges onto the same small
        # correction; the first and last branches are the legacy behaviour.
        if angle < -45:
            angle = -(90 + angle)
        elif angle > 45:
            angle = 90 - angle
        else:
            angle = -angle
        if 0.2 < abs(angle) < 8:
            return rotate_bound(gray, angle)
    except Exception:
        pass
    return gray

def crop_receipt(gray):
    """Crop away table/background when the photo shows more than the receipt.

    Canny edges are traced into external contours; among the five contours
    with the largest contour area, the first whose bounding box covers more
    than 35% of the image, more than 45% of its height and more than 25% of its
    width is used as the crop (with a small padding). If nothing qualifies, or
    any OpenCV call fails, the input is returned untouched.
    """
    try:
        smoothed = cv2.GaussianBlur(gray, (5, 5), 0)
        edge_map = cv2.Canny(smoothed, 50, 150)
        found, _ = cv2.findContours(edge_map, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not found:
            return gray
        img_h, img_w = gray.shape[:2]
        largest = sorted(found, key=cv2.contourArea, reverse=True)[:5]
        for contour in largest:
            left, top, box_w, box_h = cv2.boundingRect(contour)
            covers_enough = box_w * box_h > 0.35 * img_w * img_h
            if not (covers_enough and box_h > img_h * 0.45 and box_w > img_w * 0.25):
                continue
            # Padding of 1.5% of the shorter image side, clamped to the image.
            margin = int(min(img_w, img_h) * 0.015)
            left = max(0, left - margin)
            top = max(0, top - margin)
            box_w = min(img_w - left, box_w + margin * 2)
            box_h = min(img_h - top, box_h + margin * 2)
            return gray[top:top + box_h, left:left + box_w]
    except Exception:
        pass
    return gray

def preprocess_variants(img: Image.Image):
    """Build five preprocessed renderings of a receipt photo for OCR.

    The photo is EXIF-rotated, forced to portrait when clearly landscape,
    upscaled, sharpened, contrast-boosted, converted to grayscale, cropped
    (crop_receipt) and deskewed (deskew). From the resulting grayscale image,
    after denoising, CLAHE and background division, five variants are derived.

    Returns a list of five PIL images, in this order: normalized gray,
    adaptive threshold, Otsu threshold, horizontally closed adaptive
    threshold, unsharp-masked gray.
    """
    img = ImageOps.exif_transpose(img).convert("RGB")
    # Orientation: if the photo came in sideways, prefer the receipt in portrait.
    if img.width > img.height * 1.35:
        img = img.rotate(90, expand=True)
    # Receipts have small print: images narrower than 2200 px are upscaled to 3200 px wide.
    w, h = img.size
    target_width = 3200 if w < 2200 else w
    scale = max(1.0, target_width / max(1, w))
    if scale > 1:
        img = img.resize((int(w * scale), int(h * scale)), Image.Resampling.LANCZOS)
    img = ImageEnhance.Sharpness(img).enhance(2.2)
    img = ImageEnhance.Contrast(img).enhance(1.35)
    gray = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2GRAY)
    gray = crop_receipt(gray)
    gray = deskew(gray)
    # Enlarge again if the useful region is still narrower than 2600 px (e.g. after cropping).
    h,w = gray.shape[:2]
    if w < 2600:
        gray = cv2.resize(gray, (2600, int(h*(2600/w))), interpolation=cv2.INTER_CUBIC)
    variants = []
    den = cv2.fastNlMeansDenoising(gray, None, 11, 7, 21)
    clahe = cv2.createCLAHE(clipLimit=3.2, tileGridSize=(8,8)).apply(den)
    # Remove shadows and yellowed/thermal tint: divide by the smoothed background.
    background = cv2.medianBlur(clahe, 31)
    divided = cv2.divide(clahe, background, scale=255)
    # 1: normalized grayscale image, good for Tesseract LSTM.
    variants.append(divided)
    # 2: fine adaptive threshold for thermal paper.
    variants.append(cv2.adaptiveThreshold(divided,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,41,13))
    # 3: Otsu for dot-matrix printers / low contrast.
    _, otsu = cv2.threshold(divided,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
    variants.append(otsu)
    # 4: light horizontal closing (applied to variant 2) for broken characters and matrix dots.
    kernel_h = cv2.getStructuringElement(cv2.MORPH_RECT, (2,1))
    close = cv2.morphologyEx(variants[1], cv2.MORPH_CLOSE, kernel_h, iterations=1)
    variants.append(close)
    # 5: final sharpening via unsharp mask.
    blur = cv2.GaussianBlur(divided, (0,0), 1.0)
    sharp = cv2.addWeighted(divided, 1.7, blur, -0.7, 0)
    variants.append(sharp)
    return [Image.fromarray(v) for v in variants]

def tesseract_configs():
    """Return the Tesseract command-line configs tried for every image variant.

    All configs share language, engine mode, spacing and blacklist options and
    differ only in the page segmentation mode, in this order:
    6 (uniform text block, best for receipt tables), 4 (single column of
    variable sizes), 11 (sparse text, for poor photos), 12 (sparse text + OSD).
    """
    base = "-l por+eng --oem 1 -c preserve_interword_spaces=1 -c tessedit_char_blacklist=¢§<>[]{}"
    page_modes = (6, 4, 11, 12)
    return [base + " --psm " + str(mode) for mode in page_modes]

def tsv_lines(image: Image.Image, cfg: str):
    """Rebuild text lines from Tesseract's word-level output.

    Runs ``pytesseract.image_to_data`` with *cfg*, drops empty words and words
    with confidence below 25 (unless they look like a price), groups the rest
    by (block, paragraph, line) and joins each group left-to-right.

    Returns a list of normalized, non-empty lines; an empty list when
    Tesseract itself fails.
    """
    try:
        data = pytesseract.image_to_data(image, config=cfg, output_type=pytesseract.Output.DICT)
    except Exception:
        # Best-effort: a failed Tesseract run simply contributes no lines.
        return []
    rows = {}
    for i, raw_word in enumerate(data.get('text', [])):
        txt = normalize_line(raw_word)
        if not txt:
            continue
        try:
            conf = float(data.get('conf', ['-1'])[i])
        except (TypeError, ValueError, IndexError):
            # Missing or non-numeric confidence: treat as "unknown" (-1).
            conf = -1
        # Keep low-confidence words only when they carry a price.
        if conf < 25 and not re.search(MONEY, txt):
            continue
        key = (data.get('block_num',[0])[i], data.get('par_num',[0])[i], data.get('line_num',[0])[i])
        rows.setdefault(key, []).append((data['left'][i], txt, conf))
    lines = []
    # Keys are (block, paragraph, line) tuples, so tuple order is reading order.
    for _key, words in sorted(rows.items(), key=lambda kv: kv[0]):
        ordered = sorted(words, key=lambda word: word[0])
        line = normalize_line(" ".join(word[1] for word in ordered))
        if line:
            lines.append(line)
    return lines


def paddle_lines_from_image(image: Image.Image):
    """Run PaddleOCR on *image* and return its text regrouped into visual lines.

    Detections with empty text or confidence below 0.35 are discarded. The
    remaining boxes are sorted top-to-bottom and greedily merged into the first
    existing line whose running mean y is within 18 px; each line is then
    joined left-to-right. Returns [] when PaddleOCR is unavailable, fails, or
    finds nothing.
    """
    if not PADDLE_OCR:
        return []
    try:
        pixels = np.array(image.convert("RGB"))
        result = PADDLE_OCR.ocr(pixels, cls=True)
    except Exception as e:
        print("paddle-ocr-error", str(e))
        return []

    detections = []
    for page in result or []:
        for entry in page or []:
            try:
                box, rec = entry[0], entry[1]
                text, conf = rec[0], float(rec[1])
                if not text or conf < 0.35:
                    continue
                ys = [point[1] for point in box]
                xs = [point[0] for point in box]
                detections.append({"x": min(xs), "y": sum(ys)/len(ys), "text": normalize_line(text), "conf": conf})
            except Exception:
                # Malformed entry: skip it and keep the rest of the page.
                continue
    if not detections:
        return []

    # Group words/blocks by visual line: on receipts spatial order matters more
    # than the order in which the engine emitted the text.
    detections.sort(key=lambda det: (det["y"], det["x"]))
    bands = []
    for det in detections:
        band = next((b for b in bands if abs(b["y"] - det["y"]) <= 18), None)
        if band is None:
            bands.append({"y": det["y"], "items": [det]})
            continue
        band["items"].append(det)
        count = len(band["items"])
        # Running mean of the vertical position of the line.
        band["y"] = (band["y"] * (count - 1) + det["y"]) / count

    lines = []
    for band in sorted(bands, key=lambda b: b["y"]):
        left_to_right = sorted(band["items"], key=lambda det: det["x"])
        joined = normalize_line(" ".join(det["text"] for det in left_to_right if det["text"]))
        if joined:
            lines.append(joined)
    return lines

def detect_qr_text(raw: bytes):
    """Decode a QR code from the raw image bytes.

    Returns the decoded payload string, or None when the bytes are not a
    decodable image, no QR code is read, or OpenCV raises.
    """
    try:
        buffer = np.frombuffer(raw, np.uint8)
        decoded_image = cv2.imdecode(buffer, cv2.IMREAD_COLOR)
        if decoded_image is None:
            return None
        payload, _points, _straight = cv2.QRCodeDetector().detectAndDecode(decoded_image)
        return payload or None
    except Exception:
        return None

def extract_nfce_key(text: str):
    """Return the 44-digit access key found in *text*, or None.

    Two shapes are accepted, in this order:

    1. a run of exactly 44 consecutive digits (e.g. inside a QR-code URL);
    2. 44 digits separated by at most one whitespace, dot or hyphen between
       neighbouring digits (keys printed in groups on the receipt).

    The digits must belong to one run. Concatenating every digit of the text
    would build a bogus key out of prices, CNPJ and item codes on any receipt
    that contains 44 or more digits overall.
    """
    source = text or ""
    exact = re.search(r"(?<!\d)\d{44}(?!\d)", source)
    if exact:
        return exact.group(0)
    grouped = re.search(r"(?<!\d)(?:\d[\s.\-]?){43}\d(?!\d)", source)
    if grouped:
        return re.sub(r"\D", "", grouped.group(0))
    return None

def ocr_text(raw: bytes):
    """Run every available OCR pass on the uploaded image and merge the results.

    Readings come from: the QR-code payload (prefixed with "QR_CODE_NFCE "),
    PaddleOCR on the first three preprocessed variants, and Tesseract (plain
    text and word-level lines) on every variant with every config. Readings
    are ranked by score_text_quality; the lines of the best twelve are
    concatenated, skipping lines already seen (case- and whitespace-insensitive).
    When a 44-digit key is found, a "CHAVE_NFCE_44 <key>" line is prepended.

    Raises whatever ``Image.open`` raises for bytes that are not an image.
    """
    img = Image.open(io.BytesIO(raw))
    readings = []
    qr = detect_qr_text(raw)
    if qr:
        readings.append("QR_CODE_NFCE " + qr)
    variants = preprocess_variants(img)

    # 1) PaddleOCR first: much better on small, tilted text and broken blocks.
    for variant in variants[:3]:
        paddle_rows = paddle_lines_from_image(variant)
        if paddle_rows:
            readings.append("\n".join(paddle_rows))

    # 2) Tesseract with several configurations as fallback / second opinion.
    configs = tesseract_configs()
    for variant in variants:
        for cfg in configs:
            try:
                plain = pytesseract.image_to_string(variant, config=cfg)
                if plain and len(plain.strip()) > 10:
                    readings.append(plain)
                word_rows = tsv_lines(variant, cfg)
                if word_rows:
                    readings.append("\n".join(word_rows))
            except Exception:
                pass

    # Rank by quality and keep the best lines, not just the first reading.
    ranked = sorted(readings, key=score_text_quality, reverse=True)
    seen = set()
    unique_lines = []
    for reading in ranked[:12]:
        for raw_line in reading.splitlines():
            cleaned = normalize_line(raw_line)
            if not cleaned:
                continue
            # Deduplication key: lower-cased, whitespace-collapsed, so similar
            # but distinct item lines are still kept.
            fingerprint = re.sub(r"\s+", " ", cleaned.lower())
            if fingerprint in seen:
                continue
            seen.add(fingerprint)
            unique_lines.append(cleaned)

    merged = "\n".join(unique_lines)
    nfce_key = extract_nfce_key((qr or '') + "\n" + merged)
    if nfce_key and "CHAVE_NFCE_44" not in merged:
        merged = "CHAVE_NFCE_44 " + nfce_key + "\n" + merged
    return merged

def clean_name(name):
    """Turn a raw receipt fragment into a presentable product/merchant name.

    Removes a leading item number, 5-14 digit codes, column keywords and unit
    tokens, replaces characters outside letters/digits/space/-./ with spaces,
    collapses whitespace and trims " -.". Text that is entirely upper- or
    lower-case gets light title-casing (words of up to two characters are
    upper-cased). The result is cut to 120 characters; an empty result becomes
    "Item do cupom".
    """
    text = str(normalize_line(name))
    substitutions = (
        (r"^\s*(?:#\s*)?\d{1,4}\s+", "", 0),
        (r"\b\d{5,14}\b", " ", 0),  # codes / EAN
        (r"\b(COD|CÓD|CODIGO|CÓDIGO|ITEM|QTD|QT|QUANT|VL|VLR|UNIT|UNID|EAN|GTIN|TOTAL|TOT|CFOP)\b", " ", re.I),
        (rf"\b({UNITS})\b", " ", re.I),
        # Strip common junk without deleting brand/product words.
        (r"[^A-Za-zÀ-ÿ0-9\s\-\.\/]+", " ", 0),
        (r"\s+", " ", 0),
    )
    for pattern, replacement, flags in substitutions:
        text = re.sub(pattern, replacement, text, flags=flags)
    text = text.strip(" -.")
    # Light title case only when the text came all lower- or all upper-case.
    if text.isupper() or text.islower():
        words = []
        for word in text.split():
            words.append(word.capitalize() if len(word) > 2 else word.upper())
        text = " ".join(words)
    return text[:120] or "Item do cupom"

def extract_product_code(line):
    """Pick the most plausible product code from a receipt line.

    Candidates are standalone runs of 3-14 digits. Those that appear before
    the first price are preferred; among the chosen pool the first code with a
    GTIN length (8, 12, 13 or 14) that is not all zeros wins, otherwise the
    first code of the pool. No check digit is validated.

    Returns ``(code, kind)`` with kind "GTIN" or "INTERNAL" (decided by length
    only), or ``(None, None)`` when the line has no candidate.
    """
    code_pattern = r"(?<!\d)(\d{3,14})(?!\d)"
    gtin_lengths = (8, 12, 13, 14)
    raw = str(line or "")
    all_codes = re.findall(code_pattern, raw)
    if not all_codes:
        return None, None
    first_price = re.search(MONEY, raw)
    prefix = raw if first_price is None else raw[:first_price.start()]
    # Prefer codes located before the values; fall back to every candidate.
    pool = re.findall(code_pattern, prefix) or all_codes
    gtin_like = [
        candidate
        for candidate in pool
        if len(candidate) in gtin_lengths and not re.fullmatch(r"0+", candidate)
    ]
    code = gtin_like[0] if gtin_like else pool[0]
    if len(code) in gtin_lengths:
        return code, "GTIN"
    return code, "INTERNAL"

def merchant_and_type(text: str):
    """Guess the merchant name and the kind of establishment from OCR text.

    The merchant is the first of the top 18 non-empty lines that contains no
    MERCHANT_NOISE substring, no price, is not made only of digits/punctuation
    and has at least 3 characters (cleaned with clean_name). The account type
    is the first category whose keyword appears anywhere in the lower-cased
    text, defaulting to "comércio".

    Returns ``(merchant, account_type)``; the merchant falls back to
    "Estabelecimento não identificado".
    """
    raw_lines = [ln for ln in (normalize_line(raw) for raw in text.splitlines()) if ln]

    merchant = None
    for line in raw_lines[:18]:
        lowered = line.lower()
        is_noise = any(token in lowered for token in MERCHANT_NOISE)
        is_numeric = re.search(MONEY, line) or re.fullmatch(r"[0-9\s\-\./]+", line)
        if is_noise or is_numeric or len(line) < 3:
            continue
        merchant = clean_name(line)
        break

    joined = " ".join(raw_lines).lower()
    # Checked in order; the first category with a matching keyword wins.
    categories = (
        ("mercado", ["mercado", "supermercado", "hortifruti", "mambo", "carrefour", "assai", "assaí", "extra", "pão de açúcar", "pao de acucar", "atacadao", "atacadão"]),
        ("farmácia", ["farmacia", "farmácia", "drogaria", "drogasil", "raia", "panvel", "pague menos", "drog"]),
        ("alimentação fora de casa", ["restaurante", "lanchonete", "padaria", "ifood", "delivery"]),
        ("combustível", ["posto", "combustivel", "combustível", "gasolina", "etanol"]),
    )
    account_type = "comércio"
    for label, keywords in categories:
        if any(keyword in joined for keyword in keywords):
            account_type = label
            break
    return merchant or "Estabelecimento não identificado", account_type

def header_score(line):
    """Count how many HEADER_HINTS substrings occur in the lower-cased *line*."""
    lowered = line.lower()
    return len([hint for hint in HEADER_HINTS if hint in lowered])

def find_item_window(lines):
    """Return the slice of *lines* that most likely holds the item table.

    Start: the line after the first column header (3+ header hints, or a
    product word together with a quantity/value word). Without a header, the
    line just before the first item-like line among the first 25 lines.
    End: the first payment/tax/access-key line, or the first short
    total/subtotal line with a price.
    """
    start = 0
    # Header detection is tolerant to broken OCR: DESCRICAO, QT, VL, TOTAL, etc.
    for idx, line in enumerate(lines):
        lowered = line.lower()
        names_product = re.search(r"descr|produto|item", lowered)
        names_column = re.search(r"qtd|qt|quant|un|vl|valor|tot", lowered)
        if header_score(line) >= 3 or (names_product and names_column):
            start = idx + 1
            break

    # Many receipts print no header; skip the fiscal header up to the first item-like line.
    if start == 0:
        for idx, line in enumerate(lines[:25]):
            if not re.search(MONEY, line):
                continue
            if len(re.findall(r"[A-Za-zÀ-ÿ]", line)) < 2:
                continue
            lowered = line.lower()
            if any(hint in lowered for hint in END_HINTS):
                continue
            start = max(0, idx - 1)
            break

    end = len(lines)
    footer_markers = ["forma pagamento", "pagamento", "tribut", "chave de acesso", "consulte pela chave", "protocolo", "qrcode", "qr-code"]
    for pos in range(start, len(lines)):
        current = lines[pos]
        lowered = current.lower()
        if any(marker in lowered for marker in footer_markers):
            end = pos
            break
        # A standalone TOTAL usually marks the end, but not item lines that mention a total.
        mentions_total = re.search(r"\b(total|valor total|subtotal|sub total)\b", lowered)
        if mentions_total and re.search(MONEY, current) and len(re.findall(r"[A-Za-zÀ-ÿ]", current)) < 14:
            end = pos
            break
    return lines[start:end]

def is_item_line(line: str):
    """Tell whether *line* looks like a purchased item.

    A line qualifies when it has at least 4 characters, contains none of the
    fiscal/footer keywords, contains a price, is not a column header
    (fewer than 3 header hints) and has at least two letters.
    """
    if len(line) < 4:
        return False
    lowered = line.lower()
    fiscal_words = ["cnpj", "cpf", "chave", "protocolo", "autoriz", "tribut", "operador", "caixa", "consumidor"]
    for word in fiscal_words:
        if word in lowered:
            return False
    if not re.search(MONEY, line):
        return False
    if header_score(line) >= 3:
        return False
    return len(re.findall(r"[A-Za-zÀ-ÿ]", line)) >= 2

def merge_wrapped_lines(lines):
    """Join descriptions that wrapped onto the line(s) before their prices.

    Lines without a price are accumulated (up to 180 characters) unless they
    are 2 characters or shorter or contain an END_HINTS substring; the next
    line with a price is emitted with the accumulated text in front of it.
    A leftover accumulation longer than 4 characters is emitted at the end.
    """
    merged = []
    carry = ""
    for raw in lines:
        current = normalize_line(raw)
        if not current:
            continue
        if re.search(MONEY, current):
            merged.append(normalize_line((carry + " " + current).strip()))
            carry = ""
            continue
        if len(current) <= 2:
            continue
        lowered = current.lower()
        if any(hint in lowered for hint in END_HINTS):
            continue
        # Keep the wrapped description until the line with the numbers arrives.
        carry = normalize_line((carry + " " + current).strip())[:180]
    if carry and len(carry) > 4:
        merged.append(carry)
    return merged

def parse_item_line(line):
    """Parse one receipt line into an item dict, or return None.

    The last price on the line is the item total; everything to its left is
    searched for quantity, unit price and description. Supported layouts:

        COD DESCRICAO 2 UN X 4,99 9,98
        COD DESCRICAO 0,350 KG 14,90 5,22
        COD DESCRICAO 1 3,49 3,49

    Returns a dict with keys name, quantity, unitPrice, total, productCode,
    codeKind, sourceLine and ocrConfidence. Returns None when the line has no
    price, the total is outside (0, 99999], or no usable name remains.
    """
    line = normalize_line(line)
    product_code, code_kind = extract_product_code(line)
    values = re.findall(MONEY, line)
    if not values:
        return None
    total = br_float(values[-1])
    if total <= 0 or total > 99999:
        return None
    last_pos = line.rfind(values[-1])
    left = line[:last_pos].strip()
    qty = 1.0
    unit = total
    name = left
    # Explicit "quantity [unit] x unit-price" block.
    m = re.search(r"(\d+(?:[\.,]\d{1,3})?)\s*(?:" + UNITS + r")?\s*(?:x|X|\*)\s*(" + MONEY + r")", left, flags=re.I)
    if m:
        qty = br_float(m.group(1))
        unit = br_float(m.group(2))
        name = left[:m.start()] + " " + left[m.end():]
    else:
        # "quantity unit" block without a multiplication sign.
        qm = re.search(r"(\d+(?:[\.,]\d{1,3})?)\s*(" + UNITS + r")\b", left, flags=re.I)
        if qm:
            qty = br_float(qm.group(1))
        if len(values) >= 2:
            unit = br_float(values[-2])
            # Infer the quantity from total / unit price when it is plausible.
            if unit > 0:
                inferred = round(total / unit, 3)
                if 0.05 <= inferred <= 999:
                    # Accept near-integers, or small values typical of weights in kg.
                    if abs(round(inferred) - inferred) < 0.04 or inferred < 10:
                        qty = inferred
        else:
            unit = round(total / qty, 2) if qty else total
        # Remove the quantity block first, using offsets that still refer to
        # `left`, and only then blank out the prices. Doing it the other way
        # round shifts the text and the offsets no longer match.
        name = left[:qm.start()] + " " + left[qm.end():] if qm else left
        name = re.sub(MONEY, " ", name)
    name = clean_name(name)
    # Heuristic: if the name came out too short, take the text between the
    # code and the first quantity/price.
    if len(name) < 3:
        tmp = left
        if product_code:
            tmp = re.sub(r"\b" + re.escape(product_code) + r"\b", " ", tmp, count=1)
        tmp = re.split(r"\b\d+(?:[\.,]\d{1,3})?\s*(?:"+UNITS+r")\b|"+MONEY, tmp, flags=re.I)[0]
        name = clean_name(tmp)
    if qty <= 0:
        qty = 1
    # Make quantity x unit price agree with the printed total (0.20 tolerance).
    calc = round(qty * unit, 2)
    if unit <= 0 or abs(calc - total) > 0.20:
        unit = round(total / qty, 2) if qty else total
    if len(name) < 2 or re.fullmatch(r"[0-9\s\-\.]+", name):
        return None
    # Fixed base confidence, reduced when there is no code or the numbers disagree.
    confidence = 0.88
    if not product_code:
        confidence -= 0.08
    if abs(round(qty*unit,2)-round(total,2)) > 0.15:
        confidence -= 0.12
    return {"name": name, "quantity": round(qty,3), "unitPrice": round(unit,2), "total": round(total,2), "productCode": product_code, "codeKind": code_kind, "sourceLine": line, "ocrConfidence": round(max(0.35, confidence),2)}

def parse_items(text: str):
    """Extract the list of purchased items from the merged OCR text.

    Lines are normalized, narrowed to the item window, merged with their
    wrapped descriptions and parsed one by one. If the window yields fewer
    than two candidate lines, the whole text is used instead. Items are
    de-duplicated on (first 28 alphanumerics of the name, quantity, total),
    keeping the first occurrence.
    """
    raw_lines = [ln for ln in (normalize_line(raw) for raw in text.splitlines()) if ln]
    candidate_lines = merge_wrapped_lines(find_item_window(raw_lines))
    if len(candidate_lines) < 2:
        candidate_lines = merge_wrapped_lines(raw_lines)

    parsed_items = []
    for line in candidate_lines:
        if is_item_line(line):
            item = parse_item_line(line)
            if item:
                parsed_items.append(item)

    # Final de-duplication; wrapped descriptions were already merged above.
    unique = {}
    for item in parsed_items:
        fingerprint = (
            re.sub(r"[^a-z0-9]", "", item["name"].lower())[:28],
            round(item["quantity"], 3),
            round(item["total"], 2),
        )
        unique.setdefault(fingerprint, item)
    return list(unique.values())

def parse_price_label(text: str):
    """Extract product name and price from the OCR text of a shelf price label.

    The price is the largest monetary value found. The product is the first
    line that, after cleaning, has at least 3 characters, is not purely
    numeric and does not mention "valid"; prices are stripped from lines that
    contain a price-related keyword before cleaning. A warning is added and
    the confidence lowered when no price is found.
    """
    lines = [ln for ln in (normalize_line(raw) for raw in text.splitlines()) if ln]
    joined = "\n".join(lines)
    price = max((br_float(found) for found in re.findall(MONEY, joined)), default=0.0)

    price_words = ["r$", "kg", "un", "cada", "oferta", "valid", "preco", "preço", "por"]
    product_candidates = []
    for line in lines:
        lowered = line.lower()
        if any(word in lowered for word in price_words):
            cleaned = clean_name(re.sub(MONEY, " ", line))
        else:
            cleaned = clean_name(line)
        if len(cleaned) < 3 or re.fullmatch(r"[0-9\s\-\.]+", cleaned) or "valid" in lowered:
            continue
        product_candidates.append(cleaned)
    product = product_candidates[0] if product_candidates else "Produto extraído da etiqueta"

    warnings = []
    if price <= 0:
        warnings.append("Não foi possível identificar o preço com segurança. Edite manualmente o item incluído.")
    product_code, code_kind = extract_product_code(joined)
    return {
        "product": product[:100],
        "quantity": 1,
        "price": price,
        "total": round(price, 2),
        "productCode": product_code,
        "codeKind": code_kind,
        "rawText": joined[:12000],
        "confidence": 0.86 if price > 0 else 0.35,
        "warnings": warnings,
    }

@app.get("/health")
def health():
    """Liveness endpoint: service status, version and which OCR engines are available."""
    payload = {"status": "ok", "version": "1.6.0", "engines": OCR_ENGINES}
    return payload

@app.post("/ocr/receipt")
async def receipt(file: UploadFile = File(...)):
    """OCR an uploaded receipt photo and return merchant, items, total and warnings.

    The total is the sum of the extracted item totals. Warnings are added when
    fewer than three items were found, when no line looks like a column
    header, and when the overall text quality score is below 35.
    """
    raw = await file.read()
    text = ocr_text(raw)
    items = parse_items(text)
    merchant, account_type = merchant_and_type(text)
    total = round(sum(item["total"] for item in items), 2)

    warnings = []
    if len(items) < 3:
        warnings.append("Poucos itens foram extraídos. Para cupom térmico/matricial, fotografe mais perto, com boa luz, sem sombra, em superfície plana e divida o cupom em partes se ele for grande.")
    header_found = any(header_score(line) >= 3 for line in text.splitlines())
    if not header_found:
        warnings.append("Cabeçalho de colunas não foi encontrado com segurança; o sistema aplicou extração por linhas com valores. Revise os itens antes de salvar.")
    # Practical hint to avoid presenting a low-quality reading as reliable.
    if score_text_quality(text) < 35:
        warnings.append("A imagem parece estar com baixa nitidez/contraste para OCR. Refaça a foto alinhada, ocupando toda a tela e sem inclinação.")

    return {
        "merchant": merchant,
        "accountType": account_type,
        "descriptionSuggestion": f"Conta de {account_type} - {merchant}",
        "items": items,
        "total": total,
        "rawText": text[:24000],
        "warnings": warnings,
        "parser": "paddleocr-opencv-thermal-receipt-v1.6.0",
    }

@app.post("/ocr/price-label")
async def price_label(file: UploadFile = File(...)):
    """OCR an uploaded shelf price-label photo and return the parsed product and price."""
    image_bytes = await file.read()
    return parse_price_label(ocr_text(image_bytes))
