import os

# ========== Must be set BEFORE paddleocr is imported ==========
# Every cache/home location below is redirected under G:/AI_Study/ocr.
os.environ["PADDLE_HOME"] = "G:/AI_Study/ocr/paddle_cache"
os.environ["PADDLEX_HOME"] = "G:/AI_Study/ocr/paddlex_cache"
os.environ["PADDLE_PDX_CACHE_HOME"] = "G:/AI_Study/ocr/paddlex_cache"
os.environ["PPOCR_HOME"] = "G:/AI_Study/ocr/ppocr_cache"
os.environ["HUGGINGFACE_HUB_CACHE"] = "G:/AI_Study/ocr/huggingface_cache"
os.environ["TRANSFORMERS_CACHE"] = "G:/AI_Study/ocr/transformers_cache"
os.environ["HF_HOME"] = "G:/AI_Study/ocr/huggingface_cache"
os.environ["XDG_CACHE_HOME"] = "G:/AI_Study/ocr/cache"
os.environ["PPDET_HOME"] = "G:/AI_Study/ocr/ppdet_cache"
os.environ["PPLITE_HOME"] = "G:/AI_Study/ocr/pplite_cache"
# NOTE(review): presumably skips PaddleX's model-source check - confirm against the PaddleX docs.
os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "1"
# NOTE(review): presumably turns Paddle's PIR API off - confirm against the Paddle flag docs.
os.environ["FLAGS_use_pir_api"] = "0"

import paddle

# NOTE(review): presumably forces dynamic-graph (non-static) execution before
# any model is built - confirm against the Paddle documentation.
paddle.disable_static()

# Create every cache directory referenced by the environment variables above.
_CACHE_ROOT = "G:/AI_Study/ocr"
cache_dirs = [
    f"{_CACHE_ROOT}/{leaf}"
    for leaf in (
        "paddle_cache",
        "paddlex_cache",
        "ppocr_cache",
        "huggingface_cache",
        "transformers_cache",
        "cache",
        "ppdet_cache",
        "pplite_cache",
    )
]
for d in cache_dirs:
    os.makedirs(d, exist_ok=True)

# Echo the key settings at startup so a wrong cache location is easy to spot.
_banner = "=" * 60
print(_banner)
print("[환경 변수 확인]")
for _name in ("PADDLE_HOME", "PADDLEX_HOME", "PADDLE_PDX_CACHE_HOME", "HF_HOME"):
    print(f"  {_name}: {os.environ.get(_name)}")
print(_banner)

import re
import json
import numpy as np
import tkinter as tk
from tkinter import filedialog, scrolledtext, messagebox
from threading import Thread
from PIL import Image, ImageTk
from datetime import datetime
from paddleocr import PaddleOCRVL

# Startup banner only: the pipeline itself is constructed later, in
# OCRProcessor._init_ocr().
print("PaddleOCR-VL 초기화 중...")


class NumpyEncoder(json.JSONEncoder):
    """JSON encoder for OCR results that contain NumPy values and result objects.

    Objects are recognized by class *name* (``PaddleOCRVLBlock``, ``Image``)
    rather than by type, so this module does not need to import those classes.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Handles:
            * ``PaddleOCRVLBlock``-named objects -> dict of label/content/bbox
            * ``Image``-named objects -> dict describing size and mode only
            * ``numpy.ndarray`` -> nested list
            * ``numpy.bool_`` / ``numpy.integer`` / ``numpy.floating`` -> the
              matching Python scalar

        Anything else is delegated to ``json.JSONEncoder.default``, which
        raises ``TypeError``.
        """
        class_name = obj.__class__.__name__
        if class_name == "PaddleOCRVLBlock":
            return {
                "block_label": getattr(obj, "block_label", None),
                "block_content": getattr(obj, "block_content", None),
                "block_bbox": getattr(obj, "block_bbox", None),
            }
        if class_name == "Image":
            # Pixel data is intentionally not serialized, only a description.
            return {
                "type": "Image",
                "size": getattr(obj, "size", None),
                "mode": getattr(obj, "mode", None),
            }
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.bool_):
            # np.bool_ is neither np.integer nor np.floating; without this
            # branch it falls through to TypeError.
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        return super().default(obj)


class OCRProcessor:
    def __init__(self):
        """Create the processor and immediately try to load the OCR pipeline.

        ``self.pipeline`` stays ``None`` when loading fails; ``_init_ocr``
        prints the failure instead of raising.
        """
        self.pipeline = None
        self._init_ocr()

    def _init_ocr(self):
        """Build the PaddleOCR-VL pipeline (``pipeline_version="v1"``).

        Returns:
            True when the pipeline was constructed and stored in
            ``self.pipeline``; False when construction raised, in which case
            the error and its traceback are printed and nothing is re-raised.
        """
        import traceback

        try:
            print("[OCR] PaddleOCR-VL 초기화 중...")
            print("문서 분석 모델(v1)을 로드합니다.")
            loaded_pipeline = PaddleOCRVL(pipeline_version="v1")
            self.pipeline = loaded_pipeline
            print("[OCR] 초기화 완료! (PaddleOCR-VL-0.9B)")
        except Exception as init_error:
            print(f"[OCR] 초기화 실패: {init_error}")
            traceback.print_exc()
            return False
        return True

    def pdf_to_images(self, pdf_path, dpi=150):
        """Render every page of a PDF into a list of RGB PIL images.

        Args:
            pdf_path: Path of the PDF to open with PyMuPDF (``fitz``).
            dpi: Resolution passed to ``page.get_pixmap``.

        Returns:
            A list with one image per page, or ``None`` when opening or
            rendering fails (the error is printed, not raised).
        """
        import fitz

        images = []
        try:
            doc = fitz.open(pdf_path)
            try:
                for page_num in range(len(doc)):
                    page = doc.load_page(page_num)
                    pix = page.get_pixmap(dpi=dpi)
                    img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                    images.append(img)
            finally:
                # Close the document even when a page fails to render;
                # previously it stayed open on that path.
                doc.close()
            print(f"[PDF] 변환 완료: {len(images)}페이지")
            return images
        except Exception as e:
            print(f"[PDF 변환 오류] {e}")
            return None

    def extract_receipt_json(self, file_path, progress_callback=None):
        """Run OCR on an image or a PDF and return ``(result, error_message)``.

        PDFs are rendered and recognized one page at a time. Each page is
        written to ``<name>_page_<n>_extracted.json`` and a final
        ``<name>_<timestamp>_summary.json`` is written, all inside the
        ``ocr_results`` directory next to this file. Any other file is handed
        to the pipeline as a single image.

        Args:
            file_path: Path of the image or PDF to analyze.
            progress_callback: Optional callable invoked before each PDF page
                as ``progress_callback(page_number, total_pages, memory_mb)``.

        Returns:
            ``(summary_dict, None)`` for a PDF, ``(result_dict, None)`` for an
            image, or ``(None, message)`` when the engine is not initialized,
            the file does not exist, or processing raised.
        """
        if not self.pipeline:
            return None, "OCR 엔진이 초기화되지 않았습니다."
        if not os.path.exists(file_path):
            return None, f"파일 없음: {file_path}"

        try:
            print(f"\n[OCR] 분석 시작: {os.path.basename(file_path)}")
            if file_path.lower().endswith(".pdf"):
                return self._extract_from_pdf(file_path, progress_callback), None
            return self._extract_from_image(file_path), None
        except Exception as e:
            print(f"[OCR 오류] {e}")
            import traceback

            traceback.print_exc()
            return None, str(e)

    def _notify_progress(self, progress_callback, current_page, total_pages):
        """Best-effort progress report; never lets a callback error abort OCR."""
        if not progress_callback:
            return
        try:
            import psutil

            memory_mb = psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024
        except Exception:
            # psutil is optional here: still report progress, just without a
            # memory figure, instead of silently skipping the callback.
            memory_mb = 0.0
        try:
            progress_callback(current_page, total_pages, memory_mb)
        except Exception as e:
            print(f"[OCR] 진행률 콜백 오류: {e}")

    def _locate_result_json(self, output_dir, temp_stem, base_name):
        """Find the ``*_res.json`` written for the page whose temp image is ``temp_stem``.

        The file named exactly ``<temp_stem>_res.json`` is preferred so that a
        stale or unrelated result left in ``output_dir`` is not picked up.
        NOTE(review): that name assumes ``save_to_json`` derives the file name
        from the input image's stem - confirm against the PaddleX docs. The
        original directory scan is kept as a fallback in case it does not.
        """
        expected = os.path.join(output_dir, f"{temp_stem}_res.json")
        if os.path.exists(expected):
            return expected
        for name in os.listdir(output_dir):
            if name.endswith("_res.json") and (
                "temp_page" in name or base_name in name
            ):
                return os.path.join(output_dir, name)
        return None

    def _extract_from_pdf(self, file_path, progress_callback=None):
        """OCR a PDF page by page; write per-page and summary JSON; return the summary."""
        import fitz
        import gc
        from datetime import datetime

        print("[PDF] PDF 파일 감지, 페이지 단위로 순차 처리 중...")

        doc = fitz.open(file_path)
        try:
            total_pages = len(doc)

            output_dir = os.path.join(os.path.dirname(__file__), "ocr_results")
            os.makedirs(output_dir, exist_ok=True)

            base_name = os.path.splitext(os.path.basename(file_path))[0]
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

            summary = {
                "source_file": os.path.basename(file_path),
                "total_pages": total_pages,
                "processed_date": timestamp,
                "pages": [],
            }
            all_items = []

            for page_num in range(total_pages):
                page_no = page_num + 1
                self._notify_progress(progress_callback, page_no, total_pages)
                print(f"[OCR] 페이지 {page_no}/{total_pages} 처리 중...")

                temp_stem = f"temp_page_{page_num}"
                temp_file = os.path.join(output_dir, f"{temp_stem}.png")
                res_file = None
                try:
                    # The pipeline is fed a file path, so the page is rendered
                    # to a temporary PNG first.
                    page = doc.load_page(page_num)
                    pix = page.get_pixmap(dpi=150)
                    img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                    del pix
                    img.save(temp_file)
                    img.close()
                    del img

                    try:
                        output = self.pipeline.predict(temp_file)
                    except Exception as e:
                        print(f"페이지 {page_no} OCR 실패: {e}")
                        # The ``finally`` below still removes the temp image.
                        continue

                    for res in output:
                        if hasattr(res, "save_to_json"):
                            try:
                                res.save_to_json(save_path=output_dir)
                            except Exception as e:
                                # Best effort: the page is then recorded with
                                # empty text rather than aborting the run.
                                print(f"[OCR] 페이지 {page_no} 결과 저장 실패: {e}")
                    del output

                    res_file = self._locate_result_json(
                        output_dir, temp_stem, base_name
                    )

                    text_parts = []
                    page_parsing_res = []
                    if res_file and os.path.exists(res_file):
                        with open(res_file, "r", encoding="utf-8") as f:
                            saved_data = json.load(f)

                        if "parsing_res_list" in saved_data:
                            for block in saved_data["parsing_res_list"]:
                                if isinstance(block, dict):
                                    block["page_num"] = page_no
                                    page_parsing_res.append(block)
                                    content = block.get("block_content", "")
                                    if content:
                                        text_parts.append(content)

                    # Turn literal backslash-n / backslash-t sequences in the
                    # saved text into a real newline / a space.
                    page_text = (
                        "\n".join(text_parts).replace("\\n", "\n").replace("\\t", " ")
                    )

                    extracted_result = {
                        "source_file": os.path.basename(file_path),
                        "page_num": page_no,
                        "total_pages": total_pages,
                        "processed_date": timestamp,
                        "text": page_text,
                        "parsing_res_list": page_parsing_res,
                    }
                    extracted_json_path = os.path.join(
                        output_dir, f"{base_name}_page_{page_no}_extracted.json"
                    )
                    with open(extracted_json_path, "w", encoding="utf-8") as f:
                        json.dump(extracted_result, f, ensure_ascii=False, indent=2)

                    for block in page_parsing_res:
                        label = block.get("block_label")
                        content = block.get("block_content", "")
                        if not content:
                            continue
                        if label == "table":
                            items = self._parse_table_content(content)
                        elif label in ["text", "content"]:
                            items = self._parse_text_to_items(content)
                        else:
                            continue
                        for item in items:
                            item["page"] = page_no
                            all_items.append(item)

                    summary["pages"].append(
                        {
                            "page_num": page_no,
                            "status": "success",
                            "extracted_file": extracted_json_path,
                            "text_length": len(page_text),
                            "blocks_count": len(page_parsing_res),
                            # NOTE: despite the key name this counts the
                            # table/text/content *blocks*, not parsed items;
                            # kept as-is for existing consumers.
                            "items_count": sum(
                                1
                                for blk in page_parsing_res
                                if blk.get("block_label")
                                in ["table", "text", "content"]
                            ),
                        }
                    )
                finally:
                    # Per-page scratch files are removed on success and on
                    # failure alike.
                    for leftover in (temp_file, res_file):
                        if leftover and os.path.exists(leftover):
                            try:
                                os.remove(leftover)
                            except OSError:
                                pass
                    gc.collect()
        finally:
            # Release the PDF handle even when a page raised.
            doc.close()

        summary["total_items_extracted"] = len(all_items)
        summary["items"] = all_items

        summary_path = os.path.join(
            output_dir, f"{base_name}_{timestamp}_summary.json"
        )
        with open(summary_path, "w", encoding="utf-8") as f:
            json.dump(summary, f, ensure_ascii=False, indent=2)

        print(
            f"\n[OCR] 분석 완료! 총 {total_pages}페이지, 추출 품목: {len(all_items)}개"
        )
        return summary

    def _extract_from_image(self, file_path):
        """OCR a single image file and return the pipeline's result dict."""
        print("[이미지] 이미지 파일 처리 중...")
        output_dir = os.path.join(os.path.dirname(__file__), "ocr_results")
        os.makedirs(output_dir, exist_ok=True)
        output = self.pipeline.predict(file_path)

        result_data = None
        raw_text = ""
        for res in output:
            if hasattr(res, "print"):
                res.print()
            if hasattr(res, "res") and isinstance(res.res, dict):
                result_data = res.res
                if "text" in res.res:
                    raw_text = res.res["text"]
            if hasattr(res, "result"):
                result_data = res.result
            if hasattr(res, "save_to_json"):
                try:
                    res.save_to_json(save_path=output_dir)
                except Exception as e:
                    # Best effort: the in-memory result is still returned.
                    print(f"[OCR] 결과 저장 실패: {e}")

        layout_boxes_count = 0
        if result_data and isinstance(result_data, dict):
            layout_res = result_data.get("layout_det_res", {})
            boxes = layout_res.get("boxes", [])
            layout_boxes_count = len(boxes)
            parsing_list = result_data.get("parsing_res_list", [])

            # No layout boxes and no parsed blocks, but raw text exists:
            # expose the text as one synthetic block with a fixed bbox.
            if layout_boxes_count == 0 and len(parsing_list) == 0 and raw_text:
                result_data["parsing_res_list"] = [
                    {
                        "block_label": "text",
                        "block_content": raw_text,
                        "block_bbox": [0, 0, 1000, 1333],
                    }
                ]
                if "layout_det_res" not in result_data:
                    result_data["layout_det_res"] = {}
                result_data["layout_det_res"]["boxes"] = [[0, 0, 1000, 1333]]

        if result_data is None:
            result_data = {"text": str(output), "raw_text": raw_text}

        result_data["total_pages"] = 1
        print(f"[OCR] 이미지 분석 완료! (감지된 블록: {layout_boxes_count})")
        return result_data

    def _parse_parsing_res_list(self, parsing_res_list):
        """Collect receipt items from the dict blocks of a ``parsing_res_list``.

        ``table`` blocks go through ``_parse_table_content`` and ``text``
        blocks through ``_parse_text_to_items``; non-dict entries, other
        labels and empty contents are ignored.
        """
        collected = []
        for entry in parsing_res_list:
            if not isinstance(entry, dict):
                continue
            kind = entry.get("block_label")
            body = entry.get("block_content")
            if not body:
                continue
            if kind == "table":
                parser = self._parse_table_content
            elif kind == "text":
                parser = self._parse_text_to_items
            else:
                continue
            collected.extend(parser(body))
        return collected

    def _get_memory_usage(self):
        """Return the current process's resident set size (RSS) in MB, via psutil."""
        import os
        import psutil

        rss_bytes = psutil.Process(os.getpid()).memory_info().rss
        return rss_bytes / 1024 / 1024

    def pdf_to_images_stream(self, pdf_path, dpi=150, start_page=0, end_page=None):
        """Yield ``(image, page_number)`` for PDF pages one at a time.

        Only one rendered page is alive at any moment. Each yielded image is
        closed as soon as the generator is resumed (or closed), so the
        consumer must be finished with an image before asking for the next.

        Args:
            pdf_path: Path of the PDF to open with PyMuPDF (``fitz``).
            dpi: Resolution passed to ``page.get_pixmap``.
            start_page: Zero-based index of the first page to render.
            end_page: Zero-based exclusive end index; ``None`` means the last
                page. Values past the end are clamped to the page count.

        Yields:
            ``(PIL image in RGB mode, 1-based page number)``.
        """
        import fitz
        import gc

        doc = fitz.open(pdf_path)
        try:
            page_count = len(doc)
            stop = page_count if end_page is None else min(end_page, page_count)

            for page_num in range(start_page, stop):
                page = doc.load_page(page_num)
                pix = page.get_pixmap(dpi=dpi)
                img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

                # Drop the page-level objects before handing the image out.
                del pix
                del page

                try:
                    yield img, page_num + 1
                finally:
                    # Also runs when the consumer stops early or raises.
                    img.close()
                    del img
                    gc.collect()
        finally:
            # Previously skipped whenever iteration did not run to the end.
            doc.close()

    def parse_receipt_items_from_json(self, json_data):
        """Extract receipt items from an OCR result dict.

        The first key present decides how the data is parsed, in this order:
        ``parsing_res_list`` (table blocks only), ``markdown``, ``table``,
        ``res`` (stringified and parsed as free text). Empty input, non-dict
        input and dicts with none of those keys yield an empty list.
        """
        if not json_data or not isinstance(json_data, dict):
            return []

        if "parsing_res_list" in json_data:
            found = []
            for entry in json_data["parsing_res_list"]:
                # Blocks may be result objects (attribute access) or plain dicts.
                if hasattr(entry, "block_label"):
                    kind, body = entry.block_label, entry.block_content
                elif isinstance(entry, dict):
                    kind, body = entry.get("block_label"), entry.get("block_content")
                else:
                    continue
                if kind == "table" and body:
                    found.extend(self._parse_table_content(body))
            return found

        if "markdown" in json_data:
            return self._parse_markdown_table(json_data["markdown"])
        if "table" in json_data:
            return self._parse_table_to_items(json_data["table"])
        if "res" in json_data:
            return self._parse_text_to_items(str(json_data["res"]))
        return []

    def _parse_table_content(self, table_text):
        """Parse a pipe-delimited table into receipt items.

        A row is used only when it has at least four cells between its outer
        pipes, its first cell is not a known header label, is not a barcode
        (12+ digits once commas are removed) and contains at least one Hangul
        syllable. Amounts are the integers between 100 and 1,000,000 found in
        cells 2-4; a row needs at least two of them.

        Args:
            table_text: Table text with one row per line.

        Returns:
            List of dicts with ``product``, ``quantity``, ``unit_price`` and
            ``total_price``.
        """
        items = []
        if not table_text:
            return items

        header_labels = ("상품명", "품명", "Product", "Item")
        for line in table_text.strip().split("\n"):
            if "|" not in line:
                continue
            # [1:-1] drops the empty strings produced by the outer pipes.
            cells = [c.strip() for c in line.split("|")[1:-1]]
            if len(cells) < 4 or cells[0] in header_labels:
                continue

            product = cells[0]
            digits_only = product.replace(",", "")
            if digits_only.isdigit() and len(digits_only) >= 12:
                continue  # barcode row
            if not any("\uac00" <= ch <= "\ud7a3" for ch in product):
                continue  # product names are expected to contain Hangul

            numbers = []
            for cell in cells[1:4]:
                for token in re.findall(r"[\d,]+", cell):
                    try:
                        value = int(token.replace(",", ""))
                    except ValueError:
                        # e.g. a token made only of commas
                        continue
                    if 100 <= value <= 1000000:
                        numbers.append(value)
            if len(numbers) < 2:
                continue

            total_price = numbers[-1]
            if len(numbers) >= 3:
                # NOTE(review): every kept number is >= 100, so no quantity
                # column survives the filter above; quantity has always been
                # 1 on this path. Confirm whether that is intended.
                quantity = 1
                unit_price = numbers[1]
            else:
                unit_price = numbers[0]
                # unit_price >= 100 here, so the division is always safe.
                quantity = total_price // unit_price
            items.append(
                {
                    "product": product,
                    "quantity": quantity,
                    "unit_price": unit_price,
                    "total_price": total_price,
                }
            )
        return items

    def _parse_markdown_table(self, markdown_text):
        """Parse a Markdown table into receipt items.

        Text with fewer than three lines is ignored. Separator rows (only
        pipes, dashes, colons and whitespace), rows with fewer than four
        cells and header rows are skipped. Columns are read positionally:
        product, quantity, unit price, total price.

        Args:
            markdown_text: Markdown table text with one row per line.

        Returns:
            List of dicts with ``product``, ``quantity``, ``unit_price`` and
            ``total_price``; numeric cells without digits become 0.
        """
        items = []
        if not markdown_text:
            return items
        lines = markdown_text.strip().split("\n")
        if len(lines) < 3:
            return items

        header_labels = ("상품명", "품명", "Product", "Item", "제품명")
        for line in lines:
            if "|" not in line or re.match(r"[\|\s\-:]+$", line):
                continue
            cells = [c.strip() for c in line.split("|")[1:-1]]
            if len(cells) < 4 or cells[0] in header_labels:
                continue
            try:
                item = {
                    "product": cells[0],
                    "quantity": self._to_int(cells[1]),
                    "unit_price": self._to_int(cells[2]),
                    "total_price": self._to_int(cells[3]),
                }
            except (TypeError, ValueError):
                # Narrowed from a bare ``except``: only conversion problems
                # should drop a row.
                continue
            items.append(item)
        return items

    def _parse_table_to_items(self, table_data):
        """Convert a list of row dicts into receipt items.

        Accepted key aliases: ``product``/``name``/``item``,
        ``unit_price``/``price`` and ``total_price``/``amount``. Quantity
        defaults to 1, prices to 0. Non-dict rows and rows whose product is
        empty are skipped; anything other than a list yields ``[]``.
        """
        if not isinstance(table_data, list):
            return []

        parsed = []
        for row in table_data:
            if not isinstance(row, dict):
                continue
            name_fallback = row.get("name", row.get("item", ""))
            product = row.get("product", name_fallback)
            if not product:
                continue
            unit_raw = row.get("unit_price", row.get("price", 0))
            total_raw = row.get("total_price", row.get("amount", 0))
            parsed.append(
                {
                    "product": product,
                    "quantity": self._to_int(row.get("quantity", 1)),
                    "unit_price": self._to_int(unit_raw),
                    "total_price": self._to_int(total_raw),
                }
            )
        return parsed

    def _parse_text_to_items(self, text):
        """Heuristically pull receipt items out of free text, line by line.

        A line yields an item when it contains Hangul and a digit, holds at
        least two numbers, and its first number has fewer than 12 digits
        (longer ones are treated as barcodes). The first number becomes the
        unit price, the last the total price; quantity is always 1.
        """
        found = []
        if not text:
            return found

        for row in text.split("\n"):
            if not (re.search(r"[가-힣]", row) and re.search(r"\d", row)):
                continue

            amounts = []
            for token in re.findall(r"[\d,]+", row):
                digits = token.replace(",", "")
                if digits.isdigit():
                    amounts.append(int(digits))
            if len(amounts) < 2:
                continue

            name_match = re.search(r"([가-힣\*\/]+[가-힣\*\/\d\w]*)", row)
            product = name_match.group(1) if name_match else row[:20]
            if len(str(amounts[0])) >= 12:
                continue

            found.append(
                {
                    "product": product,
                    "quantity": 1,
                    "unit_price": amounts[0],
                    "total_price": amounts[-1],
                }
            )
        return found

    def _to_int(self, value):
        """Coerce ``value`` to an int.

        Ints are returned unchanged, floats are truncated, strings are
        reduced to their ASCII digits (``0`` when none remain), and every
        other type yields ``0``.
        """
        if isinstance(value, int):
            return value
        if isinstance(value, float):
            return int(value)
        if not isinstance(value, str):
            return 0
        digits = "".join(re.findall(r"[0-9]", value))
        if not digits:
            return 0
        return int(digits)


class OCRGuiApp:
    def __init__(self, root):
        """Set up window state, build the widgets and schedule OCR engine loading.

        Args:
            root: The Tk root window this application draws into.
        """
        self.root = root
        self.root.title("📷 PaddleOCR-VL 영수증 분석기")
        self.root.geometry("1200x800")
        # setup_gui() reconfigures the background again right after this.
        self.root.configure(bg="#1a1a2e")

        self.current_image_path = None
        self.ocr_results = []
        self.receipt_items = []
        self.raw_json = None
        self._current_photo = None  # keeps the preview PhotoImage referenced
        self._processing_lock = False
        self.page_range_var = tk.StringVar(value="all")
        self.page_start_var = tk.StringVar(value="1")
        self.page_end_var = tk.StringVar(value="")

        self.output_dir = os.path.join(os.path.dirname(__file__), "ocr_results")
        # exist_ok avoids the check-then-create race and matches how the rest
        # of this file creates directories.
        os.makedirs(self.output_dir, exist_ok=True)

        self.setup_gui()
        # Load the engine shortly after the window is up (100 ms later).
        self.root.after(100, self.init_ocr_engine)
        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)

    def init_ocr_engine(self):
        """Create the ``OCRProcessor`` on a daemon thread and report the outcome.

        The status-bar update is queued with ``root.after(0, ...)`` rather
        than applied directly from the worker thread.
        """

        def _load_in_background():
            self.ocr = OCRProcessor()
            if self.ocr.pipeline:
                message, color = "✅ OCR 엔진 준비 완료 (PaddleOCR-VL-0.9B)", "#27ae60"
            else:
                message, color = "❌ OCR 엔진 초기화 실패", "#e74c3c"
            self.root.after(0, lambda: self.status.config(text=message, fg=color))

        worker = Thread(target=_load_in_background, daemon=True)
        worker.start()

    def setup_gui(self):
        """Build the whole window: title, three content columns, buttons, progress and status bars.

        Every widget is stored on ``self`` so other methods can update it.
        The order of the ``pack``/``grid`` calls determines the layout.
        """
        from tkinter import ttk

        self.root.configure(bg="#e9ecef")
        # Main frame
        self.main = tk.Frame(self.root, bg="#e9ecef")
        self.main.pack(fill=tk.BOTH, expand=True, padx=20, pady=(20, 10))
        # Title at the top
        self.title_frame = tk.Frame(self.main, bg="#e9ecef")
        self.title_frame.pack(fill=tk.X, pady=(0, 15))
        self.title_label = tk.Label(
            self.title_frame,
            text="OCR 문서 분석기",
            font=("Malgun Gothic", 16, "bold"),
            bg="#e9ecef",
            fg="#2c3e50",
        )
        self.title_label.pack()
        # Three-column container (column weights 2 : 4 : 3)
        self.columns = tk.Frame(self.main, bg="#e9ecef")
        self.columns.pack(fill=tk.BOTH, expand=True)
        self.columns.grid_columnconfigure(0, weight=2)
        self.columns.grid_columnconfigure(1, weight=4)
        self.columns.grid_columnconfigure(2, weight=3)
        # Left column: input image preview
        self.left_card = tk.Frame(self.columns, bg="#ffffff", relief=tk.RAISED, bd=1)
        self.left_card.grid(row=0, column=0, sticky="nsew", padx=(0, 5))
        self.left_label = tk.Label(
            self.left_card,
            text="입력 이미지",
            font=("Malgun Gothic", 12, "bold"),
            bg="#ffffff",
            fg="#2c3e50",
        )
        self.left_label.pack(anchor=tk.W, padx=15, pady=(15, 8))
        self.canvas = tk.Canvas(
            self.left_card,
            bg="#f8f9fa",
            highlightthickness=1,
            highlightbackground="#dee2e6",
        )
        self.canvas.pack(fill=tk.BOTH, expand=True, padx=15, pady=(0, 10))
        # Placeholder text shown until a file is selected
        self.canvas.create_text(
            180,
            150,
            text="이미지 또는 PDF를 선택하세요",
            fill="#adb5bd",
            font=("Malgun Gothic", 11),
        )
        # Middle column: extracted items
        self.center_card = tk.Frame(self.columns, bg="#ffffff", relief=tk.RAISED, bd=1)
        self.center_card.grid(row=0, column=1, sticky="nsew", padx=5)
        self.center_label = tk.Label(
            self.center_card,
            text="추출된 품목",
            font=("Malgun Gothic", 12, "bold"),
            bg="#ffffff",
            fg="#2c3e50",
        )
        self.center_label.pack(anchor=tk.W, padx=15, pady=(15, 8))
        self.items_area = scrolledtext.ScrolledText(
            self.center_card,
            font=("Malgun Gothic", 10),
            bg="#f8f9fa",
            fg="#2c3e50",
            wrap=tk.WORD,
            bd=0,
            highlightthickness=1,
            highlightbackground="#dee2e6",
        )
        self.items_area.pack(fill=tk.BOTH, expand=True, padx=15, pady=(0, 15))
        # Right column: JSON result
        self.right_card = tk.Frame(self.columns, bg="#ffffff", relief=tk.RAISED, bd=1)
        self.right_card.grid(row=0, column=2, sticky="nsew", padx=(5, 0))
        self.right_label = tk.Label(
            self.right_card,
            text="JSON 결과",
            font=("Malgun Gothic", 12, "bold"),
            bg="#ffffff",
            fg="#2c3e50",
        )
        self.right_label.pack(anchor=tk.W, padx=15, pady=(15, 8))
        self.json_area = scrolledtext.ScrolledText(
            self.right_card,
            font=("Consolas", 9),
            bg="#f8f9fa",
            fg="#2c3e50",
            wrap=tk.WORD,
            bd=0,
            highlightthickness=1,
            highlightbackground="#dee2e6",
        )
        self.json_area.pack(fill=tk.BOTH, expand=True, padx=15, pady=(0, 15))
        # Bottom area (buttons, page range, progress)
        self.bottom_area = tk.Frame(self.main, bg="#e9ecef")
        self.bottom_area.pack(fill=tk.X, side=tk.BOTTOM, pady=(10, 0))
        self.btn_frame = tk.Frame(self.bottom_area, bg="#e9ecef")
        self.btn_frame.pack(fill=tk.X, pady=(0, 10))
        # Options shared by the three action buttons
        self.btn_style = {
            "font": ("Malgun Gothic", 10, "bold"),
            "fg": "#ffffff",
            "padx": 15,
            "pady": 8,
            "bd": 0,
            "cursor": "hand2",
            "relief": tk.FLAT,
        }
        self.btn_select = tk.Button(
            self.btn_frame,
            text="파일 선택",
            command=self.select_image,
            bg="#3498db",
            width=12,
            **self.btn_style,
        )
        self.btn_select.pack(side=tk.LEFT, padx=5)
        self.btn_ocr = tk.Button(
            self.btn_frame,
            text="텍스트 추출하기",
            command=self.run_ocr,
            bg="#2c3e50",
            width=15,
            **self.btn_style,
        )
        self.btn_ocr.pack(side=tk.LEFT, padx=5)
        self.btn_save = tk.Button(
            self.btn_frame,
            text="JSON 저장",
            command=self.save_json,
            bg="#e67e22",
            width=12,
            **self.btn_style,
        )
        self.btn_save.pack(side=tk.LEFT, padx=5)
        # Page-range controls ("all" / "range" radio buttons plus start~end entries)
        self.page_frame = tk.Frame(self.btn_frame, bg="#e9ecef")
        self.page_frame.pack(side=tk.LEFT, padx=10)
        self.page_label = tk.Label(
            self.page_frame, text="페이지:", bg="#e9ecef", font=("Malgun Gothic", 9)
        )
        self.page_label.pack(side=tk.LEFT)
        self.radio_all = tk.Radiobutton(
            self.page_frame,
            text="전체",
            variable=self.page_range_var,
            value="all",
            bg="#e9ecef",
        )
        self.radio_all.pack(side=tk.LEFT, padx=2)
        self.radio_range = tk.Radiobutton(
            self.page_frame,
            text="범위",
            variable=self.page_range_var,
            value="range",
            bg="#e9ecef",
        )
        self.radio_range.pack(side=tk.LEFT, padx=2)
        self.entry_start = tk.Entry(
            self.page_frame, textvariable=self.page_start_var, width=3
        )
        self.entry_start.pack(side=tk.LEFT, padx=2)
        self.tilde_label = tk.Label(self.page_frame, text="~", bg="#e9ecef")
        self.tilde_label.pack(side=tk.LEFT)
        self.entry_end = tk.Entry(
            self.page_frame, textvariable=self.page_end_var, width=3
        )
        self.entry_end.pack(side=tk.LEFT, padx=2)
        # Hover effect: switch to the paired color on enter, restore on leave.
        # Default arguments bind each button's own colors at definition time.
        for btn, color in [
            (self.btn_select, "#2980b9"),
            (self.btn_ocr, "#1a252f"),
            (self.btn_save, "#d35400"),
        ]:
            btn.bind("<Enter>", lambda e, c=color: e.widget.config(bg=c))
            btn.bind("<Leave>", lambda e, orig=btn.cget("bg"): e.widget.config(bg=orig))
        # Progress bar row
        self.progress_frame = tk.Frame(self.bottom_area, bg="#e9ecef", height=40)
        self.progress_frame.pack(fill=tk.X, pady=(5, 5))
        # Keep the fixed 40 px height instead of shrinking to the children.
        self.progress_frame.pack_propagate(False)
        self.progress_label = tk.Label(
            self.progress_frame,
            text="준비됨",
            font=("Malgun Gothic", 9),
            bg="#e9ecef",
            fg="#2c3e50",
            width=20,
        )
        self.progress_label.pack(side=tk.LEFT, padx=(0, 10))
        self.progress_bar = ttk.Progressbar(
            self.progress_frame, mode="determinate", length=400
        )
        self.progress_bar.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 10))
        self.progress_percent = tk.Label(
            self.progress_frame,
            text="0%",
            font=("Malgun Gothic", 9, "bold"),
            bg="#e9ecef",
            fg="#2c3e50",
            width=6,
        )
        self.progress_percent.pack(side=tk.LEFT)
        # Status bar (attached to the root window, below the main frame)
        self.status = tk.Label(
            self.root,
            text="OCR 엔진 초기화 중...",
            font=("Malgun Gothic", 9),
            bg="#dee2e6",
            fg="#2c3e50",
            anchor=tk.W,
            padx=20,
            pady=8,
        )
        self.status.pack(side=tk.BOTTOM, fill=tk.X)
        self.root.update_idletasks()
        self.root.minsize(1100, 700)

    def select_image(self):
        """Let the user pick an image or PDF, preview it and reset earlier results."""
        chosen = filedialog.askopenfilename(
            title="이미지 또는 PDF 선택",
            filetypes=[("이미지/PDF 파일", "*.jpg *.jpeg *.png *.bmp *.pdf")],
        )
        if chosen:
            self.current_image_path = chosen
            self.show_preview(chosen)

            # Clear whatever the previous file left behind.
            for text_area in (self.items_area, self.json_area):
                text_area.delete(1.0, tk.END)
            self.ocr_results = []
            self.receipt_items = []
            self.raw_json = None

            file_name = os.path.basename(chosen)
            self.status.config(text=f"📷 파일 로드: {file_name}", fg="#3498db")

    def show_preview(self, path):
        """Draw a thumbnail of ``path`` in the middle of the preview canvas.

        For a ``.pdf`` path only the first page is rasterised (at 100 dpi);
        any other path is opened with Pillow. The thumbnail is bounded to
        350x400 pixels. Any failure is reported in the status bar instead of
        being raised.

        Args:
            path: File system path of the image or PDF to preview.
        """
        try:
            self.canvas.delete("all")
            # Drop the reference to the previous thumbnail before building a new one.
            if hasattr(self, "_current_photo"):
                del self._current_photo

            photo = None
            if path.lower().endswith(".pdf"):
                import fitz

                doc = fitz.open(path)
                try:
                    # An empty document simply leaves the canvas blank.
                    if len(doc) > 0:
                        pix = doc.load_page(0).get_pixmap(dpi=100)
                        img = Image.frombytes(
                            "RGB", [pix.width, pix.height], pix.samples
                        )
                        img.thumbnail((350, 400), Image.Resampling.LANCZOS)
                        photo = ImageTk.PhotoImage(img)
                finally:
                    # Close the document even when rendering fails so the file
                    # handle is not left open.
                    doc.close()
            else:
                # The context manager closes the image file on every exit path.
                with Image.open(path) as img:
                    img.thumbnail((350, 400), Image.Resampling.LANCZOS)
                    photo = ImageTk.PhotoImage(img)

            if photo is not None:
                # Keep a reference on the instance for as long as the image is shown.
                self._current_photo = photo
                self.canvas.create_image(175, 200, anchor=tk.CENTER, image=photo)
        except Exception as e:
            self.status.config(text=f"미리보기 오류: {e}", fg="#e74c3c")

    def run_ocr(self):
        if self._processing_lock:
            self.status.config(text="이미 처리 중입니다...", fg="#f39c12")
            return
        if not hasattr(self, "ocr") or not self.ocr.pipeline:
            messagebox.showerror("오류", "OCR 엔진이 초기화 중입니다.")
            return
        if not self.current_image_path:
            messagebox.showwarning("경고", "먼저 파일을 선택하세요")
            return

        self._processing_lock = True

        # 진행률 초기화
        self.progress_bar["value"] = 0
        self.progress_percent.config(text="0%")
        self.progress_label.config(text="OCR 준비 중...")

        def _worker():
            try:
                import psutil
                import os
                import gc

                process = psutil.Process(os.getpid())
                start_memory = process.memory_info().rss / 1024 / 1024

                # 진행 상황 업데이트 콜백
                def update_progress(page_num, total_pages, current_memory):
                    def _update():
                        percent = (page_num / total_pages) * 100
                        self.progress_bar["value"] = percent
                        self.progress_percent.config(text=f"{percent:.1f}%")
                        self.progress_label.config(
                            text=f"페이지 {page_num}/{total_pages} 분석 중..."
                        )
                        self.status.config(
                            text=f"페이지 {page_num}/{total_pages} 처리 중... (메모리: {current_memory:.1f}MB)",
                            fg="#f39c12",
                        )
                        self.root.update_idletasks()

                    self.root.after(0, _update)

                # OCR 처리
                self.root.after(
                    0, lambda: self.status.config(text="OCR 분석 시작...", fg="#f39c12")
                )

                result = self.ocr.extract_receipt_json(
                    self.current_image_path, progress_callback=update_progress
                )

                if result is None:
                    self.root.after(
                        0,
                        lambda: self._update_display_error("OCR 처리 결과가 없습니다."),
                    )
                    return

                json_data, error = result

                if error:
                    self.root.after(
                        0, lambda err=error: self._update_display_error(err)
                    )
                    return

                if json_data is None:
                    self.root.after(
                        0,
                        lambda: self._update_display_error("OCR 결과가 비어있습니다."),
                    )
                    return

                self.raw_json = json_data
                self.receipt_items = self.ocr.parse_receipt_items_from_json(json_data)

                # 완료 처리
                end_memory = process.memory_info().rss / 1024 / 1024
                memory_used = end_memory - start_memory
                total_pages = json_data.get("total_pages", 1)

                def finish_update():
                    self.progress_bar["value"] = 100
                    self.progress_percent.config(text="100%")
                    self.progress_label.config(text="분석 완료!")
                    self.status.config(
                        text=f"분석 완료! (총 {total_pages}페이지, 메모리 사용: {memory_used:.1f}MB)",
                        fg="#27ae60",
                    )
                    self._update_display_items(json_data)

                self.root.after(0, finish_update)

            except Exception as e:
                import traceback

                traceback.print_exc()
                error_msg = str(e)
                self.root.after(
                    0,
                    lambda msg=error_msg: self._update_display_error(
                        f"처리 중 오류: {msg}"
                    ),
                )
            finally:
                self._processing_lock = False
                gc.collect()

        Thread(target=_worker, daemon=True).start()

    def _update_display_items(self, json_data):
        """Show the parsed receipt items and the OCR JSON in the result panes.

        Items from ``self.receipt_items`` are grouped by their ``page`` value
        (default 1) and listed page by page. A per-page heading and a leading
        summary line are added only when ``json_data["total_pages"]`` exceeds
        1. The grand total is the sum of every item's ``total_price``. Finally
        the clean-JSON export is triggered with ``auto=True``.

        Args:
            json_data: OCR result mapping; ``total_pages`` is read here and
                the whole mapping is dumped into the JSON pane.
        """
        total_pages = json_data.get("total_pages", 1)

        # Group the items by page (insertion order is kept within a page) and
        # accumulate the grand total in the same pass.
        items_by_page = {}
        total_sum = 0
        for item in self.receipt_items:
            items_by_page.setdefault(item.get("page", 1), []).append(item)
            total_sum += item.get("total_price", 0)

        items_display = []
        for page_num in sorted(items_by_page):
            if total_pages > 1:
                items_display.append(f"\n[페이지 {page_num}]\n")
            for item in items_by_page[page_num]:
                items_display.append(
                    f"{item.get('product', 'N/A')}\n"
                    f"    수량: {item.get('quantity', 1)}개\n"
                    f"    단가: {item.get('unit_price', 0):,}원\n"
                    f"    금액: {item.get('total_price', 0):,}원\n"
                    f"    ─────────────────"
                )

        if items_display:
            items_display.append(f"\n총 합계: {total_sum:,}원")
            if total_pages > 1:
                items_display.insert(0, f"총 {total_pages}페이지 분석 완료\n")
            items_text = "\n".join(items_display)
        else:
            items_text = (
                f"품목을 찾을 수 없습니다.\n\n분석된 페이지: {total_pages}페이지"
            )

        json_text = json.dumps(
            json_data, ensure_ascii=False, indent=2, cls=NumpyEncoder
        )

        self.items_area.delete(1.0, tk.END)
        self.items_area.insert(1.0, items_text)
        self.json_area.delete(1.0, tk.END)
        self.json_area.insert(1.0, json_text)

        self._save_json_from_memory(auto=True)

    def _update_display(self, items_text, json_text, total_pages=1, memory_used=0):
        self.items_area.delete(1.0, tk.END)
        self.items_area.insert(1.0, items_text)
        self.json_area.delete(1.0, tk.END)
        self.json_area.insert(1.0, json_text)

        if self.receipt_items:
            status_msg = f"✅ 분석 완료: {len(self.receipt_items)}개 품목 추출"
            if total_pages > 1:
                status_msg += f" (총 {total_pages}페이지)"
            if memory_used > 0:
                status_msg += f" | 메모리 사용: {memory_used:.1f}MB"

            self.status.config(text=status_msg, fg="#27ae60")
            self._save_json_from_memory(auto=True)
        else:
            status_msg = f"⚠️ 품목을 찾을 수 없음"
            if total_pages > 1:
                status_msg += f" (분석된 페이지: {total_pages})"
            self.status.config(text=status_msg, fg="#e67e22")

    def _update_display_error(self, error):
        self.items_area.delete(1.0, tk.END)
        self.items_area.insert(1.0, f"❌ 오류 발생:\n{error}")
        self.status.config(text=f"❌ 분석 실패", fg="#e74c3c")

    def _save_json_from_memory(self, auto=False):
        """OCR 결과 파일(_res.json)에서 읽어서 저장 (좌표 제외, 내용만)"""
        if not self.current_image_path:
            if not auto:
                messagebox.showwarning("경고", "먼저 파일을 선택하세요")
            return

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = os.path.basename(self.current_image_path)
        name, ext = os.path.splitext(filename)

        # OCR 결과 디렉토리
        ocr_result_dir = os.path.join(os.path.dirname(__file__), "ocr_results")

        if not os.path.exists(ocr_result_dir):
            if not auto:
                messagebox.showwarning("경고", "OCR 결과 디렉토리가 없습니다")
            return

        # _res.json 파일 찾기
        ocr_result_file = None
        for f in os.listdir(ocr_result_dir):
            if f.endswith("_res.json") and name in f:
                ocr_result_file = os.path.join(ocr_result_dir, f)
                break

        # 못 찾으면 가장 최근 _res.json 파일 사용
        if not ocr_result_file or not os.path.exists(ocr_result_file):
            res_files = [
                f for f in os.listdir(ocr_result_dir) if f.endswith("_res.json")
            ]
            if res_files:
                res_files.sort(reverse=True)
                ocr_result_file = os.path.join(ocr_result_dir, res_files[0])
                print(f"[저장] 최근 파일 사용: {res_files[0]}")

        if not ocr_result_file or not os.path.exists(ocr_result_file):
            if not auto:
                messagebox.showwarning("경고", f"OCR 결과 파일(_res.json)이 없습니다")
            return

        # 파일에서 읽기
        with open(ocr_result_file, "r", encoding="utf-8") as f:
            saved_data = json.load(f)

        # parsing_res_list 추출
        parsing_res_list = []
        if "parsing_res_list" in saved_data:
            parsing_res_list = saved_data["parsing_res_list"]
        elif "res" in saved_data and "parsing_res_list" in saved_data["res"]:
            parsing_res_list = saved_data["res"]["parsing_res_list"]

        # 내용만 추출 (좌표 제외)
        contents = []
        for block in parsing_res_list:
            if isinstance(block, dict):
                label = block.get("block_label", "unknown")
                content = block.get("block_content", "")
                if content:
                    contents.append({"type": label, "content": content})

        # 전체 텍스트 합치기
        full_text = "\n\n".join([c["content"] for c in contents])

        data = {
            "source_file": filename,
            "extracted_date": timestamp,
            "total_blocks": len(contents),
            "contents": contents,
            "full_text": full_text,
        }

        # 저장 경로
        json_path = os.path.join(ocr_result_dir, f"{name}_{timestamp}_clean.json")
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2, cls=NumpyEncoder)

        # 화면에도 표시
        self.json_area.delete(1.0, tk.END)
        self.json_area.insert(
            1.0, json.dumps(data, ensure_ascii=False, indent=2, cls=NumpyEncoder)
        )

        if not auto:
            self.status.config(text=f"저장 완료: {len(contents)}개 블록", fg="#27ae60")
        else:
            self.status.config(
                text=f"분석 완료: {len(contents)}개 블록 추출", fg="#27ae60"
            )

    def save_json(self, auto=False):
        """JSON 저장 버튼 클릭 시 호출"""
        if not self.raw_json:
            messagebox.showwarning("경고", "먼저 OCR을 실행하세요")
            return
        self._save_json_from_memory(auto=False)

    def on_closing(self):
        self._processing_lock = False
        self._current_photo = None
        import gc

        gc.collect()
        self.root.destroy()


# Script entry point: create the Tk root window, build the OCR GUI on it and
# enter the Tk main loop. Runs only when the file is executed directly.
if __name__ == "__main__":
    root = tk.Tk()
    # Keep the application object referenced for the lifetime of the main loop.
    app = OCRGuiApp(root)
    root.mainloop()
