diff --git a/apps/receipt_ocr/README.md b/apps/receipt_ocr/README.md
index eb72e2ac6..101180cdf 100644
--- a/apps/receipt_ocr/README.md
+++ b/apps/receipt_ocr/README.md
@@ -6,14 +6,17 @@
 Korean so Hangul is transcribed accurately. Uploaded files are saved in the
 `nocommit` directory, which is ignored by git. Amounts found in each receipt
 are summed and receipts are grouped by detected address. The original files
 can be reviewed one at a time with arrow buttons instead of a long list. The recognized
-text is stored for Q&A but not displayed next to the images. Each image is
-
-Base64 encoded before being sent to OpenAI for OCR.
+text is stored for Q&A and shown in the interface. Each image is
+Base64 encoded before being sent to OpenAI for OCR. You can jump directly to an
+image by entering its file name in a separate input box. OCR results are merged
+and saved to `nocommit/ocr_results.json` so previous extractions persist across
+uploads.
 During the upload a progress bar inside the Streamlit app shows the status
 of files being sent to OpenAI. Uploaded receipts are cached so subsequent
 Q&A uses the stored text without re-uploading, and each answer shows how
 long the model took to respond.
+
 Place your OpenAI API key in `nocommit/nocommit_key.txt` before running the app.
 After OCR extraction embeddings are built with the `text-embedding-3-large`
 model and a retrieval augmented generation (RAG) pipeline powers a Q&A chat box so you
diff --git a/apps/receipt_ocr/receipt_ocr_app.py b/apps/receipt_ocr/receipt_ocr_app.py
index 7d07e4223..8c455bb4c 100644
--- a/apps/receipt_ocr/receipt_ocr_app.py
+++ b/apps/receipt_ocr/receipt_ocr_app.py
@@ -6,7 +6,7 @@
 import base64
 import numpy as np
 import time
-
+import json
 
 try:
@@ -19,6 +19,8 @@
 )
 os.makedirs(NOCOMMIT_DIR, exist_ok=True)
 
+OCR_JSON_PATH = os.path.join(NOCOMMIT_DIR, "ocr_results.json")
+
 OPENAI_KEY_PATH = os.path.join(NOCOMMIT_DIR, "nocommit_key.txt")
 
 openai_api_key = None
@@ -167,6 +169,22 @@ def rag_answer(question: str, receipts: List[Dict]) -> str:
     except Exception:
         return ""
 
+
+def merge_save_ocr_json(new_receipts: List[Dict], path: str = OCR_JSON_PATH):
+    existing: List[Dict] = []
+    if os.path.exists(path):
+        try:
+            with open(path, "r", encoding="utf-8") as f:
+                existing = json.load(f)
+        except Exception:
+            existing = []
+    data = {r.get("filename"): r for r in existing if r.get("filename")}
+    for r in new_receipts:
+        data[r.get("filename")] = r
+    with open(path, "w", encoding="utf-8") as f:
+        json.dump(list(data.values()), f, ensure_ascii=False, indent=2)
+
+
 def process_receipts(files: List[Dict]) -> List[Dict]:
     receipts: List[Dict] = []
     status = st.empty()
@@ -192,6 +210,9 @@ def process_receipts(files: List[Dict]) -> List[Dict]:
         )
         bar.progress(i / total)
     status.text("완료")
+    if receipts:
+        merge_save_ocr_json(receipts)
+
     return receipts
 
 
@@ -232,9 +253,15 @@ def summarize(receipts: List[Dict]):
     summarize(receipts)
 
     st.header("영수증 이미지")
-
     if "view_idx" not in st.session_state:
         st.session_state.view_idx = 0
+    file_query = st.text_input("파일 이름 입력", key="file_query")
+    if file_query:
+        idx = next((i for i, r in enumerate(receipts) if r["filename"] == file_query), None)
+        if idx is not None:
+            st.session_state.view_idx = idx
+        else:
+            st.warning("해당 파일이 없습니다.")
     current = receipts[st.session_state.view_idx]
     st.subheader(current["filename"])
     st.image(current["path"], use_column_width=True)