jeonghoonkang · jeonghoonkang · Jul 31, 2025 · Jul 31, 2025
diff --git a/apps/receipt_ocr/README.md b/apps/receipt_ocr/README.md
@@ -6,14 +6,17 @@ Korean so Hangul is transcribed accurately. Uploaded files are saved in the
 `nocommit` directory, which is ignored by git. Amounts found in each receipt are
 summed and receipts are grouped by detected address. The original files can be
 reviewed one at a time with arrow buttons instead of a long list. The recognized
-text is stored for Q&A but not displayed next to the images. Each image is
-
-Base64 encoded before being sent to OpenAI for OCR.
+text is stored for Q&A and shown in the interface. Each image is
+Base64 encoded before being sent to OpenAI for OCR. You can jump directly to an
+image by entering its file name in a separate input box. OCR results are merged
+by file name and saved to `nocommit/ocr_results.json`, so previous extractions
+persist across uploads and a re-uploaded file replaces its earlier entry.
 During the upload a progress bar inside the Streamlit app shows the status of
 files being sent to OpenAI.
 Uploaded receipts are cached so subsequent Q&A uses the stored text without
 re-uploading, and each answer shows how long the model took to respond.
 
+
 Place your OpenAI API key in `nocommit/nocommit_key.txt` before running the app.
 After OCR extraction embeddings are built with the `text-embedding-3-large` model
 and a retrieval augmented generation (RAG) pipeline powers a Q&A chat box so you

diff --git a/apps/receipt_ocr/receipt_ocr_app.py b/apps/receipt_ocr/receipt_ocr_app.py
@@ -6,7 +6,7 @@
 import base64
 import numpy as np
 import time
-
+import json
 
 
 try:
@@ -19,6 +19,8 @@
 )
 os.makedirs(NOCOMMIT_DIR, exist_ok=True)
 
+OCR_JSON_PATH = os.path.join(NOCOMMIT_DIR, "ocr_results.json")
+
 OPENAI_KEY_PATH = os.path.join(NOCOMMIT_DIR, "nocommit_key.txt")
 
 openai_api_key = None
@@ -167,6 +169,22 @@ def rag_answer(question: str, receipts: List[Dict]) -> str:
     except Exception:
         return ""
 
+
+def merge_save_ocr_json(new_receipts: List[Dict], path: str = OCR_JSON_PATH):
+    """Merge new_receipts into the JSON list at path by "filename"; new entries win."""
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            existing = json.load(f)
+    except (OSError, ValueError):  # missing, unreadable or corrupt file: start fresh
+        existing = []
+    if not isinstance(existing, list):  # tolerate a file whose root is not a JSON list
+        existing = []
+    data = {r["filename"]: r for r in existing if isinstance(r, dict) and r.get("filename")}
+    data.update((r.get("filename"), r) for r in new_receipts)
+    with open(path, "w", encoding="utf-8") as f:
+        json.dump(list(data.values()), f, ensure_ascii=False, indent=2)
+
+
 def process_receipts(files: List[Dict]) -> List[Dict]:
     receipts: List[Dict] = []
     status = st.empty()
@@ -192,6 +210,9 @@ def process_receipts(files: List[Dict]) -> List[Dict]:
         )
         bar.progress(i / total)
     status.text("완료")
+    if receipts:
+        merge_save_ocr_json(receipts)
+
     return receipts
 
 
@@ -232,9 +253,15 @@ def summarize(receipts: List[Dict]):
     summarize(receipts)
 
     st.header("영수증 이미지")
-
     if "view_idx" not in st.session_state:
         st.session_state.view_idx = 0
+    file_query = st.text_input("파일 이름 입력", key="file_query")
+    if file_query:
+        idx = next((i for i, r in enumerate(receipts) if r["filename"] == file_query), None)
+        if idx is not None:
+            st.session_state.view_idx = idx
+        else:
+            st.warning("해당 파일이 없습니다.")
     current = receipts[st.session_state.view_idx]
     st.subheader(current["filename"])
     st.image(current["path"], use_column_width=True)