From 479229fba137ad34c154b7be45cd1e2f05751999 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=EC=86=90=EC=84=B8=EC=9D=B8?= <sein@Seins-Macbook-Pro.local>
Date: Wed, 19 Nov 2025 19:57:01 +0900
Subject: [PATCH] =?UTF-8?q?Fix:=20correct=20file=20handling=20and=20JSON?=
 =?UTF-8?q?=20processing=20in=20training=20scripts=20=E2=80=94=20fixed=20p?=
 =?UTF-8?q?ath=5Fto=5Ffile=5Flist,=20train=5Ffile=5Flist=5Fto=5Fjson,=20wr?=
 =?UTF-8?q?ite=5Ffile=5Flist,=20and=20main=20block=20calls?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 main.py | 46 +++++++++++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/main.py b/main.py
index 0d1f17b..e62fc9a 100644
--- a/main.py
+++ b/main.py
@@ -1,50 +1,54 @@
 from typing import List
+import json
 
 def path_to_file_list(path: str) -> List[str]:
     """Reads a file and returns a list of lines in the file"""
-    li = open(path, 'w')
+    # Decode as UTF-8 and strip only the trailing newline from each line
+    with open(path, 'r', encoding='utf-8') as f:
+        lines = [line.rstrip('\n') for line in f]
     return lines
 
 def train_file_list_to_json(english_file_list: List[str], german_file_list: List[str]) -> List[str]:
     """Converts two lists of file paths into a list of json strings"""
-    # Preprocess unwanted characters
+
+    # Hook for per-line preprocessing; intentionally a pass-through
     def process_file(file):
-        if '\\' in file:
-            file = file.replace('\\', '\\')
-        if '/' or '"' in file:
-            file = file.replace('/', '\\/')
-            file = file.replace('"', '\\"')
+        # No manual escaping here: json.dumps already escapes backslashes
+        # and quotes, so escaping first would leave doubled backslashes in
+        # the text once the JSON is decoded.
         return file
 
-    # Template for json file
-    template_start = '{\"German\":\"'
-    template_mid = '\",\"German\":\"'
-    template_end = '\"}'
-
-    # Can this be working?
     processed_file_list = []
+    # zip stops at the shorter list, so unpaired trailing lines are dropped
     for english_file, german_file in zip(english_file_list, german_file_list):
-        english_file = process_file(english_file)
-        english_file = process_file(german_file)
+        eng = process_file(english_file)
+        ger = process_file(german_file)
+
+        # ensure_ascii=False keeps non-ASCII text as-is instead of \uXXXX escapes
+        json_obj = {"English": eng, "German": ger}
+        processed_file_list.append(json.dumps(json_obj, ensure_ascii=False))
 
-        processed_file_list.append(template_mid + english_file + template_start + german_file + template_start)
     return processed_file_list
 
 
 def write_file_list(file_list: List[str], path: str) -> None:
     """Writes a list of strings to a file, each string on a new line"""
-    with open(path, 'r') as f:
+    # Mode 'w' creates the file or truncates an existing one; output is UTF-8
+    with open(path, 'w', encoding='utf-8') as f:
         for file in file_list:
-            f.write('\n')
+            f.write(file + '\n')
+
             
 if __name__ == "__main__":
     path = './'
     german_path = './german.txt'
     english_path = './english.txt'
 
+    # Read both corpora, pair them line by line, and write one JSON object per line
     english_file_list = path_to_file_list(english_path)
-    german_file_list = train_file_list_to_json(german_path)
+    german_file_list = path_to_file_list(german_path)
+
+    processed_file_list = train_file_list_to_json(english_file_list, german_file_list)
 
-    processed_file_list = path_to_file_list(english_file_list, german_file_list)
+    write_file_list(processed_file_list, path + 'concated.json')
 
-    write_file_list(processed_file_list, path+'concated.json')