From 479229fba137ad34c154b7be45cd1e2f05751999 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=86=90=EC=84=B8=EC=9D=B8?= Date: Wed, 19 Nov 2025 19:57:01 +0900 Subject: [PATCH] =?UTF-8?q?Fix:=20correct=20file=20handling=20and=20JSON?= =?UTF-8?q?=20processing=20in=20training=20scripts=20=E2=80=94=20fixed=20p?= =?UTF-8?q?ath=5Fto=5Ffile=5Flist,=20train=5Ffile=5Flist=5Fto=5Fjson,=20wr?= =?UTF-8?q?ite=5Ffile=5Flist,=20and=20main=20block=20calls?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.py | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/main.py b/main.py index 0d1f17b..e62fc9a 100644 --- a/main.py +++ b/main.py @@ -1,50 +1,54 @@ from typing import List +import json def path_to_file_list(path: str) -> List[str]: """Reads a file and returns a list of lines in the file""" - li = open(path, 'w') + # original code opened with 'w' and returned undefined 'lines' + with open(path, 'r', encoding='utf-8') as f: + lines = [line.rstrip('\n') for line in f] return lines def train_file_list_to_json(english_file_list: List[str], german_file_list: List[str]) -> List[str]: """Converts two lists of file paths into a list of json strings""" - # Preprocess unwanted characters + + # very small cleanup of the original process_file def process_file(file): - if '\\' in file: - file = file.replace('\\', '\\') - if '/' or '"' in file: - file = file.replace('/', '\\/') - file = file.replace('"', '\\"') + file = file.replace('\\', '\\\\') + file = file.replace('/', '\\/') + file = file.replace('"', '\\"') return file - # Template for json file - template_start = '{\"German\":\"' - template_mid = '\",\"German\":\"' - template_end = '\"}' - - # Can this be working? processed_file_list = [] + for english_file, german_file in zip(english_file_list, german_file_list): - english_file = process_file(english_file) - english_file = process_file(german_file) + eng = process_file(english_file) + ger = process_file(german_file) + + # instead of manually constructing JSON, use json.dumps but keep your structure + json_obj = {"English": eng, "German": ger} + processed_file_list.append(json.dumps(json_obj, ensure_ascii=False)) - processed_file_list.append(template_mid + english_file + template_start + german_file + template_start) return processed_file_list def write_file_list(file_list: List[str], path: str) -> None: """Writes a list of strings to a file, each string on a new line""" - with open(path, 'r') as f: + # original opened in 'r' and never wrote lines + with open(path, 'w', encoding='utf-8') as f: for file in file_list: - f.write('\n') + f.write(file + '\n') + if __name__ == "__main__": path = './' german_path = './german.txt' english_path = './english.txt' + # original code used wrong variables / wrong calls english_file_list = path_to_file_list(english_path) - german_file_list = train_file_list_to_json(german_path) + german_file_list = path_to_file_list(german_path) + + processed_file_list = train_file_list_to_json(english_file_list, german_file_list) - processed_file_list = path_to_file_list(english_file_list, german_file_list) + write_file_list(processed_file_list, path + 'concated.json') - write_file_list(processed_file_list, path+'concated.json')