From 0768d47b2a362fe235f09f31f466fe42cb45ec93 Mon Sep 17 00:00:00 2001
From: Jeong Kunghun
Date: Wed, 19 Nov 2025 23:14:30 +0900
Subject: [PATCH] Fix main.py to correctly build JSON from english/german files

---
 main.py | 71 +++++++++++++++++++++++++++------------------------------
 1 file changed, 34 insertions(+), 37 deletions(-)

diff --git a/main.py b/main.py
index 0d1f17b..ca0da11 100644
--- a/main.py
+++ b/main.py
@@ -1,50 +1,47 @@
 from typing import List
+import json
+
 
 def path_to_file_list(path: str) -> List[str]:
     """Reads a file and returns a list of lines in the file"""
-    li = open(path, 'w')
-    return lines
-
-def train_file_list_to_json(english_file_list: List[str], german_file_list: List[str]) -> List[str]:
-    """Converts two lists of file paths into a list of json strings"""
-    # Preprocess unwanted characters
-    def process_file(file):
-        if '\\' in file:
-            file = file.replace('\\', '\\')
-        if '/' or '"' in file:
-            file = file.replace('/', '\\/')
-            file = file.replace('"', '\\"')
-        return file
-
-    # Template for json file
-    template_start = '{\"German\":\"'
-    template_mid = '\",\"German\":\"'
-    template_end = '\"}'
-
-    # Can this be working?
-    processed_file_list = []
-    for english_file, german_file in zip(english_file_list, german_file_list):
-        english_file = process_file(english_file)
-        english_file = process_file(german_file)
-
-        processed_file_list.append(template_mid + english_file + template_start + german_file + template_start)
-    return processed_file_list
+    # 파일을 읽기 모드로 열고, 줄 끝 개행(\n)을 제거해서 리스트로 반환
+    with open(path, "r", encoding="utf-8") as f:
+        return [line.rstrip("\n") for line in f]
+
+
+def train_file_list_to_json(english_file_list: List[str],
+                            german_file_list: List[str]) -> List[str]:
+    """Converts two lists of sentences into a list of json strings"""
+    json_lines: List[str] = []
+
+    for en, de in zip(english_file_list, german_file_list):
+        data = {"English": en, "German": de}
+        # json.dumps를 사용해서 안전하게 JSON 문자열 생성
+        json_lines.append(json.dumps(data, ensure_ascii=False))
+
+    return json_lines
 
 
 def write_file_list(file_list: List[str], path: str) -> None:
     """Writes a list of strings to a file, each string on a new line"""
-    with open(path, 'r') as f:
-        for file in file_list:
-            f.write('\n')
-
+    with open(path, "w", encoding="utf-8") as f:
+        for line in file_list:
+            f.write(line + "\n")
+
+
 if __name__ == "__main__":
-    path = './'
-    german_path = './german.txt'
-    english_path = './english.txt'
+    base_path = "./"
+    german_path = base_path + "german.txt"
+    english_path = base_path + "english.txt"
 
+    # 1) 영어/독일어 파일에서 문장 리스트 읽기
     english_file_list = path_to_file_list(english_path)
-    german_file_list = train_file_list_to_json(german_path)
+    german_file_list = path_to_file_list(german_path)
 
-    processed_file_list = path_to_file_list(english_file_list, german_file_list)
+    # 2) JSON 문자열 리스트 만들기
+    processed_file_list = train_file_list_to_json(
+        english_file_list, german_file_list
+    )
 
-    write_file_list(processed_file_list, path+'concated.json')
+    # 3) concated.json에 저장
+    write_file_list(processed_file_list, base_path + "concated.json")