"""Build a JSON Lines file of English/German sentence pairs.

When run as a script, reads ``english.txt`` and ``german.txt`` from the
current directory, pairs their non-blank lines in order, and writes one JSON
object per pair to ``concated.json``.
"""
import json
import warnings
from typing import List


def path_to_file_list(path: str) -> List[str]:
    """Return the non-blank lines of the UTF-8 text file at *path*.

    Each line is stripped of leading/trailing whitespace. Lines that are
    empty after stripping are dropped, so the result may be shorter than the
    file's line count.

    Raises:
        OSError: if the file cannot be opened for reading.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    with open(path, "r", encoding="utf-8") as f:
        stripped = (line.strip() for line in f)
        return [line for line in stripped if line]


def train_file_list_to_json(english_file_list: List[str], german_file_list: List[str]) -> List[str]:
    """Pair English and German sentences and serialize each pair as JSON.

    Sentences are paired by position. Each returned string is a JSON object
    with the keys ``"English"`` and ``"German"``; non-ASCII characters are
    kept as-is rather than ``\\u``-escaped.

    If the lists differ in length, the extra entries of the longer list are
    ignored (``zip`` semantics) and a ``UserWarning`` is emitted, because a
    length mismatch usually means the two inputs are no longer aligned.
    """
    if len(english_file_list) != len(german_file_list):
        # Do not raise: truncation is kept for backward compatibility, but it
        # should not go unnoticed.
        warnings.warn(
            "English/German lists differ in length "
            f"({len(english_file_list)} vs {len(german_file_list)}); "
            "extra entries are ignored",
            stacklevel=2,
        )

    return [
        json.dumps({"English": eng, "German": ger}, ensure_ascii=False)
        for eng, ger in zip(english_file_list, german_file_list)
    ]


def write_file_list(file_list: List[str], path: str) -> None:
    """Write each string in *file_list* to *path*, one per line, as UTF-8.

    An existing file at *path* is overwritten. Every entry, including the
    last, is followed by a newline.
    """
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(line + "\n" for line in file_list)


def main() -> None:
    """Convert ``english.txt`` and ``german.txt`` into ``concated.json``."""
    english_file_list = path_to_file_list("english.txt")
    german_file_list = path_to_file_list("german.txt")

    processed_file_list = train_file_list_to_json(english_file_list, german_file_list)

    write_file_list(processed_file_list, "concated.json")


if __name__ == "__main__":
    main()