From d5e80d4edf3ae928258d0135211ebdd4219f853a Mon Sep 17 00:00:00 2001 From: STASWEMTH Date: Wed, 19 Nov 2025 21:18:03 +0900 Subject: [PATCH] Fix main.py: file IO, JSON escaping, and main flow --- main.py | 51 +++++++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/main.py b/main.py index 0d1f17b..f1de277 100644 --- a/main.py +++ b/main.py @@ -1,50 +1,53 @@ from typing import List + def path_to_file_list(path: str) -> List[str]: """Reads a file and returns a list of lines in the file""" - li = open(path, 'w') - return lines + with open(path, "r", encoding="utf-8") as f: + return [line.rstrip("\n") for line in f] + def train_file_list_to_json(english_file_list: List[str], german_file_list: List[str]) -> List[str]: """Converts two lists of file paths into a list of json strings""" + # Preprocess unwanted characters - def process_file(file): - if '\\' in file: - file = file.replace('\\', '\\') - if '/' or '"' in file: - file = file.replace('/', '\\/') - file = file.replace('"', '\\"') - return file + def process_file(text: str) -> str: + text = text.replace("\\", "\\\\") + text = text.replace("/", "\\/") + text = text.replace('"', '\\"') + return text # Template for json file - template_start = '{\"German\":\"' + template_start = '{\"English\":\"' template_mid = '\",\"German\":\"' template_end = '\"}' - # Can this be working? - processed_file_list = [] + processed_file_list: List[str] = [] for english_file, german_file in zip(english_file_list, german_file_list): english_file = process_file(english_file) - english_file = process_file(german_file) + german_file = process_file(german_file) + + json_line = template_start + english_file + template_mid + german_file + template_end + processed_file_list.append(json_line) - processed_file_list.append(template_mid + english_file + template_start + german_file + template_start) return processed_file_list def write_file_list(file_list: List[str], path: str) -> None: """Writes a list of strings to a file, each string on a new line""" - with open(path, 'r') as f: - for file in file_list: - f.write('\n') - + with open(path, "w", encoding="utf-8") as f: + for line in file_list: + f.write(line + "\n") + + if __name__ == "__main__": - path = './' - german_path = './german.txt' - english_path = './english.txt' + base_path = "./" + german_path = base_path + "german.txt" + english_path = base_path + "english.txt" english_file_list = path_to_file_list(english_path) - german_file_list = train_file_list_to_json(german_path) + german_file_list = path_to_file_list(german_path) - processed_file_list = path_to_file_list(english_file_list, german_file_list) + processed_file_list = train_file_list_to_json(english_file_list, german_file_list) - write_file_list(processed_file_list, path+'concated.json') + write_file_list(processed_file_list, base_path + "concated.json")