From e1c8324019d5c3c7a0c29ec57198c2a902d81e19 Mon Sep 17 00:00:00 2001 From: PowerViber Date: Wed, 11 Feb 2026 14:27:54 +0700 Subject: [PATCH 1/3] Refactor dfler --- dfler.py | 389 ----------- pyproject.toml | 35 + src/dfler/__init__.py | 0 config.json => src/dfler/config.json | 0 src/dfler/dfler.py | 304 +++++++++ .../dfler/generate_report.py | 0 parse.py => src/dfler/parse.py | 602 +++++++++--------- tests/test_cli.py | 43 ++ 8 files changed, 683 insertions(+), 690 deletions(-) delete mode 100644 dfler.py create mode 100644 pyproject.toml create mode 100644 src/dfler/__init__.py rename config.json => src/dfler/config.json (100%) create mode 100644 src/dfler/dfler.py rename generate_report.py => src/dfler/generate_report.py (100%) rename parse.py => src/dfler/parse.py (97%) create mode 100644 tests/test_cli.py diff --git a/dfler.py b/dfler.py deleted file mode 100644 index ff8cde3..0000000 --- a/dfler.py +++ /dev/null @@ -1,389 +0,0 @@ -import requests -import time -import os -import sys -import json -from tqdm import tqdm -from datetime import datetime -from os import system, name -from parse import read_android_log, read_ios_log -from generate_report import generate_report -from simpletransformers.ner import NERModel -import pandas as pd -import torch - - -def get_config(): - config_file = open('config.json') - config_file = json.load(config_file) - - now = datetime.now() - now = now.strftime("%Y%m%d_%H%M%S") - output_dir = os.path.join(config_file['output_dir'], now) - # output_dir = os.path.join(config_file['output_dir'], '27112022_190057') - previous_step = 0 - previous_status = False - use_cuda = True if torch.cuda.is_available() == True else False - - - wkhtml_path = "" - if name == 'nt': - wkhtml_path = config_file['wkhtml_path']['windows'] - # for mac and linux(here, os.name is 'posix') - else: - wkhtml_path = config_file['wkhtml_path']['linux'] - - return { - "output_dir": output_dir, - "model_dir": config_file['model_dir'], - "previous_step": previous_step, - "previous_status": previous_status, - "wkhtml_path": wkhtml_path, - "app_version": config_file['app_version'], - "use_cuda": use_cuda, - "evidence_dir": config_file['source_evidence'], - } - -def clear_screen(): - # for windows - if name == 'nt': - _ = system('cls') - # for mac and linux(here, os.name is 'posix') - else: - _ = system('clear') - -def menu(): - clear_screen() - print("\t\t====================================================================") - print("\t\t============== Drone Flight Log Entity Recognizer ==============") - print("\t\t====================================================================\n") - print("\t\tAction to perform:\n") - print("\t\t\t1. Evidence Checking") - print("\t\t\t2. Forensic Timeline Construction") - print("\t\t\t3. Drone Entity Recognition") - print("\t\t\t4. Forensic Report Generation") - print("\t\t\t0. 
Exit\n") - try: - option = input("\t\tEnter option: ") - except EOFError: - option = "1" - return option - - -def main(): - # now = datetime.now() - # now = now.strftime("%d%m%Y_%H%M%S") - # output_dir = os.path.join("./result", now) - config = get_config() - - if not os.path.exists(config['output_dir']): - os.makedirs(config['output_dir']) - - start = menu() - if start == '0': - with open(config['output_dir'] + '/config.json', 'w') as file: - json.dump(config, file) - print("Exit program...") - time.sleep(2) - sys.exit(0) - while start != '0': - if start == '0': - with open(config['output_dir'] + '/config.json', 'w') as file: - json.dump(config, file) - print("Exit program...") - time.sleep(1) - sys.exit(0) - elif start == '1': - clear_screen() - print('Evidence checking in process...\n') - time.sleep(1) - config['previous_step'] = 1 - - files = os.listdir(config['evidence_dir']) - android_logs = [] - ios_logs = [] - folders = [d for d in files if os.path.isdir(config['evidence_dir']+'/'+d)] - # print(folders) - if(len(folders) == 0): - print("No sub-folders in the evidence folder") - config['previous_status'] = False - time.sleep(1) - try: - input("Press enter to continue...") - except EOFError: - print("No input received, exit program...") - sys.exit(0) - else: - for folder in folders: - # Filtering only the files. - files = os.listdir(config['evidence_dir']+'/'+folder) - files = [f for f in files if os.path.isfile(config['evidence_dir']+'/'+folder+'/'+f)] - if(folder == 'android'): - android_logs.append(files) - else: - ios_logs.append(files) - android_logs.extend(ios_logs) - # save to .json file - - if(len(android_logs) == 0): - print('No found files in the evidence folder!') - config['previous_status'] = False - time.sleep(1) - else: - with open(config['output_dir'] + '/raw_list.json', 'w') as file: - json.dump(android_logs, file) - config['previous_status'] = True - time.sleep(1) - print('Found files: \n') - print('iOS logs: ') - print(*ios_logs, sep="\n") - print("\nAndroid logs: \n") - print(*android_logs, sep="\n") - print('Finish checking evidence...') - time.sleep(1) - try: - input("Press enter to continue...") - except EOFError: - print("No input received, exit program...") - sys.exit(0) - elif start == '2': - if config['previous_status'] == False and config['previous_step'] == 1: - print('Previous step is not complete, please return to previous step') - time.sleep(1) - try: - input("Press enter to continue...") - except EOFError: - print("No input received, exit program...") - sys.exit(0) - elif (config['previous_step'] == 1 and config['previous_status'] == True) or (config['previous_step'] != 1 and config['previous_status'] == True): - clear_screen() - print('Forensic timeline construction is in process...\n') - config['previous_step'] = 2 - # Parse the raw flight logs - os.makedirs(config['output_dir'] + '/parsed/android') - android_path = os.path.join(config['output_dir'], 'parsed/android') - os.makedirs(config['output_dir'] + '/parsed/ios') - ios_path = os.path.join(config['output_dir'], 'parsed/ios') - full_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), config['evidence_dir']) - - # Construct the forensic timeline from parsed flight log - # print(full_path) - # print(os.path.join(dir_path, config['evidence_dir'])) - path_list = [] - ios_parsed = False - android_parsed = False - for path, subdirs, files in os.walk(full_path): - if path.find("android") != -1: - for filename in os.listdir(path): - if filename.find("parsed") != -1: - continue - print("path: 
", path) - print("Extracting file: %s" % filename) - read_android_log(path, filename, android_path) - print("Finish Extracting file: %s\n" % filename) - android_parsed = True - - elif path.find("ios") != -1: - for filename in os.listdir(path): - if filename.find("parsed") != -1: - continue - print("path: ", path) - print("Extracting file: %s" % filename) - read_ios_log(path, filename, ios_path) - print("Finish Extracting file: %s\n" % filename) - ios_parsed = True - - parsed_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.path.join(config['output_dir'], 'parsed')) - for path, subdirs, files in os.walk(parsed_path): - for filename in files: - path_list.append(os.path.join(path, filename)) - # if(ios_parsed or android_parsed): - # for name in files: - # file_ext = name.split(".") - # file_ext = file_ext[-1] if len(file_ext) > 1 else "" - # if(name.find("parsed_") != -1 and file_ext == "csv"): - # path_list.append(os.path.join(path, name)) - - parent_df = pd.DataFrame() - if(len(path_list) == 0): - print('No parsed evidence found.') - config['previous_status'] = False - time.sleep(1) - try: - input("Press enter to continue...") - except EOFError: - print("No input received, exit program...") - sys.exit(0) - - for path in path_list: - child_df = pd.read_csv(path, encoding='utf-8') - parent_df = pd.concat([parent_df, child_df]) - - # making copy of team column - time_col = parent_df["time"].copy() - parent_df["timestamp"] = parent_df["date"].str.cat(time_col, sep =" ") - parent_df.drop(columns = ['time', 'date'], inplace=True) - parent_df = parent_df[['timestamp', 'message']] - parent_df['timestamp'] = pd.to_datetime(parent_df['timestamp']) - # Sort the data by timestamp - parent_df.sort_values(by='timestamp', inplace=True) - - print('Save forensic timeline to .csv file...') - parent_df.to_csv(config['output_dir'] + '/forensic_timeline.csv', index=False, encoding="utf-8") - - print('Finish constructing timeline.') - try: - input("Press enter to continue...") - except EOFError: - print("No input received, exit program...") - sys.exit(0) - else: - print('Please follow the steps accordingly') - time.sleep(1) - try: - input("Press enter to continue...") - except EOFError: - print("No input received, exit program...") - sys.exit(0) - elif start == '3': - if config['previous_status'] == False and config['previous_step'] == 2: - print('Previous step is not complete, please return to previous step') - time.sleep(1) - try: - input("Press enter to continue...") - except EOFError: - print("No input received, exit program...") - sys.exit(0) - elif (config['previous_step'] == 2 and config['previous_status'] == True) or (config['previous_step'] != 2 and config['previous_status'] == True): - clear_screen() - print('Entity Recognition is in process...\n') - config['previous_step'] == 3 - # Load the fine-tuned model - print("Loading model...\n") - model_exist = os.path.exists(config['model_dir'] + '/pytorch_model.bin') - if (model_exist == False): - print('The model file is not found.') - config['previous_status'] = False - time.sleep(1) - try: - input("Press enter to continue...") - except EOFError: - print("No input received, exit program...") - sys.exit(0) - else: - droner = NERModel( - "bert", config['model_dir'], use_cuda=config['use_cuda'] - ) - print("Model is loaded successfully\n") - # Load the forensic timeline - print("Loading forensic timeline...\n") - timeline_exist = os.path.exists(config['output_dir'] + '/forensic_timeline.csv') - if(timeline_exist == False): - print('The 
forensic timeline file is not found.') - config['previous_status'] = False - time.sleep(1) - try: - input("Press enter to continue...") - except EOFError: - print("No input received, exit program...") - sys.exit(0) - else: - timeline = pd.read_csv(config['output_dir'] + '/forensic_timeline.csv', encoding="utf-8") - print("Forensic timeline is loaded successfully\n") - print('Start recognizing mentioned entities...') - pred_list = [] - for row in tqdm(range(0, timeline.shape[0])): - message = timeline.iloc[row, 1] - [entities], _ = droner.predict([message]) - timestamp = timeline.iloc[row, 0] - pred_list.append({"timestamp": timestamp, "entities": entities}) - - # save to .json file - with open(config['output_dir'] + '/ner_result.json', 'w') as file: - json.dump(pred_list, file) - print('Finish recognizing mentioned entities...') - time.sleep(1) - try: - input("Press enter to continue...") - except EOFError: - print("No input received, exit program...") - sys.exit(0) - else: - print('Please follow the steps accordingly') - time.sleep(1) - try: - input("Press enter to continue...") - except EOFError: - print("No input received, exit program...") - sys.exit(0) - elif start == '4': - if config['previous_status'] == False and config['previous_step'] == 3: - print('Previous step is not complete, please return to previous step') - time.sleep(1) - try: - input("Press enter to continue...") - except EOFError: - print("No input received, exit program...") - sys.exit(0) - elif (config['previous_step'] == 2 and config['previous_status'] == True) or (config['previous_step'] != 2 and config['previous_status'] == True): - clear_screen() - print('Forensic report generation is in process...\n') - config['previous_step'] == 4 - print('Loading the NER results...') - # Opening JSON file - ner_result_exist = os.path.exists(config['output_dir'] + '/ner_result.json') - if(ner_result_exist == False): - print('The NER result is not found.') - config['previous_status'] = False - time.sleep(1) - try: - input("Press enter to continue...") - except EOFError: - print("No input received, exit program...") - sys.exit(0) - else: - # Load the NER results - timeline_file = open(config['output_dir'] + '/ner_result.json') - timeline = json.load(timeline_file) - print('NER result is loaded successfully.') - - print('Start generating forensic report...') - try: - generate_report(config) - except: - print('Error in generating report.') - config['previous_status'] = False - time.sleep(1) - try: - input("Press enter to continue...") - except EOFError: - print("No input received, exit program...") - sys.exit(0) - else: - print('Report has generated successfully.') - try: - input("Press enter to continue...") - except EOFError: - print("No input received, exit program...") - sys.exit(0) - else: - print('Please follow the steps accordingly') - time.sleep(1) - try: - input("Press enter to continue...") - except EOFError: - print("No input received, exit program...") - sys.exit(0) - else: - print('Invalid option!') - try: - input("Press enter to continue...") - except EOFError: - print("No input received, exit program...") - sys.exit(0) - start = menu() - sys.exit(0) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..c9313b4 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,35 @@ +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "dfler" +version = "0.1.0" +description = 
"Drone Flight Log Entity Recognizer" +readme = "README.md" +authors = [ + { name = "Your Name", email = "your.email@example.com" }, +] +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", +] +requires-python = ">=3.7" +dependencies = [ + "simpletransformers>=0.63.6", + "seqeval>=1.22.0", + "pandas", + "numpy", + "torch", + "tqdm", + "pdfkit", + "requests", +] + +[project.scripts] +dfler = "dfler.dfler:main" + +[project.urls] +"Homepage" = "https://github.com/DroneNLP/dfler" +"Bug Tracker" = "https://github.com/DroneNLP/dfler/issues" diff --git a/src/dfler/__init__.py b/src/dfler/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/config.json b/src/dfler/config.json similarity index 100% rename from config.json rename to src/dfler/config.json diff --git a/src/dfler/dfler.py b/src/dfler/dfler.py new file mode 100644 index 0000000..12d502e --- /dev/null +++ b/src/dfler/dfler.py @@ -0,0 +1,304 @@ +import argparse +import sys +import os +import json +import time +import shutil +from datetime import datetime +from tqdm import tqdm +import pandas as pd +import torch +from simpletransformers.ner import NERModel + +# Relative imports +from .parse import read_android_log, read_ios_log +from .generate_report import generate_report + +def load_config(config_path=None): + if config_path and os.path.exists(config_path): + with open(config_path, 'r') as f: + return json.load(f) + elif os.path.exists('config.json'): + with open('config.json', 'r') as f: + return json.load(f) + return {} + +def get_wkhtml_path(config): + if os.name == 'nt': + return config.get('wkhtml_path', {}).get('windows') + else: + return config.get('wkhtml_path', {}).get('linux') + +def check_evidence(config): + print('Evidence checking in process...\n') + evidence_dir = config.get('source_evidence') + output_dir = config.get('output_dir') + + if not evidence_dir or not os.path.exists(evidence_dir): + print(f"Error: Evidence directory '{evidence_dir}' not found.") + return False + + files = os.listdir(evidence_dir) + android_logs = [] + ios_logs = [] + folders = [d for d in files if os.path.isdir(os.path.join(evidence_dir, d))] + + if not folders: + print("No sub-folders in the evidence folder") + return False + + for folder in folders: + folder_path = os.path.join(evidence_dir, folder) + files = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))] + if folder == 'android': + android_logs.extend(files) + else: + ios_logs.extend(files) + + all_logs = android_logs + ios_logs # Just for checking if empty + + if not all_logs: + print('No found files in the evidence folder!') + return False + + # Save raw_list.json + raw_list_data = [] # Structure from original code seemed to be [[android_files], [ios_files]] roughly? + # Original code: + # if(folder == 'android'): android_logs.append(files) -> files is list. so android_logs is list of lists? + # android_logs.extend(ios_logs) -> extends list of lists? + # checking original code: + # android_logs = [] + # files (list) -> android_logs.append(files) -> [[file1, file2]] + # wait, if multiple folders? + # it seems it expects specific structure. + # Let's recreate original structure for compatibility with generate_report + + # Re-reading original code logic: + # android_logs = [] + # for folder in folders: + # files = ... 
+ # if folder == 'android': android_logs.append(files) + # else: ios_logs.append(files) + # android_logs.extend(ios_logs) + # json.dump(android_logs) + + # So it produces [[android_files...], [ios_files...]] basically. + + final_list = [] + if android_logs: final_list.append(android_logs) + if ios_logs: final_list.append(ios_logs) + + os.makedirs(output_dir, exist_ok=True) + with open(os.path.join(output_dir, 'raw_list.json'), 'w') as file: + json.dump(final_list, file) + + print('Found files: \n') + print('iOS logs: ', ios_logs) + print("\nAndroid logs: ", android_logs) + print('Finish checking evidence...') + return True + +def construct_timeline(config): + print('Forensic timeline construction is in process...\n') + evidence_dir = config.get('source_evidence') + output_dir = config.get('output_dir') + + if not os.path.exists(os.path.join(output_dir, 'raw_list.json')): + print("Error: Previous step (Check) not complete. raw_list.json missing.") + return False + + parsed_android_dir = os.path.join(output_dir, 'parsed', 'android') + parsed_ios_dir = os.path.join(output_dir, 'parsed', 'ios') + os.makedirs(parsed_android_dir, exist_ok=True) + os.makedirs(parsed_ios_dir, exist_ok=True) + + full_evidence_path = os.path.abspath(evidence_dir) + + for path, subdirs, files in os.walk(full_evidence_path): + if "android" in path: + for filename in files: + if "parsed" in filename: continue + print(f"Extracting file: {filename}") + read_android_log(path, filename, parsed_android_dir) + elif "ios" in path: + for filename in files: + if "parsed" in filename: continue + print(f"Extracting file: {filename}") + read_ios_log(path, filename, parsed_ios_dir) + + # Combine parsed csvs + path_list = [] + parsed_path = os.path.join(output_dir, 'parsed') + for path, subdirs, files in os.walk(parsed_path): + for filename in files: + path_list.append(os.path.join(path, filename)) + + if not path_list: + print('No parsed evidence found.') + return False + + parent_df = pd.DataFrame() + for path in path_list: + try: + child_df = pd.read_csv(path, encoding='utf-8') + parent_df = pd.concat([parent_df, child_df]) + except Exception as e: + print(f"Error reading {path}: {e}") + + if parent_df.empty: + print("Merged dataframe is empty.") + return False + + # Standardize and sort + try: + # Check if columns exist + if 'time' in parent_df.columns and 'date' in parent_df.columns: + time_col = parent_df["time"].copy() + parent_df["timestamp"] = parent_df["date"].str.cat(time_col, sep =" ") + parent_df.drop(columns = ['time', 'date'], inplace=True) + parent_df = parent_df[['timestamp', 'message']] + parent_df['timestamp'] = pd.to_datetime(parent_df['timestamp']) + parent_df.sort_values(by='timestamp', inplace=True) + + parent_df.to_csv(os.path.join(output_dir, 'forensic_timeline.csv'), index=False, encoding="utf-8") + print('Finish constructing timeline.') + return True + else: + print("Error: Required columns 'date' and 'time' not found in parsed logs.") + return False + except Exception as e: + print(f"Error processing timeline: {e}") + return False + +def run_ner(config): + print('Entity Recognition is in process...\n') + output_dir = config.get('output_dir') + model_dir = config.get('model_dir') + use_cuda = config.get('use_cuda', False) + + if not os.path.exists(os.path.join(model_dir, 'pytorch_model.bin')): + print(f'The model file is not found at {model_dir}.') + return False + + timeline_path = os.path.join(output_dir, 'forensic_timeline.csv') + if not os.path.exists(timeline_path): + print('The forensic timeline file 
is not found.')
+        return False
+
+    print("Loading model...\n")
+    droner = NERModel("bert", model_dir, use_cuda=use_cuda)
+    print("Model loaded.\n")
+
+    timeline = pd.read_csv(timeline_path, encoding="utf-8")
+    print('Start recognizing mentioned entities...')
+
+    pred_list = []
+    # Handle empty timeline
+    if timeline.empty:
+        print("Timeline is empty.")
+    else:
+        for row in tqdm(range(0, timeline.shape[0])):
+            message = timeline.iloc[row, 1]
+            if pd.isna(message): continue
+            [entities], _ = droner.predict([str(message)])
+            timestamp = timeline.iloc[row, 0]
+            pred_list.append({"timestamp": timestamp, "entities": entities})
+
+    with open(os.path.join(output_dir, 'ner_result.json'), 'w') as file:
+        json.dump(pred_list, file)
+
+    print('Finish recognizing mentioned entities...')
+    return True
+
+def run_report(config):
+    print('Forensic report generation is in progress...\n')
+    output_dir = config.get('output_dir')
+
+    if not os.path.exists(os.path.join(output_dir, 'ner_result.json')):
+        print('The NER result is not found.')
+        return False
+
+    print('Start generating forensic report...')
+    try:
+        generate_report(config)
+        print('Report has been generated successfully.')
+        return True
+    except Exception as e:
+        print(f'Error in generating report: {e}')
+        # Check whether it was a wkhtmltopdf error
+        wkhtml_path = config.get('wkhtml_path')
+        if not wkhtml_path or not os.path.exists(wkhtml_path):
+            print(f"Possible cause: wkhtmltopdf not found at '{wkhtml_path}'. Please check configuration.")
+        return False
+
+def main():
+    parser = argparse.ArgumentParser(description="Drone Flight Log Entity Recognizer (DFLER)")
+
+    # Global arguments
+    parser.add_argument("--config", help="Path to configuration file")
+    parser.add_argument("--output", help="Output directory")
+    parser.add_argument("--evidence", help="Evidence directory")
+    parser.add_argument("--model", help="Model directory")
+
+    subparsers = parser.add_subparsers(dest="command", help="Command to run")
+
+    # Subcommands
+    subparsers.add_parser("check", help="Check evidence files")
+    subparsers.add_parser("timeline", help="Construct forensic timeline")
+    subparsers.add_parser("ner", help="Run Named Entity Recognition")
+    subparsers.add_parser("report", help="Generate forensic report")
+    subparsers.add_parser("all", help="Run all steps")
+
+    args = parser.parse_args()
+
+    # Load config
+    config = load_config(args.config)
+
+    # Override config with args
+    if args.output: config['output_dir'] = args.output
+    if args.evidence: config['source_evidence'] = args.evidence
+    if args.model: config['model_dir'] = args.model
+
+    # Defaults
+    if 'output_dir' not in config:
+        now = datetime.now().strftime("%Y%m%d_%H%M%S")
+        config['output_dir'] = os.path.join('result', now)
+
+    if 'use_cuda' not in config:
+        config['use_cuda'] = torch.cuda.is_available()
+
+    if 'app_version' not in config:
+        config['app_version'] = "1.0.0"  # Default
+
+    # Resolve wkhtmltopdf path
+    if 'wkhtml_path' not in config:
+        config['wkhtml_path'] = {}
+
+    # If using dictionary structure from original config
+    if isinstance(config.get('wkhtml_path'), dict):
+        real_wkhtml_path = get_wkhtml_path(config)
+    else:
+        real_wkhtml_path = config.get('wkhtml_path')
+
+    # Update config flat parameter for generate_report compatibility
+    config['wkhtml_path'] = real_wkhtml_path
+
+    # Dispatch
+    if args.command == "check":
+        check_evidence(config)
+    elif args.command == "timeline":
+        construct_timeline(config)
+    elif args.command == "ner":
+        run_ner(config)
+    elif args.command == "report":
+        run_report(config)
+    
elif args.command == "all": + if check_evidence(config): + if construct_timeline(config): + if run_ner(config): + run_report(config) + else: + parser.print_help() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/generate_report.py b/src/dfler/generate_report.py similarity index 100% rename from generate_report.py rename to src/dfler/generate_report.py diff --git a/parse.py b/src/dfler/parse.py similarity index 97% rename from parse.py rename to src/dfler/parse.py index 9ab6f84..73f4a6c 100644 --- a/parse.py +++ b/src/dfler/parse.py @@ -1,302 +1,302 @@ -import pandas as pd -import os - -def read_android_log(path, file_name, output_dir): - # file_name = "contoh.csv" - full_path = f"{path}/{file_name}" - file_ext = file_name.split(".") - file_ext = file_ext[1] if len(file_ext) > 1 else "" - # print("Ekstensi: %s" % file_ext) - if file_ext == "csv": - flight_log = "" - first_line = "" - first_col = "" - sep = "" - col_num = 0 - data_num = 0 - - # read file first line - with open(full_path, "r") as file: - first_line = file.readline() - first_col = first_line.split(',')[0] - # print("first col: ", first_col) - file.close() - - # print("num of line: ", len(first_line)) - if (first_col == "CUSTOM.date [local]"): - # print("Normal condition") - flight_log = pd.read_csv(full_path, encoding="utf-8") - elif (len(first_line) > 50 and (("No." in list(first_col)) or ("#" in list(first_col)))): - # print('There is an additional col in the first col') - flight_log = pd.read_csv(full_path, encoding="utf-8") - flight_log = flight_log.drop(flight_log.columns[[0]], axis=1) - elif (len(first_line)> 50): - # print("Kondisi first line adalah kolom, namun first col berisi karakter random") - # First line is col, but the first col is a random char from decrypt process - dataframe = [] - with open(full_path) as file: - for i, line in enumerate(file): - if i == 0: # First row should be column name - line = line.rstrip().split(",") - # print("first line:", line) - # print("Length: ", len(line)) - start_index = line.index('CUSTOM.date [local]') - col_num = len(line) - start_index - dataframe.append(line[start_index:]) - # if(len(line) > col_num): - # sisa = len(line) - 169 - # elif "#" in list(line[0]): - # print("ada nih") - # dataframe.append(line[1:]) - # elif (len(line) > 160): - # dataframe.append(line) -# print(sep) - elif i > 0: -# print("second line: ", line, '\n') - line = line.rstrip().split(",") - data_num = len(line) - if (data_num == col_num): - dataframe.append(line) - elif (data_num > col_num): - dataframe.append(line[data_num-col_num:]) - else: - # print("Jumlah data lebih kecil dari jumlah kolom") - print("Number of data is less than number of the column") - # if (i == 1): - # print("i = 1, len :", len(line)) - # col_num = len(line) - # if(len(line) > col_num): - # sisa = len(line) - col_num - # dataframe.append(line[sisa:]) - # elif (len(line[0].split('/')) != 3): - # print(len(line[0])) - # print(line[0].split('/')) - # dataframe.append(line[1:]) - # else: - # dataframe.append(line) - flight_log = pd.DataFrame(data=dataframe[1:], columns=dataframe[0]) - file.close() - else: - # print("Kondisi first line adalah karakter random") - # First line is a random char - dataframe = [] - # read ulang file untuk ambil content - with open(full_path) as file: - for i, line in enumerate(file): - if i == 0: - # sep = line.rstrip()[-1] - sep = "," -# print(sep) - # print("first line:", line) - # print("Length: ", len(line)) - elif i > 0: - line = line.rstrip().split(sep) - # Second 
row should be column name - if (i == 1): - start_index = line.index('CUSTOM.date [local]') - col_num = len(line) - start_index - dataframe.append(line[start_index:]) - else: - # 3rd... row should be the log records - data_num = len(line) - if (data_num == col_num): - dataframe.append(line) - elif (data_num > col_num): - dataframe.append(line[data_num-col_num:]) - else: - print("The number of columns do not match") - # if(dataframe[0][0] == "CUSTOM.date [local]"): - # flight_log = pd.DataFrame(data=dataframe[1:], columns=dataframe[0]) - # else: - flight_log = pd.DataFrame(data=dataframe[1:], columns=dataframe[0]) - file.close() -# flight_log = "" -# with open(full_path, "r") as file: -# first_line = file.readline() -# first_col = first_line.split(',') -# print("num of line: ", len(first_line)) -# sep = "" -# if (first_col == "CUSTOM.date [local]"): -# print("masuk waras") -# flight_log = pd.read_csv(full_path, encoding="utf-8") -# else: -# print("masuk ndak waras") -# dataframe = [] -# for i, line in enumerate(file): -# if i == 0: # First row should be column name -# sep = line.rstrip()[-1] -# # print(sep) -# # print("first line:", line) -# elif i > 0: -# # print("second line: ", line) -# line = line.rstrip().split(sep) -# print(len(line)) -# dataframe.append(line.rstrip().split(sep)) -# flight_log = pd.DataFrame(data=dataframe[1:], columns=dataframe[0]) -# file.close() - # CUSTOM.date [local] - # CUSTOM.updateTime [local] - # APP.message - # APP.tip - # APP.warning - # print(flight_log) - # Filter non empty message - # print(flight_log.shape) - df_message = flight_log[flight_log.iloc[:, -3].notnull()] - df_tip = flight_log[flight_log.iloc[:, -2].notnull()] - df_warning = flight_log[flight_log.iloc[:, -1].notnull()] - merged = pd.concat([df_message, df_tip, df_warning], ignore_index=True) - remove_duplicate = merged.drop_duplicates() - record_list = [] - for i in range (0, remove_duplicate.shape[0]): - date = remove_duplicate.iloc[i, 0] - time = remove_duplicate.iloc[i, 1] - message = str(remove_duplicate.iloc[i, -3]).strip() - tip = str(remove_duplicate.iloc[i, -2]).strip() - warning = str(remove_duplicate.iloc[i, -1]).strip() - if not message == "" and message != "nan": - # message = str(remove_duplicate.iloc[i, -3]).strip() - # print("message : {}, length: {}".format(message, len(message))) - record_list.append([date, time, message]) - if not tip == "" and tip != "nan": - # message = str(remove_duplicate.iloc[i, -2]).strip() - # print("message : {}, length: {}".format(tip, len(tip))) - record_list.append([date, time, tip]) - if not warning == "" and warning != "nan": - # message = str(remove_duplicate.iloc[i, -1]).strip() - # print("message : {}, length: {}".format(warning, len(warning))) - record_list.append([date, time, warning]) - dataframe = pd.DataFrame(record_list, index=None, columns=["date", "time", "message"]) - file_name = "parsed_" + file_name - dataframe.to_csv(f"{output_dir}/{file_name}.csv", index=False, encoding='utf-8') - # print(dataframe.shape) - return "" - elif file_ext == "": - # Extract the ERROR_POP_LOG file content - with open(full_path, 'r', encoding='utf-8') as file: - # Extract the file contents here - # contents = file.read().strip() - date = file_name.split("-") - if (len(date) == 3 or len(date) == 4): - date = date[1] + "/" + date[0] + "/" + date[2] - elif (): - date = date[1] + "/" + date[0] + "/" + date[2] - else: - date = file_name - record_list = [] - lines = file.readlines() - message = "" - time = "" - for line in lines: - word = line.split(" ") - if 
len(word) < 3 and word[0] == "##": - time = word[1].strip() - continue - elif len(word) > 2 and word[0] == "##": - time = word[1].strip() - message = " ".join(word[2:]).strip() - else: - message = " ".join(word).strip() - if not message == "" and not time == "": - record_list.append([date, time, message]) - # print(record_list) - dataframe = pd.DataFrame(record_list, index=None, columns=["date", "time", "message"]) - file_name = "parsed_" + file_name - dataframe.to_csv(f"{output_dir}/{file_name}.csv", index=False, encoding='utf-8') - # print(dataframe.shape) - file.close() - return "" - -def read_ios_log(path, file_name, output_dir): - full_path = f"{path}/{file_name}" - # drone_model = folder_data["drone"] - # dataset = folder_data["dataset"] - # controller = folder_data["controller"] - with open(full_path, 'r', encoding='utf-8') as file: - # Extract the file contents here - contents = file.read().strip() - # f.close() - # print(contents) - first_char = contents[0] - second_char = contents[1] - # print(file_name, first_char, second_char) - - # if first_char == "{": - # n = 1 - # print(n) - # n = n + 1 - # # # JSON - # # data = json.loads(f.read()) - # # df = pd.json_normalize(data) - # # # df = pd.read_json(full_path) - # # df.to_csv(f"{path}/{file_name}.csv", index=False, encoding='utf-8') - if first_char == "[" and second_char == "[": - # Dictionary - # string_value = "alphanumeric@123__" - # s = ''.join(filter(str.isalnum, string_value)) - text_split = contents.split("],[") - # print(text_split) - record_list = [] - for record in text_split: - # print(record) - split_record = record.split(",") - # print(split_record) - record = "".join(filter(str.isalnum, record)) - date = split_record[0].split(" ")[0].replace('[', "").replace('"', "") - date = date.split('-') - date = date[1] + "/" + date[2] + "/" + date[0] - time = split_record[0].split(" ")[1].replace('"', "") - # message_type = split_record[1].replace('"', "") - message = split_record[2].replace(']', "").replace('"', "") - record_list.append([date, time, message]) - # print(record) - dataframe = pd.DataFrame(record_list, index=None, columns=["date", "time", "message"]) - file_name = "parsed_" + file_name - dataframe.to_csv(f"{output_dir}/{file_name}.csv", index=False, encoding='utf-8') - # print(text_split) - # elif first_char == "[" and not second_char == "[": # [2017-06-28 05:56:19.955]remove need upgrade groups - # # List - # # print(contents) - # lines = contents.split("\n") - # data_list = [] - # for line in lines: - # text_split = line.split("]") - # # print(line) - # date = "" - # time = "" - # if len(text_split[0].split(" ")) > 1: - # date = text_split[0].split(" ")[0].replace("[", "") - # time = text_split[0].split(" ")[1] - # message = "" - # if len(text_split) > 1: - # message = text_split[1] - # data_list.append([date, time, message]) - # dataframe = pd.DataFrame(data_list, index=None, columns=["date", "time", "message"]) - # dataframe.to_csv(f"{path}/{file_name}.csv", index=False, encoding='utf-8') - # print(f.read()) - file.close() - -def construct_timeline(folderName, path_list): - # os.chdir(folderName) - item_list = os.listdir(folderName) - print(item_list) - # num_folder = 0 - # for file in item_list: - # if (os.path.isdir(file)): - # num_folder += 1 - # print(num_folder) - - for i, item in enumerate(item_list): - if os.path.isdir(item): - print("folder = ", item) - full = os.path.join(folderName, item) - # print(full) - construct_timeline(full, path_list) - else: - file_ext = item.split(".") - file_ext = 
file_ext[-1] if len(file_ext) > 1 else ""
-            if(item.find("parsed_") != -1 and file_ext == "csv"):
-                path_list.append(os.path.join(folderName, item))
-    # path_list[i] = os.path.join(folderName, item)
-    # print(path_list)
+import pandas as pd
+import os
+
+def read_android_log(path, file_name, output_dir):
+    # file_name = "example.csv"
+    full_path = f"{path}/{file_name}"
+    file_ext = file_name.split(".")
+    file_ext = file_ext[1] if len(file_ext) > 1 else ""
+    # print("Extension: %s" % file_ext)
+    if file_ext == "csv":
+        flight_log = ""
+        first_line = ""
+        first_col = ""
+        sep = ""
+        col_num = 0
+        data_num = 0
+
+        # read file first line
+        with open(full_path, "r") as file:
+            first_line = file.readline()
+            first_col = first_line.split(',')[0]
+            # print("first col: ", first_col)
+            file.close()
+
+        # print("num of line: ", len(first_line))
+        if (first_col == "CUSTOM.date [local]"):
+            # print("Normal condition")
+            flight_log = pd.read_csv(full_path, encoding="utf-8")
+        elif (len(first_line) > 50 and (("No." in list(first_col)) or ("#" in list(first_col)))):
+            # print('There is an additional col in the first col')
+            flight_log = pd.read_csv(full_path, encoding="utf-8")
+            flight_log = flight_log.drop(flight_log.columns[[0]], axis=1)
+        elif (len(first_line)> 50):
+            # First line is col, but the first col is a random char from decrypt process
+            dataframe = []
+            with open(full_path) as file:
+                for i, line in enumerate(file):
+                    if i == 0: # First row should be column name
+                        line = line.rstrip().split(",")
+                        # print("first line:", line)
+                        # print("Length: ", len(line))
+                        start_index = line.index('CUSTOM.date [local]')
+                        col_num = len(line) - start_index
+                        dataframe.append(line[start_index:])
+                        # if(len(line) > col_num):
+                        #     sisa = len(line) - 169
+                        # elif "#" in list(line[0]):
+                        #     print("found one")
+                        #     dataframe.append(line[1:])
+                        # elif (len(line) > 160):
+                        #     dataframe.append(line)
+                        # print(sep)
+                    elif i > 0:
+                        # print("second line: ", line, '\n')
+                        line = line.rstrip().split(",")
+                        data_num = len(line)
+                        if (data_num == col_num):
+                            dataframe.append(line)
+                        elif (data_num > col_num):
+                            dataframe.append(line[data_num-col_num:])
+                        else:
+                            print("Number of data fields is less than the number of columns")
+                        # if (i == 1):
+                        #     print("i = 1, len :", len(line))
+                        #     col_num = len(line)
+                        # if(len(line) > col_num):
+                        #     sisa = len(line) - col_num
+                        #     dataframe.append(line[sisa:])
+                        # elif (len(line[0].split('/')) != 3):
+                        #     print(len(line[0]))
+                        #     print(line[0].split('/'))
+                        #     dataframe.append(line[1:])
+                        # else:
+                        #     dataframe.append(line)
+            flight_log = pd.DataFrame(data=dataframe[1:], columns=dataframe[0])
+            file.close()
+        else:
+            # First line is a random char
+            dataframe = []
+            # re-read the file to collect the content
+            with open(full_path) as file:
+                for i, line in enumerate(file):
+                    if i == 0:
+                        # sep = line.rstrip()[-1]
+                        sep = ","
+                        # print(sep)
+                        # print("first line:", line)
+                        # print("Length: ", len(line))
+                    elif i > 0:
+                        line = line.rstrip().split(sep)
+                        # Second row should be column name
+                        if (i == 1):
+                            start_index = line.index('CUSTOM.date [local]')
+                            col_num = len(line) - start_index
+                            dataframe.append(line[start_index:])
+                        else:
+                            # 3rd row onward should be the log records
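+                            # Rows longer than the header keep only their trailing
+                            # col_num fields, realigning the columns when the
+                            # decrypt step prepends junk fields.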
+                            data_num = len(line)
+                            if (data_num == col_num):
+                                dataframe.append(line)
+                            elif (data_num > col_num):
+                                dataframe.append(line[data_num-col_num:])
+                            else:
+                                print("The number of columns does not match")
+            # if(dataframe[0][0] == "CUSTOM.date [local]"):
+            #     flight_log = pd.DataFrame(data=dataframe[1:], columns=dataframe[0])
+            # else:
+            flight_log = pd.DataFrame(data=dataframe[1:], columns=dataframe[0])
+            file.close()
+# flight_log = ""
+# with open(full_path, "r") as file:
+#     first_line = file.readline()
+#     first_col = first_line.split(',')
+#     print("num of line: ", len(first_line))
+#     sep = ""
+#     if (first_col == "CUSTOM.date [local]"):
+#         print("header looks sane")
+#         flight_log = pd.read_csv(full_path, encoding="utf-8")
+#     else:
+#         print("header looks malformed")
+#         dataframe = []
+#         for i, line in enumerate(file):
+#             if i == 0: # First row should be column name
+#                 sep = line.rstrip()[-1]
+#                 # print(sep)
+#                 # print("first line:", line)
+#             elif i > 0:
+#                 # print("second line: ", line)
+#                 line = line.rstrip().split(sep)
+#                 print(len(line))
+#                 dataframe.append(line.rstrip().split(sep))
+#         flight_log = pd.DataFrame(data=dataframe[1:], columns=dataframe[0])
+#         file.close()
+    # CUSTOM.date [local]
+    # CUSTOM.updateTime [local]
+    # APP.message
+    # APP.tip
+    # APP.warning
+        # print(flight_log)
+        # Filter non empty message
+        # print(flight_log.shape)
+        df_message = flight_log[flight_log.iloc[:, -3].notnull()]
+        df_tip = flight_log[flight_log.iloc[:, -2].notnull()]
+        df_warning = flight_log[flight_log.iloc[:, -1].notnull()]
+        merged = pd.concat([df_message, df_tip, df_warning], ignore_index=True)
+        remove_duplicate = merged.drop_duplicates()
+        record_list = []
+        for i in range (0, remove_duplicate.shape[0]):
+            date = remove_duplicate.iloc[i, 0]
+            time = remove_duplicate.iloc[i, 1]
+            message = str(remove_duplicate.iloc[i, -3]).strip()
+            tip = str(remove_duplicate.iloc[i, -2]).strip()
+            warning = str(remove_duplicate.iloc[i, -1]).strip()
+            if not message == "" and message != "nan":
+                # message = str(remove_duplicate.iloc[i, -3]).strip()
+                # print("message : {}, length: {}".format(message, len(message)))
+                record_list.append([date, time, message])
+            if not tip == "" and tip != "nan":
+                # message = str(remove_duplicate.iloc[i, -2]).strip()
+                # print("message : {}, length: {}".format(tip, len(tip)))
+                record_list.append([date, time, tip])
+            if not warning == "" and warning != "nan":
+                # message = str(remove_duplicate.iloc[i, -1]).strip()
+                # print("message : {}, length: {}".format(warning, len(warning)))
+                record_list.append([date, time, warning])
+        dataframe = pd.DataFrame(record_list, index=None, columns=["date", "time", "message"])
+        file_name = "parsed_" + file_name
+        dataframe.to_csv(f"{output_dir}/{file_name}.csv", index=False, encoding='utf-8')
+        # print(dataframe.shape)
+        return ""
+    elif file_ext == "":
+        # Extract the ERROR_POP_LOG file content
+        with open(full_path, 'r', encoding='utf-8') as file:
+            # Extract the file contents here
+            # contents = file.read().strip()
+            date = file_name.split("-")
+            if (len(date) == 3 or len(date) == 4):
+                date = date[1] + "/" + date[0] + "/" + date[2]
+            else:
+                date = file_name
+            record_list = []
+            lines = file.readlines()
+            message = ""
+            time = ""
+            for line in lines:
+                word = line.split(" ")
+                if len(word) < 3 and word[0] == "##":
+                    time = word[1].strip()
+                    continue
+                elif len(word) > 2 and word[0] == "##":
+                    time = word[1].strip()
+                    message = " ".join(word[2:]).strip()
+                else:
+                    message = " ".join(word).strip()
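+                # An ERROR_POP_LOG line looks like "## HH:MM:SS message"; lines
+                # without the "##" prefix reuse the most recent timestamp.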
+                if not message == "" and not time == "":
+                    record_list.append([date, time, message])
+            # print(record_list)
+            dataframe = pd.DataFrame(record_list, index=None, columns=["date", "time", "message"])
+            file_name = "parsed_" + file_name
+            dataframe.to_csv(f"{output_dir}/{file_name}.csv", index=False, encoding='utf-8')
+            # print(dataframe.shape)
+            file.close()
+        return ""
+
+def read_ios_log(path, file_name, output_dir):
+    full_path = f"{path}/{file_name}"
+    # drone_model = folder_data["drone"]
+    # dataset = folder_data["dataset"]
+    # controller = folder_data["controller"]
+    with open(full_path, 'r', encoding='utf-8') as file:
+        # Extract the file contents here
+        contents = file.read().strip()
+        # f.close()
+        # print(contents)
+        first_char = contents[0]
+        second_char = contents[1]
+        # print(file_name, first_char, second_char)
+
+        # if first_char == "{":
+        #     n = 1
+        #     print(n)
+        #     n = n + 1
+        #     # # JSON
+        #     # data = json.loads(f.read())
+        #     # df = pd.json_normalize(data)
+        #     # # df = pd.read_json(full_path)
+        #     # df.to_csv(f"{path}/{file_name}.csv", index=False, encoding='utf-8')
+        if first_char == "[" and second_char == "[":
+            # Dictionary
+            # string_value = "alphanumeric@123__"
+            # s = ''.join(filter(str.isalnum, string_value))
+            text_split = contents.split("],[")
+            # print(text_split)
+            record_list = []
+            for record in text_split:
+                # print(record)
+                split_record = record.split(",")
+                # print(split_record)
+                record = "".join(filter(str.isalnum, record))
+                date = split_record[0].split(" ")[0].replace('[', "").replace('"', "")
+                date = date.split('-')
+                date = date[1] + "/" + date[2] + "/" + date[0]
+                time = split_record[0].split(" ")[1].replace('"', "")
+                # message_type = split_record[1].replace('"', "")
+                message = split_record[2].replace(']', "").replace('"', "")
+                record_list.append([date, time, message])
+                # print(record)
+            dataframe = pd.DataFrame(record_list, index=None, columns=["date", "time", "message"])
+            file_name = "parsed_" + file_name
+            dataframe.to_csv(f"{output_dir}/{file_name}.csv", index=False, encoding='utf-8')
+        # print(text_split)
+        # elif first_char == "[" and not second_char == "[": # [2017-06-28 05:56:19.955]remove need upgrade groups
+        #     # List
+        #     # print(contents)
+        #     lines = contents.split("\n")
+        #     data_list = []
+        #     for line in lines:
+        #         text_split = line.split("]")
+        #         # print(line)
+        #         date = ""
+        #         time = ""
+        #         if len(text_split[0].split(" ")) > 1:
+        #             date = text_split[0].split(" ")[0].replace("[", "")
+        #             time = text_split[0].split(" ")[1]
+        #         message = ""
+        #         if len(text_split) > 1:
+        #             message = text_split[1]
+        #         data_list.append([date, time, message])
+        #     dataframe = pd.DataFrame(data_list, index=None, columns=["date", "time", "message"])
+        #     dataframe.to_csv(f"{path}/{file_name}.csv", index=False, encoding='utf-8')
+        # print(f.read())
+        file.close()
+
+def construct_timeline(folderName, path_list):
+    # os.chdir(folderName)
+    item_list = os.listdir(folderName)
+    print(item_list)
+    # num_folder = 0
+    # for file in item_list:
+    #     if (os.path.isdir(file)):
+    #         num_folder += 1
+    # print(num_folder)
+
+    for i, item in enumerate(item_list):
+        if os.path.isdir(os.path.join(folderName, item)):
+            print("folder = ", item)
+            full = os.path.join(folderName, item)
+            # print(full)
+            construct_timeline(full, path_list)
+        else:
+            file_ext = item.split(".")
+            file_ext = file_ext[-1] if len(file_ext) > 1 else ""
+            if(item.find("parsed_") != -1 and file_ext == "csv"):
+                path_list.append(os.path.join(folderName, item))
+    # path_list[i] = os.path.join(folderName, item)
+    # print(path_list)
     return path_list
\ No newline at end of file
diff --git a/tests/test_cli.py b/tests/test_cli.py
new file mode 100644
index 0000000..133fafe
--- /dev/null
+++ b/tests/test_cli.py
@@ -0,0 +1,41 @@
+import pytest
+from dfler.dfler import main
+from unittest.mock import patch
+
+@pytest.fixture
+def evidence_dir(tmp_path):
+    d = tmp_path / "evidence"
+    d.mkdir()
+    (d / "android").mkdir()
+    (d / "android" / "log.txt").write_text("dummy log")
+    return d
+
+@pytest.fixture
+def output_dir(tmp_path):
+    d = tmp_path / "output"
+    d.mkdir()
+    return d
+
+def test_check_command(evidence_dir, output_dir):
+    with patch("sys.argv", ["dfler", "check", "--evidence", str(evidence_dir), "--output", str(output_dir)]):
+        with patch("dfler.dfler.check_evidence") as mock_check:
+            main()
+            mock_check.assert_called_once()
+
+def test_timeline_command(output_dir):
+    with patch("sys.argv", ["dfler", "timeline", "--output", str(output_dir)]):
+        with patch("dfler.dfler.construct_timeline") as mock_timeline:
+            main()
+            mock_timeline.assert_called_once()
+
+def test_ner_command(output_dir):
+    with patch("sys.argv", ["dfler", "ner", "--output", str(output_dir), "--model", "dummy_model"]):
+        with patch("dfler.dfler.run_ner") as mock_ner:
+            main()
+            mock_ner.assert_called_once()
+
+def test_report_command(output_dir):
+    with patch("sys.argv", ["dfler", "report", "--output", str(output_dir)]):
+        with patch("dfler.dfler.run_report") as mock_report:
+            main()
+            mock_report.assert_called_once()

From a16e24b48dbc904c0299ac6952c1256b163fdcd7 Mon Sep 17 00:00:00 2001
From: PowerViber
Date: Wed, 11 Feb 2026 14:38:12 +0700
Subject: [PATCH 2/3] Removed src, moved to dfler/

---
 {src/dfler => dfler}/__init__.py        | 0
 {src/dfler => dfler}/config.json        | 0
 {src/dfler => dfler}/dfler.py           | 0
 {src/dfler => dfler}/generate_report.py | 0
 {src/dfler => dfler}/parse.py           | 0
 5 files changed, 0 insertions(+), 0 deletions(-)
 rename {src/dfler => dfler}/__init__.py (100%)
 rename {src/dfler => dfler}/config.json (100%)
 rename {src/dfler => dfler}/dfler.py (100%)
 rename {src/dfler => dfler}/generate_report.py (100%)
 rename {src/dfler => dfler}/parse.py (100%)

diff --git a/src/dfler/__init__.py b/dfler/__init__.py
similarity index 100%
rename from src/dfler/__init__.py
rename to dfler/__init__.py
diff --git a/src/dfler/config.json b/dfler/config.json
similarity index 100%
rename from src/dfler/config.json
rename to dfler/config.json
diff --git a/src/dfler/dfler.py b/dfler/dfler.py
similarity index 100%
rename from src/dfler/dfler.py
rename to dfler/dfler.py
diff --git a/src/dfler/generate_report.py b/dfler/generate_report.py
similarity index 100%
rename from src/dfler/generate_report.py
rename to dfler/generate_report.py
diff --git a/src/dfler/parse.py b/dfler/parse.py
similarity index 100%
rename from src/dfler/parse.py
rename to dfler/parse.py

From 2931d19779474a03edf2f9681712f5f86d701ad8 Mon Sep 17 00:00:00 2001
From: PowerViber
Date: Thu, 12 Feb 2026 12:28:14 +0700
Subject: [PATCH 3/3] Finalize packaging: CLI refactor, workflow, and dependencies

---
 .github/workflows/publish.yml | 32 ++++++++++++++++++++++++++++++++
 dfler/dfler.py                | 21 +++++++++++----------
 pyproject.toml                |  5 +++++
 requirements.txt              |  4 ++--
 4 files changed, 50 insertions(+), 12 deletions(-)
 create mode 100644 .github/workflows/publish.yml

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 0000000..9f5f04c
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,32 @@
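+# Publish workflow: builds the package with PEP 517 `build` and uploads it to
+# PyPI; assumes a PYPI_API_TOKEN secret is configured on the repository.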
+name: Publish Python Package
+
+on:
+  release:
+    types: [published]
+  workflow_dispatch:
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install build
+
+      - name: Build package
+        run: python -m build
+
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/dfler/dfler.py b/dfler/dfler.py
index 12d502e..fc02eda 100644
--- a/dfler/dfler.py
+++ b/dfler/dfler.py
@@ -234,20 +234,21 @@ def run_report(config):
 def main():
     parser = argparse.ArgumentParser(description="Drone Flight Log Entity Recognizer (DFLER)")
 
-    # Global arguments
-    parser.add_argument("--config", help="Path to configuration file")
-    parser.add_argument("--output", help="Output directory")
-    parser.add_argument("--evidence", help="Evidence directory")
-    parser.add_argument("--model", help="Model directory")
+    # Common arguments shared by every subcommand
+    parent_parser = argparse.ArgumentParser(add_help=False)
+    parent_parser.add_argument("--config", help="Path to configuration file")
+    parent_parser.add_argument("--output", help="Output directory")
+    parent_parser.add_argument("--evidence", help="Evidence directory")
+    parent_parser.add_argument("--model", help="Model directory")
 
     subparsers = parser.add_subparsers(dest="command", help="Command to run")
 
     # Subcommands
-    subparsers.add_parser("check", help="Check evidence files")
-    subparsers.add_parser("timeline", help="Construct forensic timeline")
-    subparsers.add_parser("ner", help="Run Named Entity Recognition")
-    subparsers.add_parser("report", help="Generate forensic report")
-    subparsers.add_parser("all", help="Run all steps")
+    subparsers.add_parser("check", help="Check evidence files", parents=[parent_parser])
+    subparsers.add_parser("timeline", help="Construct forensic timeline", parents=[parent_parser])
+    subparsers.add_parser("ner", help="Run Named Entity Recognition", parents=[parent_parser])
+    subparsers.add_parser("report", help="Generate forensic report", parents=[parent_parser])
+    subparsers.add_parser("all", help="Run all steps", parents=[parent_parser])
 
     args = parser.parse_args()
 
diff --git a/pyproject.toml b/pyproject.toml
index c9313b4..fc6a7fc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,3 +33,8 @@ dfler = "dfler.dfler:main"
 [project.urls]
 "Homepage" = "https://github.com/DroneNLP/dfler"
 "Bug Tracker" = "https://github.com/DroneNLP/dfler/issues"
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["dfler*"]
+namespaces = false
diff --git a/requirements.txt b/requirements.txt
index 0888b16..481c4c0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
-simpletransformers=>0.63.6
-seqeval=>1.22.0
+simpletransformers>=0.63.6
+seqeval>=1.2.2
 pandas
 numpy
 torch