diff --git a/src/digarch_scripts/report/report_ftk_extents.py b/src/digarch_scripts/report/report_ftk_extents.py index ed621c0..eac5cc6 100644 --- a/src/digarch_scripts/report/report_ftk_extents.py +++ b/src/digarch_scripts/report/report_ftk_extents.py @@ -1,5 +1,6 @@ from lxml import etree import json +from collections import defaultdict import re import argparse import os @@ -74,7 +75,7 @@ def parse_xml(path: pathlib.Path): return tree -def create_er_list( +def create_component_list( tree: etree.ElementTree ) -> list[list[list[str], str, str]]: @@ -90,7 +91,7 @@ def create_er_list( namespaces=FO_NAMESPACE )[0] - ers = [] + components = [] hierarchy = [] for child in tree: # skip rows with an indent < 24 @@ -110,54 +111,66 @@ def create_er_list( ) hierarchy.append(child.text) - # only record if entry is an ER + # only record if entry is an ER or DI possible_ref = child.xpath( 'fo:basic-link/fo:page-number-citation', namespaces=FO_NAMESPACE ) - if possible_ref and hierarchy[-1].startswith('ER'): + if possible_ref and (hierarchy[-1].startswith('ER') or hierarchy[-1].startswith('DI')): refid = possible_ref[0].get('ref-id') - ers.append( + components.append( [hierarchy.copy(), refid, hierarchy[-1]] ) - audit_ers(ers) + audit_components(components) - return ers + return components -def audit_ers(ers: list[list[list[str], str, str]]) -> None: - er_numbers_used = {} - for er in ers: - number = re.match(r'ER (\d+):', er[2]) +def audit_components(components: list[list[list[str], str, str]]) -> None: + er_numbers_used = defaultdict(list) + di_numbers_used = defaultdict(list) + for component in components: + number = re.match(r'(ER|DI) (\d+):', component[2]) if not number: LOGGER.warning( - f'ER is missing a number: {er[2]}. Review the ERs with the processing archivist' + f'Component is missing a number: {component[2]}. Review the bookmarks with the processing archivist' ) - er_number = 0 - else: - er_number = int(number[1]) + er_numbers_used[0].append(component[2]) - if er_number not in er_numbers_used.keys(): - er_numbers_used[er_number] = [er[2]] + elif number[1] == 'ER': + er_numbers_used[int(number[2])].append(component[2]) else: - er_numbers_used[er_number].append(er[2]) + di_numbers_used[int(number[2])].append(component[2]) - # test for er number gaps - er_min = min(er_numbers_used.keys()) - er_max = max(er_numbers_used.keys()) - for i in range(er_min, er_max): - if i not in er_numbers_used.keys(): - LOGGER.warning( - f'Collection uses ER {er_min} to ER {er_max}. ER {i} is skipped. Review the ERs with the processing archivist' - ) - # test for duplicate ers - for er_number, er_names in er_numbers_used.items(): - if len(er_names) > 1: - LOGGER.warning( - f'ER {er_number} is used multiple times: {", ".join(er_names)}. Review the ERs with the processing archivist' - ) + def test_for_number_gaps(numbers_used: dict, type: str): + if not numbers_used: + return None + + min_number = min(numbers_used.keys()) + max_number = max(numbers_used.keys()) + for i in range(min_number, max_number): + if i not in numbers_used.keys(): + LOGGER.warning( + f'Collection {type} component range is numbered {min_number} to {max_number}. {i} is skipped. Review the bookmarks with the processing archivist' + ) + + test_for_number_gaps(er_numbers_used, 'ER') + test_for_number_gaps(di_numbers_used, 'DI') + + def test_for_duplicate_numbers(numbers_used: dict, type: str): + if not numbers_used: + return None + + for number, names in numbers_used.items(): + if len(names) > 1: + LOGGER.warning( + f'{type} {number} is used multiple times: {", ".join(names)}. Review the bookmarks with the processing archivist' + ) + + test_for_duplicate_numbers(er_numbers_used, 'ER') + test_for_duplicate_numbers(di_numbers_used, 'DI') return None @@ -197,42 +210,42 @@ def transform_bookmark_tables( return bookmark_contents -def add_extents_to_ers( - er_list: list[list[list[str], str, str]], +def add_extents_to_components( + component_list: list[list[list[str], str, str]], bookmark_tables: list[dict] ) -> list[list[str, int, int]]: ''' - summarizes the extent for each ER by + summarizes the extent for each component by correlating the table of contents with the bookmark tables. - Returns list of lists with hierarchal ER string, file size, and file count. + Returns list of lists with hierarchal component string, file size, and file count. ''' - ers_with_extents = [] + components_with_extents = [] - for er in er_list: - bookmark_id = er[1] - er_name = er[-1] - size, count = get_er_report(bookmark_tables, bookmark_id, er_name) + for component in component_list: + bookmark_id = component[1] + component_name = component[-1] + size, count = get_component_report(bookmark_tables, bookmark_id, component_name) if count == 0: LOGGER.warning( - f'{er_name} does not contain any files. It will be omitted from the report.') + f'{component_name} does not contain any files. It will be omitted from the report.') continue if size == 0: LOGGER.warning( - f'{er_name} contains no files with bytes. This ER is omitted from report. Review this ER with the processing archivist.') + f'{component_name} contains no files with bytes. This component is omitted from report. Review this component with the processing archivist.') continue - ers_with_extents.append([er[0], size, count]) + components_with_extents.append([component[0], size, count]) - return ers_with_extents + return components_with_extents -def get_er_report( - er_files: list[dict], +def get_component_report( + component_files: list[dict], bookmark_id: str, - er_name: str + component_name: str ) -> tuple[int, int]: ''' @@ -244,7 +257,7 @@ def get_er_report( count = 0 prefix = bookmark_id.replace('k', 'f') - for entry in er_files: + for entry in component_files: if entry['bookmark_id'] == prefix: byte_string = entry['Logical Size'] @@ -257,7 +270,7 @@ def get_er_report( file_name = entry['Name'] #extract file name, might have to parse file table better LOGGER.warning( - f'{er_name} contains the following 0-byte file: {file_name}. Review this file with the processing archivist.') + f'{component_name} contains the following 0-byte file: {file_name}. Review this file with the processing archivist.') size += file_size else: @@ -342,14 +355,14 @@ def main() -> None: tree = parse_xml(args.file) print('Creating report ...') - ers = create_er_list(tree) + components = create_component_list(tree) bookmark_tables = transform_bookmark_tables(tree) - ers_with_extents = add_extents_to_ers(ers, bookmark_tables) + components_with_extents = add_extents_to_components(components, bookmark_tables) colltitle = extract_collection_title(tree) dct = {'title': colltitle, 'children': []} - for er in ers_with_extents: - dct = create_report(er, dct) + for component in components_with_extents: + dct = create_report(component, dct) print("Writing report ...") make_json(args.output, dct, colltitle) diff --git a/tests/fixtures/report/Report.xml b/tests/fixtures/report/Report.xml index b12605d..e25ad2c 100644 --- a/tests/fixtures/report/Report.xml +++ b/tests/fixtures/report/Report.xml @@ -1,2 +1,2 @@ -Case SummaryCase InformationFile OverviewEvidence ListBookmarksSharedExtents Test papersER 10: File 21,2023Series 1Subseries(1)ER 1: Text, 2023Subsubseries(2)ER 2: File 15, 2023Subsubsubseries(3)Subsubsubsubseries(4)ER 10: Folder 2, 2023ER 3: Folder 1, 2023Subsubsubseries(3) the secondER 11: File 16, 2023Subsubseries(2) the secondER 23: File 17, 2023Subseries(1) the secondER 4: File 18, 2023Series 2ER 9: File 20,2023Subseries(1) of Series 2ER 8: File 2, 2023Subsubseries(2) of Series 2ER 7: File 19, 2023Series 3ER 12: File 0 (also in ER 1), 2023ER 5: No Files, 2023ER 6: Zero Length, 2023Page of Case Summary6/27/2023Time zone for display: Eastern Daylight TimeCase Information6/27/2023Time zone for display: Eastern Daylight TimeVersionAccessData Forensic Toolkit Version: 7.1.0.290Case OwnerFRED\forenCase NameM12345 Extents TestCase ReferenceCase DescriptionReport Created6/27/2023 3:20:35 PMFile Overview6/27/2023Evidence GroupsUngrouped: 34File ItemsEvidence Items: 1Checked Items: 0Unchecked Items: 34File CategoryArchives: 0Databases: 0Documents: 30Email: 0Executable: 0Folders: 2Graphics: 0Internet/Chat Files: 0Mobile Phone: 0Multimedia: 0OS/File System Files: 1Other Encryption Files: 0Other Known Types: 0Presentations: 0Slack/Free Space: 0Spreadsheets: 0Unknown Types: 1User Types: 0File StatusBad Extensions: 0Data Carved Files: 0Decrypted Files: 0Deleted Files: 0Duplicate Items: 0Email Attachments: 0Email Related Items (From Email): 0Encrypted Files: 0Flagged Ignore: 0Flagged Privileged: 0From Recycle Bin: 0KFF Alert Files: 0KFF Ignorable: 0OCR Graphics: 0OLE Subitems: 0Project VIC Matches: 0User-Decrypted Files: 0LabelsEmail StatusEmail Attachments: 0Email Related Items (From Email): 0Email Reply: 0Forwarded Email: 0Evidence List6/27/2023Display Name: test2Evidence Path: F:\Evidence\Extents Test\test2ID Number/Name: Evidence Type: Live FolderDescription: Time Zone: America/New_YorkPage of All Bookmarks6/27/2023Time zone for display: Eastern Daylight TimeSharedBookmark: Extents Test papers6/27/2023Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 10: File 21,20236/27/2023Comments: Creator: FRED\forenFile Count: 1FilesFile CommentsNamefile_21.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/file_21.txtMD5 Hash38b85c3c7827678c33b3f172dfbcc739FolderFalseBookmark: Series 16/27/2023Comments: Creator: FRED\forenFile Count: 0FilesBookmark: Subseries(1)6/27/2023Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 1: Text, 20236/27/2023Comments: Creator: FRED\forenFile Count: 7FilesFile CommentsNamefile_0.txtPhysical Sizen/aLogical Size15 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:33:12 PM (2023-05-03 17:33:12 UTC)Pathtest2/file_0.txtMD5 Hashe4328dc3ad8e97cf10f50641424bafecFolderFalseFile CommentsNamefile_1.txtPhysical Sizen/aLogical Size15 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/file_1.txtMD5 Hash88c16a56754e0f17a93d269ae74dde9bFolderFalseFile CommentsNamefile_10.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_1/file_10.txtMD5 Hash15e4d9a96c2600d72a46f85d371b42e4FolderFalseFile CommentsNamefile_11.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_2/file_11.txtMD5 Hash13740f8d72f0137b683de71596129935FolderFalseFile CommentsNamefile_12.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_2/file_12.txtMD5 Hashc1521a239a602e3c2062fd6886e3e659FolderFalseFile CommentsNamefile_13.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:17 PM (2023-05-03 17:19:17 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_2/file_13.txtMD5 Hashd71651413daa5ca6c46480a21f868812FolderFalseFile CommentsNamefile_14.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_2/file_14.txtMD5 Hashf9f173c4c62fd81332413b5b4a7c0f99FolderFalseBookmark: Subsubseries(2)6/27/2023Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 2: File 15, 20236/27/2023Comments: Creator: FRED\forenFile Count: 1FilesFile CommentsNamefile_15.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_2/file_15.txtMD5 Hash7704d4191615bff8bdd8c9c4bda52e5dFolderFalseBookmark: Subsubsubseries(3)6/27/2023Comments: Creator: FRED\forenFile Count: 0FilesBookmark: Subsubsubsubseries(4)6/27/2023Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 10: Folder 2, 20236/27/2023Comments: Creator: FRED\forenFile Count: 5FilesFile CommentsNamefile_11.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_2/file_11.txtMD5 Hash13740f8d72f0137b683de71596129935FolderFalseFile CommentsNamefile_12.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_2/file_12.txtMD5 Hashc1521a239a602e3c2062fd6886e3e659FolderFalseFile CommentsNamefile_13.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:17 PM (2023-05-03 17:19:17 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_2/file_13.txtMD5 Hashd71651413daa5ca6c46480a21f868812FolderFalseFile CommentsNamefile_14.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_2/file_14.txtMD5 Hashf9f173c4c62fd81332413b5b4a7c0f99FolderFalseFile CommentsNamefile_15.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_2/file_15.txtMD5 Hash7704d4191615bff8bdd8c9c4bda52e5dFolderFalseBookmark: ER 3: Folder 1, 20236/27/2023Comments: Creator: FRED\forenFile Count: 5FilesFile CommentsNamefile_6.txtPhysical Sizen/aLogical Size15 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_1/file_6.txtMD5 Hash0059813e3c0ba105c27538c0010cf6b0FolderFalseFile CommentsNamefile_7.txtPhysical Sizen/aLogical Size15 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_1/file_7.txtMD5 Hash68c0f05c6422571d440d08c1759770bcFolderFalseFile CommentsNamefile_8.txtPhysical Sizen/aLogical Size15 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_1/file_8.txtMD5 Hash599eee62b6f909314c479168f3fd423dFolderFalseFile CommentsNamefile_9.txtPhysical Sizen/aLogical Size15 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_1/file_9.txtMD5 Hashc955dc3766e19f947d2af22c3a87f6f3FolderFalseFile CommentsNamefolder_1Physical Sizen/aLogical Size0 BCreated Date5/3/2023 1:23:51 PM (2023-05-03 17:23:51 UTC)Modified Date5/3/2023 1:30:23 PM (2023-05-03 17:30:23 UTC)Accessed Date5/3/2023 1:33:10 PM (2023-05-03 17:33:10 UTC)Pathtest2/folder_1MD5 HashFolderTrueBookmark: Subsubsubseries(3) the second6/27/2023Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 11: File 16, 20236/27/2023Comments: Creator: FRED\forenFile Count: 1FilesFile CommentsNamefile_16.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:17 PM (2023-05-03 17:19:17 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/file_16.txtMD5 Hashf67e17268560573531e61f7c5bf050a1FolderFalseBookmark: Subsubseries(2) the second6/27/2023Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 23: File 17, 20236/27/2023Comments: Creator: FRED\forenFile Count: 1FilesFile CommentsNamefile_17.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/file_17.txtMD5 Hash649b7ae3accd9110484c988cb12ea31aFolderFalseBookmark: Subseries(1) the second6/27/2023Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 4: File 18, 20236/27/2023Comments: Creator: FRED\forenFile Count: 1FilesFile CommentsNamefile_18.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/file_18.txtMD5 Hashcc595e6557db981512e227f0dfcb45daFolderFalseBookmark: Series 26/27/2023Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 9: File 20,20236/27/2023Comments: Creator: FRED\forenFile Count: 1FilesFile CommentsNamefile_20.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:17 PM (2023-05-03 17:19:17 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/file_20.txtMD5 Hash73cb9ad0b717bebb5e8c27800cb599f1FolderFalseBookmark: Subseries(1) of Series 26/27/2023Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 8: File 2, 20236/27/2023Comments: Creator: FRED\forenFile Count: 1FilesFile CommentsNamefile_2.txtPhysical Sizen/aLogical Size15 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/file_2.txtMD5 Hashdb06069ef1c9f40986ffa06db4fe8fd7FolderFalseBookmark: Subsubseries(2) of Series 26/27/2023Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 7: File 19, 20236/27/2023Comments: Creator: FRED\forenFile Count: 1FilesFile CommentsNamefile_19.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/file_19.txtMD5 Hashf24f15d357735521f2933b41b9b2c616FolderFalseBookmark: Series 36/27/2023Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 12: File 0 (also in ER 1), 20236/27/2023Comments: Creator: FRED\forenFile Count: 1FilesFile CommentsNamefile_0.txtPhysical Sizen/aLogical Size15 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:33:12 PM (2023-05-03 17:33:12 UTC)Pathtest2/file_0.txtMD5 Hashe4328dc3ad8e97cf10f50641424bafecFolderFalseBookmark: ER 5: No Files, 20236/27/2023Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 6: Zero Length, 20236/27/2023Comments: Creator: FRED\forenFile Count: 1FilesFile CommentsNamefile00.txtPhysical Sizen/aLogical Size0 BCreated Date5/3/2023 1:29:29 PM (2023-05-03 17:29:29 UTC)Modified Date5/3/2023 1:29:29 PM (2023-05-03 17:29:29 UTC)Accessed Date5/3/2023 1:33:12 PM (2023-05-03 17:33:12 UTC)Pathtest2/file00.txtMD5 HashFolderFalsePage of Bookmarks, -ER 10: File 21,2023, file_21.txt, ER 10: Folder 2, 2023, file_11.txt, file_12.txt, file_13.txt, file_14.txt, file_15.txt, ER 11: File 16, 2023, file_16.txt, ER 12: File 0 (also in ER 1), 2023, file_0.txt, ER 1: Text, 2023, file_0.txt, file_1.txt, file_10.txt, file_11.txt, file_12.txt, file_13.txt, file_14.txt, ER 23: File 17, 2023, file_17.txt, ER 2: File 15, 2023, file_15.txt, ER 3: Folder 1, 2023, file_6.txt, file_7.txt, file_8.txt, file_9.txt, folder_1, ER 4: File 18, 2023, file_18.txt, ER 5: No Files, 2023, ER 6: Zero Length, 2023, file00.txt, ER 7: File 19, 2023, file_19.txt, ER 8: File 2, 2023, file_2.txt, ER 9: File 20,2023, file_20.txt, Extents Test papers, Series 1, Series 2, Series 3, Subseries(1), Subseries(1) of Series 2, Subseries(1) the second, Subsubseries(2), Subsubseries(2) of Series 2, Subsubseries(2) the second, Subsubsubseries(3), Subsubsubseries(3) the second, Subsubsubsubseries(4), Case Information, -Case Description, Case Name, Case Owner, Case Reference, Report Created, Version, File Overview, -Email Status, Email Attachments, Email Related Items (From Email), Email Reply, Forwarded Email, Evidence Groups, Ungrouped, File Category, Archives, Databases, Documents, Email, Executable, Folders, Graphics, Internet/Chat Files, Mobile Phone, Multimedia, OS/File System Files, Other Encryption Files, Other Known Types, Presentations, Slack/Free Space, Spreadsheets, Unknown Types, User Types, File Items, Evidence Items, Checked Items, Unchecked Items, File Status, Bad Extensions, Data Carved Files, Decrypted Files, Deleted Files, Duplicate Items, Email Attachments, Email Related Items (From Email), Encrypted Files, Flagged Ignore, Flagged Privileged, From Recycle Bin, KFF Alert Files, KFF Ignorable, OCR Graphics, OLE Subitems, Project VIC Matches, User-Decrypted Files, Labels, Evidence List, -test2, +Case SummaryCase InformationFile OverviewEvidence ListBookmarksSharedExtents Test papersER 10: File 21,2023Series 1Subseries(1)ER 1: Text, 2023Subsubseries(2)ER 2: File 15, 2023Subsubsubseries(3)Subsubsubsubseries(4)ER 10: Folder 2, 2023ER 3: Folder 1, 2023Subsubsubseries(3) the secondER 11: File 16, 2023Subsubseries(2) the secondER 23: File 17, 2023Subseries(1) the secondER 4: File 18, 2023Series 2ER 9: File 20,2023Subseries(1) of Series 2ER 8: File 2, 2023Subsubseries(2) of Series 2ER 7: File 19, 2023Series 3DI 1: Blank Disk, 2021ER 12: File 0 (also in ER 1), 2023ER 5: No Files, 2023ER 6: Zero Length, 2023Page of Case Summary8/13/2024Time zone for display: Eastern Daylight TimeCase Information8/13/2024Time zone for display: Eastern Daylight TimeVersionAccessData Forensic Toolkit Version: 7.1.0.290Case OwnerFRED\forenCase NameM12345 Extents TestCase ReferenceCase DescriptionReport Created8/13/2024 3:49:51 PMFile Overview8/13/2024Evidence GroupsUngrouped: 41File ItemsEvidence Items: 2Checked Items: 0Unchecked Items: 41File CategoryArchives: 0Databases: 0Documents: 30Email: 0Executable: 0Folders: 4Graphics: 0Internet/Chat Files: 0Mobile Phone: 0Multimedia: 0OS/File System Files: 3Other Encryption Files: 0Other Known Types: 0Presentations: 0Slack/Free Space: 0Spreadsheets: 0Unknown Types: 4User Types: 0File StatusBad Extensions: 0Data Carved Files: 0Decrypted Files: 0Deleted Files: 0Duplicate Items: 2Email Attachments: 0Email Related Items (From Email): 0Encrypted Files: 0Flagged Ignore: 0Flagged Privileged: 0From Recycle Bin: 0KFF Alert Files: 0KFF Ignorable: 0OCR Graphics: 0OLE Subitems: 0Project VIC Matches: 0User-Decrypted Files: 0LabelsEmail StatusEmail Attachments: 0Email Related Items (From Email): 0Email Reply: 0Forwarded Email: 0Evidence List8/13/2024Display Name: Floppy_Disk.001Evidence Path: C:\Users\foren\Downloads\Floppy_Disk.001ID Number/Name: Evidence Type: Disk ImageDescription: Time Zone: America/New_YorkDisplay Name: test2Evidence Path: F:\Evidence\Extents Test\test2ID Number/Name: Evidence Type: Live FolderDescription: Time Zone: America/New_YorkPage of All Bookmarks8/13/2024Time zone for display: Eastern Daylight TimeSharedBookmark: Extents Test papers8/13/2024Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 10: File 21,20238/13/2024Comments: Creator: FRED\forenFile Count: 1FilesFile CommentsNamefile_21.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/file_21.txtMD5 Hash38b85c3c7827678c33b3f172dfbcc739FolderFalseBookmark: Series 18/13/2024Comments: Creator: FRED\forenFile Count: 0FilesBookmark: Subseries(1)8/13/2024Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 1: Text, 20238/13/2024Comments: Creator: FRED\forenFile Count: 7FilesFile CommentsNamefile_0.txtPhysical Sizen/aLogical Size15 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:33:12 PM (2023-05-03 17:33:12 UTC)Pathtest2/file_0.txtMD5 Hashe4328dc3ad8e97cf10f50641424bafecFolderFalseFile CommentsNamefile_1.txtPhysical Sizen/aLogical Size15 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/file_1.txtMD5 Hash88c16a56754e0f17a93d269ae74dde9bFolderFalseFile CommentsNamefile_10.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_1/file_10.txtMD5 Hash15e4d9a96c2600d72a46f85d371b42e4FolderFalseFile CommentsNamefile_11.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_2/file_11.txtMD5 Hash13740f8d72f0137b683de71596129935FolderFalseFile CommentsNamefile_12.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_2/file_12.txtMD5 Hashc1521a239a602e3c2062fd6886e3e659FolderFalseFile CommentsNamefile_13.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:17 PM (2023-05-03 17:19:17 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_2/file_13.txtMD5 Hashd71651413daa5ca6c46480a21f868812FolderFalseFile CommentsNamefile_14.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_2/file_14.txtMD5 Hashf9f173c4c62fd81332413b5b4a7c0f99FolderFalseBookmark: Subsubseries(2)8/13/2024Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 2: File 15, 20238/13/2024Comments: Creator: FRED\forenFile Count: 1FilesFile CommentsNamefile_15.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_2/file_15.txtMD5 Hash7704d4191615bff8bdd8c9c4bda52e5dFolderFalseBookmark: Subsubsubseries(3)8/13/2024Comments: Creator: FRED\forenFile Count: 0FilesBookmark: Subsubsubsubseries(4)8/13/2024Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 10: Folder 2, 20238/13/2024Comments: Creator: FRED\forenFile Count: 5FilesFile CommentsNamefile_11.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_2/file_11.txtMD5 Hash13740f8d72f0137b683de71596129935FolderFalseFile CommentsNamefile_12.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_2/file_12.txtMD5 Hashc1521a239a602e3c2062fd6886e3e659FolderFalseFile CommentsNamefile_13.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:17 PM (2023-05-03 17:19:17 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_2/file_13.txtMD5 Hashd71651413daa5ca6c46480a21f868812FolderFalseFile CommentsNamefile_14.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_2/file_14.txtMD5 Hashf9f173c4c62fd81332413b5b4a7c0f99FolderFalseFile CommentsNamefile_15.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_2/file_15.txtMD5 Hash7704d4191615bff8bdd8c9c4bda52e5dFolderFalseBookmark: ER 3: Folder 1, 20238/13/2024Comments: Creator: FRED\forenFile Count: 5FilesFile CommentsNamefile_6.txtPhysical Sizen/aLogical Size15 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_1/file_6.txtMD5 Hash0059813e3c0ba105c27538c0010cf6b0FolderFalseFile CommentsNamefile_7.txtPhysical Sizen/aLogical Size15 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_1/file_7.txtMD5 Hash68c0f05c6422571d440d08c1759770bcFolderFalseFile CommentsNamefile_8.txtPhysical Sizen/aLogical Size15 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_1/file_8.txtMD5 Hash599eee62b6f909314c479168f3fd423dFolderFalseFile CommentsNamefile_9.txtPhysical Sizen/aLogical Size15 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/folder_1/file_9.txtMD5 Hashc955dc3766e19f947d2af22c3a87f6f3FolderFalseFile CommentsNamefolder_1Physical Sizen/aLogical Size0 BCreated Date5/3/2023 1:23:51 PM (2023-05-03 17:23:51 UTC)Modified Date5/3/2023 1:30:23 PM (2023-05-03 17:30:23 UTC)Accessed Date5/3/2023 1:33:10 PM (2023-05-03 17:33:10 UTC)Pathtest2/folder_1MD5 HashFolderTrueBookmark: Subsubsubseries(3) the second8/13/2024Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 11: File 16, 20238/13/2024Comments: Creator: FRED\forenFile Count: 1FilesFile CommentsNamefile_16.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:17 PM (2023-05-03 17:19:17 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/file_16.txtMD5 Hashf67e17268560573531e61f7c5bf050a1FolderFalseBookmark: Subsubseries(2) the second8/13/2024Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 23: File 17, 20238/13/2024Comments: Creator: FRED\forenFile Count: 1FilesFile CommentsNamefile_17.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/file_17.txtMD5 Hash649b7ae3accd9110484c988cb12ea31aFolderFalseBookmark: Subseries(1) the second8/13/2024Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 4: File 18, 20238/13/2024Comments: Creator: FRED\forenFile Count: 1FilesFile CommentsNamefile_18.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/file_18.txtMD5 Hashcc595e6557db981512e227f0dfcb45daFolderFalseBookmark: Series 28/13/2024Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 9: File 20,20238/13/2024Comments: Creator: FRED\forenFile Count: 1FilesFile CommentsNamefile_20.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:17 PM (2023-05-03 17:19:17 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/file_20.txtMD5 Hash73cb9ad0b717bebb5e8c27800cb599f1FolderFalseBookmark: Subseries(1) of Series 28/13/2024Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 8: File 2, 20238/13/2024Comments: Creator: FRED\forenFile Count: 1FilesFile CommentsNamefile_2.txtPhysical Sizen/aLogical Size15 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/file_2.txtMD5 Hashdb06069ef1c9f40986ffa06db4fe8fd7FolderFalseBookmark: Subsubseries(2) of Series 28/13/2024Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 7: File 19, 20238/13/2024Comments: Creator: FRED\forenFile Count: 1FilesFile CommentsNamefile_19.txtPhysical Sizen/aLogical Size16 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:20:07 PM (2023-05-03 17:20:07 UTC)Pathtest2/file_19.txtMD5 Hashf24f15d357735521f2933b41b9b2c616FolderFalseBookmark: Series 38/13/2024Comments: Creator: FRED\forenFile Count: 0FilesBookmark: DI 1: Blank Disk, 20218/13/2024Comments: Creator: FRED\forenFile Count: 1FilesFile CommentsName[root]Physical Size7168 BLogical Size7168 BCreated Daten/aModified Daten/aAccessed Daten/aPathFloppy_Disk.001/NONAME [FAT12]/[root]Bookmark: ER 12: File 0 (also in ER 1), 20238/13/2024Comments: Creator: FRED\forenFile Count: 1FilesFile CommentsNamefile_0.txtPhysical Sizen/aLogical Size15 BCreated Date5/2/2023 12:20:46 PM (2023-05-02 16:20:46 UTC)Modified Date5/3/2023 1:19:18 PM (2023-05-03 17:19:18 UTC)Accessed Date5/3/2023 1:33:12 PM (2023-05-03 17:33:12 UTC)Pathtest2/file_0.txtMD5 Hashe4328dc3ad8e97cf10f50641424bafecFolderFalseBookmark: ER 5: No Files, 20238/13/2024Comments: Creator: FRED\forenFile Count: 0FilesBookmark: ER 6: Zero Length, 20238/13/2024Comments: Creator: FRED\forenFile Count: 1FilesFile CommentsNamefile00.txtPhysical Sizen/aLogical Size0 BCreated Date5/3/2023 1:29:29 PM (2023-05-03 17:29:29 UTC)Modified Date5/3/2023 1:29:29 PM (2023-05-03 17:29:29 UTC)Accessed Date5/3/2023 1:33:12 PM (2023-05-03 17:33:12 UTC)Pathtest2/file00.txtMD5 HashFolderFalsePage of Bookmarks, -DI 1: Blank Disk, 2021, [root], ER 10: File 21,2023, file_21.txt, ER 10: Folder 2, 2023, file_11.txt, file_12.txt, file_13.txt, file_14.txt, file_15.txt, ER 11: File 16, 2023, file_16.txt, ER 12: File 0 (also in ER 1), 2023, file_0.txt, ER 1: Text, 2023, file_0.txt, file_1.txt, file_10.txt, file_11.txt, file_12.txt, file_13.txt, file_14.txt, ER 23: File 17, 2023, file_17.txt, ER 2: File 15, 2023, file_15.txt, ER 3: Folder 1, 2023, file_6.txt, file_7.txt, file_8.txt, file_9.txt, folder_1, ER 4: File 18, 2023, file_18.txt, ER 5: No Files, 2023, ER 6: Zero Length, 2023, file00.txt, ER 7: File 19, 2023, file_19.txt, ER 8: File 2, 2023, file_2.txt, ER 9: File 20,2023, file_20.txt, Extents Test papers, Series 1, Series 2, Series 3, Subseries(1), Subseries(1) of Series 2, Subseries(1) the second, Subsubseries(2), Subsubseries(2) of Series 2, Subsubseries(2) the second, Subsubsubseries(3), Subsubsubseries(3) the second, Subsubsubsubseries(4), Case Information, -Case Description, Case Name, Case Owner, Case Reference, Report Created, Version, File Overview, -Email Status, Email Attachments, Email Related Items (From Email), Email Reply, Forwarded Email, Evidence Groups, Ungrouped, File Category, Archives, Databases, Documents, Email, Executable, Folders, Graphics, Internet/Chat Files, Mobile Phone, Multimedia, OS/File System Files, Other Encryption Files, Other Known Types, Presentations, Slack/Free Space, Spreadsheets, Unknown Types, User Types, File Items, Evidence Items, Checked Items, Unchecked Items, File Status, Bad Extensions, Data Carved Files, Decrypted Files, Deleted Files, Duplicate Items, Email Attachments, Email Related Items (From Email), Encrypted Files, Flagged Ignore, Flagged Privileged, From Recycle Bin, KFF Alert Files, KFF Ignorable, OCR Graphics, OLE Subitems, Project VIC Matches, User-Decrypted Files, Labels, Evidence List, -Floppy_Disk.001, test2, diff --git a/tests/fixtures/report/report.json b/tests/fixtures/report/report.json index 8b6f34f..3cb8453 100644 --- a/tests/fixtures/report/report.json +++ b/tests/fixtures/report/report.json @@ -139,6 +139,13 @@ { "title": "Series 3", "children": [ + { + "title": "DI 1: Blank Disk, 2021", + "er_number": "DI 1", + "er_name": "Blank Disk, 2021", + "file_size": 7168, + "file_count": 1 + }, { "title": "ER 12: File 0 (also in ER 1), 2023", "er_number": "ER 12", diff --git a/tests/test_report_ftk_extents.py b/tests/test_report_ftk_extents.py index f050e43..b9189d3 100644 --- a/tests/test_report_ftk_extents.py +++ b/tests/test_report_ftk_extents.py @@ -28,53 +28,50 @@ def test_quit_on_invalid_xml(tmp_path): def parsed_report(): return rfe.parse_xml('tests/fixtures/report/Report.xml') -def test_identify_all_ers(parsed_report): - """Function should list every bookmark starting with ER""" - ers = rfe.create_er_list(parsed_report) +@pytest.fixture +def components(parsed_report): + return rfe.create_component_list(parsed_report) - just_ers = [er[0][-1].split(':')[0] for er in ers] +def test_identify_all_components(components): + """Function should list every bookmark starting with ER and DI""" + just_components = [component[0][-1].split(':')[0] for component in components] for i in range(1, 12): - assert f'ER {i}' in just_ers - assert 'ER 23' in just_ers + assert f'ER {i}' in just_components + assert 'ER 23' in just_components -def test_hierarchy_nests_down_correctly(parsed_report): +def test_hierarchy_nests_down_correctly(components): """Function should include organization hierarchy. These are not great tests, but I'm not sure what the better strategy would be""" - ers = rfe.create_er_list(parsed_report) - just_titles = [er[0] for er in ers] + just_titles = [component[0] for component in components] assert ['Extents Test papers', 'Series 1', 'Subseries(1)', 'ER 1: Text, 2023'] in just_titles assert ['Extents Test papers', 'Series 1', 'Subseries(1)', 'Subsubseries(2)', 'ER 2: File 15, 2023'] in just_titles -def test_hierarchy_nests_empty_subseries(parsed_report): +def test_hierarchy_nests_empty_subseries(components): """Function should include organization hierarchy including empty levels""" - ers = rfe.create_er_list(parsed_report) - just_titles = [er[0] for er in ers] + just_titles = [component[0] for component in components] assert ['Extents Test papers', 'Series 1', 'Subseries(1)', 'Subsubseries(2)', 'Subsubsubseries(3)', 'Subsubsubsubseries(4)', 'ER 10: Folder 2, 2023'] in just_titles -def test_hierarchy_nests_up_correctly(parsed_report): +def test_hierarchy_nests_up_correctly(components): """Function should be able to step down in hierarchy""" - ers = rfe.create_er_list(parsed_report) - just_titles = [er[0] for er in ers] + just_titles = [component[0] for component in components] assert ['Extents Test papers', 'Series 1', 'Subseries(1)', 'Subsubseries(2) the second', 'ER 23: File 17, 2023'] in just_titles assert ['Extents Test papers', 'Series 1', 'Subseries(1) the second', 'ER 4: File 18, 2023'] in just_titles -def test_hierarchy_nests_reverse_order_bookmarks(parsed_report): +def test_hierarchy_nests_reverse_order_bookmarks(components): """Function should parse bottom-up hierarchy""" - ers = rfe.create_er_list(parsed_report) - just_titles = [er[0] for er in ers] + just_titles = [component[0] for component in components] assert ['Extents Test papers', 'Series 2', 'ER 9: File 20,2023'] in just_titles assert ['Extents Test papers', 'Series 2', 'Subseries(1) of Series 2', 'ER 8: File 2, 2023'] in just_titles assert ['Extents Test papers', 'Series 2', 'Subseries(1) of Series 2', 'Subsubseries(2) of Series 2', 'ER 7: File 19, 2023'] in just_titles -def test_er_outside_of_series(parsed_report): - """Function should include capture ERs even if they're not in a series""" - ers = rfe.create_er_list(parsed_report) - just_titles = [er[0] for er in ers] +def test_component_outside_of_series(components): + """Function should include capture components even if they're not in a series""" + just_titles = [component[0] for component in components] assert ['Extents Test papers', 'ER 10: File 21,2023'] in just_titles @@ -82,34 +79,47 @@ def test_correct_report_many_files(parsed_report): """Test if file count and byte count is completed correctly""" bookmark_tables = rfe.transform_bookmark_tables(parsed_report) - er_with_many_files = [['ER 1', 'bk6001']] - extents = rfe.add_extents_to_ers(er_with_many_files, bookmark_tables) + component_with_many_files = [['ER 1', 'bk6001']] + extents = rfe.add_extents_to_components(component_with_many_files, bookmark_tables) # bytes assert extents[0][1] == 110 # files assert extents[0][2] == 7 -def test_correct_report_on_er_with_folder_bookmarked(parsed_report): +def test_correct_report_on_component_with_folder_bookmarked(parsed_report): """Test if file count and byte count is completed correctly when bookmark includes a folder that is bookmarked""" bookmark_tables = rfe.transform_bookmark_tables(parsed_report) - er_with_folder = [['ER 10', 'bk12001']] - extents = rfe.add_extents_to_ers(er_with_folder, bookmark_tables) + component_with_folder = [['ER 10', 'bk12001']] + extents = rfe.add_extents_to_components(component_with_folder, bookmark_tables) # bytes assert extents[0][1] == 80 # files assert extents[0][2] == 5 -def test_correct_report_on_er_with_folder_not_bookmarked(parsed_report): +def test_correct_report_on_disk_image(parsed_report): + """Test if file count and byte count is completed correctly + when bookmark includes a folder that is bookmarked""" + bookmark_tables = rfe.transform_bookmark_tables(parsed_report) + + component_with_folder = [['DI 1', 'bk31001']] + extents = rfe.add_extents_to_components(component_with_folder, bookmark_tables) + + # bytes + assert extents[0][1] == 7168 + # files + assert extents[0][2] == 1 + +def test_correct_report_on_component_with_folder_not_bookmarked(parsed_report): """Test if file count and byte count is completed correctly when bookmark includes a folder that isn't bookmarked""" bookmark_tables = rfe.transform_bookmark_tables(parsed_report) - er_with_folder = [['ER 3', 'bk11001']] - extents = rfe.add_extents_to_ers(er_with_folder, bookmark_tables) + component_with_folder = [['ER 3', 'bk11001']] + extents = rfe.add_extents_to_components(component_with_folder, bookmark_tables) # bytes assert extents[0][1] == 60 @@ -120,8 +130,8 @@ def test_correct_report_1_file(parsed_report): """Test if file count and byte count is completed correctly for one file""" bookmark_tables = rfe.transform_bookmark_tables(parsed_report) - er_with_one_file = [['ER 2', 'bk9001']] - extents = rfe.add_extents_to_ers(er_with_one_file, bookmark_tables) + component_with_one_file = [['ER 2', 'bk9001']] + extents = rfe.add_extents_to_components(component_with_one_file, bookmark_tables) # bytes assert extents[0][1] == 16 @@ -129,37 +139,37 @@ def test_correct_report_1_file(parsed_report): assert extents[0][2] == 1 def test_warn_on_no_files_in_er(parsed_report, caplog): - """Test if warning is logged for empty bookmarks and ER is omitted from report""" + """Test if warning is logged for empty bookmarks and component is omitted from report""" bookmark_tables = rfe.transform_bookmark_tables(parsed_report) - er_with_no_files = [[['hier', 'archy', 'list'], 'bk27001', 'ER 5: No Files, 2023']] + component_with_no_files = [[['hier', 'archy', 'list'], 'bk27001', 'ER 5: No Files, 2023']] - extents = rfe.add_extents_to_ers(er_with_no_files, bookmark_tables) + extents = rfe.add_extents_to_components(component_with_no_files, bookmark_tables) assert extents == [] - log_msg = f'{er_with_no_files[0][-1]} does not contain any files. It will be omitted from the report.' + log_msg = f'{component_with_no_files[0][-1]} does not contain any files. It will be omitted from the report.' assert log_msg in caplog.text def test_warn_on_a_no_byte_file_in_er(parsed_report, caplog): - """Test if warning is logged for empty files in an ER""" + """Test if warning is logged for empty files in an component""" bookmark_tables = rfe.transform_bookmark_tables(parsed_report) - er_with_no_bytes = [[['hier', 'archy', 'list'], 'bk28001', 'ER 6: Zero Length, 2023']] - rfe.add_extents_to_ers(er_with_no_bytes, bookmark_tables) - log_msg = f'{er_with_no_bytes[0][-1]} contains the following 0-byte file: file00.txt. Review this file with the processing archivist.' + component_with_no_bytes = [[['hier', 'archy', 'list'], 'bk28001', 'ER 6: Zero Length, 2023']] + rfe.add_extents_to_components(component_with_no_bytes, bookmark_tables) + log_msg = f'{component_with_no_bytes[0][-1]} contains the following 0-byte file: file00.txt. Review this file with the processing archivist.' assert log_msg in caplog.text def test_warn_on_no_bytes_in_er(parsed_report, caplog): - """Test if warning is logged for bookmarks with 0 bytes total and ER is omitted from report""" + """Test if warning is logged for bookmarks with 0 bytes total and component is omitted from report""" bookmark_tables = rfe.transform_bookmark_tables(parsed_report) - er_with_no_bytes = [[['hier', 'archy', 'list'], 'bk28001', 'ER 6: Zero Length, 2023']] - extents = rfe.add_extents_to_ers(er_with_no_bytes, bookmark_tables) + component_with_no_bytes = [[['hier', 'archy', 'list'], 'bk28001', 'ER 6: Zero Length, 2023']] + extents = rfe.add_extents_to_components(component_with_no_bytes, bookmark_tables) assert extents == [] - log_msg = f'{er_with_no_bytes[0][-1]} contains no files with bytes. This ER is omitted from report. Review this ER with the processing archivist.' + log_msg = f'{component_with_no_bytes[0][-1]} contains no files with bytes. This component is omitted from report. Review this component with the processing archivist.' assert log_msg in caplog.text @@ -170,22 +180,22 @@ def test_extract_collection_name_from_report(parsed_report): assert coll_name == 'M12345 Extents Test' @pytest.fixture -def ers_with_extents_list(parsed_report): - ers = rfe.create_er_list(parsed_report) +def components_with_extents_list(parsed_report): + components = rfe.create_component_list(parsed_report) bookmark_tables = rfe.transform_bookmark_tables(parsed_report) - ers_with_extents = rfe.add_extents_to_ers(ers, bookmark_tables) + components_with_extents = rfe.add_extents_to_components(components, bookmark_tables) - return ers_with_extents + return components_with_extents -def test_json_objects_contains_expected_fields(ers_with_extents_list): +def test_json_objects_contains_expected_fields(components_with_extents_list): """Test if final report aligns with expectations for ASpace import""" full_dict = {'title': 'slug', 'children': []} - for er in ers_with_extents_list: - rfe.create_report(er, full_dict) + for component in components_with_extents_list: + rfe.create_report(component, full_dict) - def recursive_validator(er_dict): - for key, value in er_dict.items(): + def recursive_validator(component_dict): + for key, value in component_dict.items(): if key == 'title': assert type(value) is str elif key == 'children': @@ -205,44 +215,44 @@ def recursive_validator(er_dict): recursive_validator(full_dict) -def test_skipped_ER_number_behavior(parsed_report, caplog): - """Test if script flags when ER numbering is not sequential""" - ers = rfe.create_er_list(parsed_report) + +def test_skipped_number_behavior(parsed_report, caplog): + """Test if script flags when component numbering is not sequential""" + components = rfe.create_component_list(parsed_report) for i in range(13, 23): - assert f'Collection uses ER 1 to ER 23. ER {i} is skipped. Review the ERs with the processing archivist' in caplog.text + assert f'Collection ER component range is numbered 1 to 23. {i} is skipped. Review the bookmarks with the processing archivist' in caplog.text -def test_ER_missing_number_behavior(parsed_report, caplog): - """Test if script flags when ER number is reused""" - ers = rfe.create_er_list(parsed_report) - ers[0][2] = "ER ?: File 21,2023" +def test_component_missing_number_behavior(components, caplog): + """Test if script flags when component number is reused""" + components[0][2] = "ER ?: File 21,2023" - rfe.audit_ers(ers) + rfe.audit_components(components) - log_msg = f'ER is missing a number: ER ?: File 21,2023. Review the ERs with the processing archivist' + log_msg = f'Component is missing a number: ER ?: File 21,2023. Review the bookmarks with the processing archivist' assert log_msg in caplog.text -def test_repeated_ER_number_behavior(parsed_report, caplog): - """Test if script flags when ER number is reused""" - ers = rfe.create_er_list(parsed_report) +def test_repeated_component_number_behavior(components, caplog): + """Test if script flags when component number is reused""" + rfe.audit_components(components) - rfe.audit_ers(ers) + log_msg = f'ER 10 is used multiple times: ER 10: File 21,2023, ER 10: Folder 2, 2023. Review the bookmarks with the processing archivist' - log_msg = f'ER 10 is used multiple times: ER 10: File 21,2023, ER 10: Folder 2, 2023. Review the ERs with the processing archivist' assert log_msg in caplog.text + @pytest.fixture def expected_json(): with open('tests/fixtures/report/report.json') as f: report = json.load(f) return report -def test_create_correct_json(ers_with_extents_list, expected_json): +def test_create_correct_json(components_with_extents_list, expected_json): """Test that final report matches total expectations""" dct = {'title': 'coll', 'children': []} - for er in ers_with_extents_list: - dct = rfe.create_report(er, dct) + for component in components_with_extents_list: + dct = rfe.create_report(component, dct) assert dct == expected_json