From df73880844d7d6b78aab773bdd7814710e7eede4 Mon Sep 17 00:00:00 2001 From: Alex J Burke Date: Sat, 10 Feb 2024 13:15:39 +0100 Subject: [PATCH 1/2] Repair run_example and allow configuring html_dir via the command line. Support a directory argument of "." to allow outputting to an output folder in the cwd with an automatic suffix: ./_autoarchaologist. While here, add a top-level Makefile wrapper for run_example in advance of a future change to provide a unified run CLI. --- .gitignore | 4 ++++ Makefile | 4 ++++ ddhf/ddhf/decorated_context.py | 18 +++++++++++++++++- output/.gitkeep | 0 run_example.py | 29 ++++++++++++++++++++--------- 5 files changed, 45 insertions(+), 10 deletions(-) create mode 100644 Makefile create mode 100644 output/.gitkeep diff --git a/.gitignore b/.gitignore index 53bb4e6..049c68f 100644 --- a/.gitignore +++ b/.gitignore @@ -6,5 +6,9 @@ __pycache__/ # Sphinx documentation docs/_build/ +# Build files +venv/ + # Temporary files _.* +/output/ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..90d3a64 --- /dev/null +++ b/Makefile @@ -0,0 +1,4 @@ +default: example + +example: + ./venv/bin/python3 run_example.py -d . 
diff --git a/ddhf/ddhf/decorated_context.py b/ddhf/ddhf/decorated_context.py index 52104a3..901b7bf 100644 --- a/ddhf/ddhf/decorated_context.py +++ b/ddhf/ddhf/decorated_context.py @@ -110,13 +110,29 @@ def from_argv(self): "AUTOARCHAEOLOGIST_BITSTORE_CACHE": "ddhf_bitstore_cache", } -def main(job, html_subdir="tmp", **kwargs): +def parse_arguments(argv=None): + parser = argparse.ArgumentParser() + parser.add_argument('-o', '--out', default='/tmp/_autoarchaologist') + + args = parser.parse_args(args=argv) + if args.out == '.': + args.out = os.path.join(os.getcwd(), "_autoarchaologist") + return args + +def main(job, html_subdir, **kwargs): + args = parse_arguments() + kwargs["html_dir"] = args.out + ''' A standard main routine to reduce boiler-plate ''' for key in os.environ: i = OK_ENVS.get(key) if i: kwargs[i] = os.environ[key] + if 'html_dir' not in kwargs: + raise AttributeError("missing: html_dir") + + kwargs['html_dir'] = os.path.join(kwargs['html_dir'], html_subdir) kwargs.setdefault('download_links', True) kwargs.setdefault('download_limit', 1 << 20) diff --git a/output/.gitkeep b/output/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/run_example.py b/run_example.py index ad7c063..5fddbc1 100644 --- a/run_example.py +++ b/run_example.py @@ -1,19 +1,35 @@ +import argparse import os +import sys import autoarchaeologist -from autoarchaeologist.generic.bigdigits import BigDigits +from autoarchaeologist.generic.bigtext import BigText from autoarchaeologist.generic.samesame import SameSame from autoarchaeologist.data_general.absbin import AbsBin from autoarchaeologist.data_general.papertapechecksum import DGC_PaperTapeCheckSum +def parse_arguments(argv=None): + parser = argparse.ArgumentParser() + parser.add_argument("-d", "--dir", default="/tmp/_autoarchaologist") + + args = parser.parse_args(args=argv) + if args.dir == ".": + args.dir = os.path.join(os.getcwd(), "output", "_autoarchaologist") + return args if __name__ == "__main__": + args = 
parse_arguments() + + try: + os.mkdir(args.dir) + except FileExistsError: + pass - ctx = autoarchaeologist.Excavation() + ctx = autoarchaeologist.Excavation(html_dir=args.dir) - ctx.add_examiner(BigDigits) + ctx.add_examiner(BigText) ctx.add_examiner(AbsBin) ctx.add_examiner(DGC_PaperTapeCheckSum) ctx.add_examiner(SameSame) @@ -22,11 +38,6 @@ ctx.start_examination() - try: - os.mkdir("/tmp/_autoarchaologist") - except FileExistsError: - pass - - ctx.produce_html(html_dir="/tmp/_autoarchaologist") + ctx.produce_html() print("Now point your browser at", ctx.filename_for(ctx).link) From 51dba3c0c2879c4221add2a3bc95e299c0422442 Mon Sep 17 00:00:00 2001 From: Alex J Burke Date: Mon, 11 Mar 2024 22:58:06 +0100 Subject: [PATCH 2/2] Add basic tests of example excavation/output + groundwork for unified run. Make the minimal set of changes such that we are able to trigger the excavation to a known directory of the in-tree example file. Check that the expected HTML files were written for this known excavation and assert some basic properties of the excavated artifacts. The commit includes reworking of run_example wherein its excavation is declared as a class and its core logic separated out into a run.py file. This is done both to allow the tests to easily make use of the logic and as preliminary bits for subsequent work to provide a unified entry point. 
--- .gitignore | 1 + Makefile | 3 + run.py | 48 +++++++++++++++ run_example.py | 43 +++++--------- tests/__init__.py | 0 tests/test_run_example.py | 119 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 187 insertions(+), 27 deletions(-) create mode 100644 run.py create mode 100644 tests/__init__.py create mode 100644 tests/test_run_example.py diff --git a/.gitignore b/.gitignore index 049c68f..7ae0c03 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ venv/ # Temporary files _.* /output/ +/tests/_scratch/ diff --git a/Makefile b/Makefile index 90d3a64..919a13e 100644 --- a/Makefile +++ b/Makefile @@ -2,3 +2,6 @@ default: example example: ./venv/bin/python3 run_example.py -d . + +test: + @./venv/bin/python3 -m unittest diff --git a/run.py b/run.py new file mode 100644 index 0000000..63421ec --- /dev/null +++ b/run.py @@ -0,0 +1,48 @@ +import argparse +import os +import sys + +from autoarchaeologist import Excavation + +def parse_arguments(argv=None): + parser = argparse.ArgumentParser() + parser.add_argument("-d", "--dir", default="/tmp/_autoarchaologist") + parser.add_argument('filename') + + return parser.parse_args(args=argv) + +def process_arguments(args): + if args.dir == ".": + args.dir = os.path.join(os.getcwd(), "output", "_autoarchaologist") + if args.filename is not None: + args.filename = os.path.abspath(args.filename) + else: + raise ValueError() + + return args + +def perform_excavation(args, action_tuple): + match action_tuple: + case "excavator", AnExcavation: + assert issubclass(AnExcavation, Excavation) + ctx = AnExcavation(html_dir=args.dir) + case action, _: + raise NotImplementedError(f"action: {action}") + + ff = ctx.add_file_artifact(args.filename) + + ctx.start_examination() + + return ctx + +if __name__ == "__main__": + args = process_arguments(parse_arguments()) + + try: + os.mkdir(args.dir) + except FileExistsError: + pass + + ctx = perform_excavation(args, ("none", None)) + ctx.produce_html() + print("Now point your browser 
at", ctx.filename_for(ctx).link) diff --git a/run_example.py b/run_example.py index 5fddbc1..808842b 100644 --- a/run_example.py +++ b/run_example.py @@ -1,43 +1,32 @@ - -import argparse import os import sys +from types import SimpleNamespace -import autoarchaeologist - +from run import parse_arguments, process_arguments, perform_excavation +from autoarchaeologist.base.excavation import Excavation from autoarchaeologist.generic.bigtext import BigText from autoarchaeologist.generic.samesame import SameSame from autoarchaeologist.data_general.absbin import AbsBin from autoarchaeologist.data_general.papertapechecksum import DGC_PaperTapeCheckSum -def parse_arguments(argv=None): - parser = argparse.ArgumentParser() - parser.add_argument("-d", "--dir", default="/tmp/_autoarchaologist") - args = parser.parse_args(args=argv) - if args.dir == ".": - args.dir = os.path.join(os.getcwd(), "output", "_autoarchaologist") - return args - -if __name__ == "__main__": - args = parse_arguments() +class ExampleExcavation(Excavation): + def __init__(self, **kwargs): + super().__init__(**kwargs) - try: - os.mkdir(args.dir) - except FileExistsError: - pass + self.add_examiner(BigText) + self.add_examiner(AbsBin) + self.add_examiner(DGC_PaperTapeCheckSum) + self.add_examiner(SameSame) - ctx = autoarchaeologist.Excavation(html_dir=args.dir) - ctx.add_examiner(BigText) - ctx.add_examiner(AbsBin) - ctx.add_examiner(DGC_PaperTapeCheckSum) - ctx.add_examiner(SameSame) - - ff = ctx.add_file_artifact("examples/30001393.bin") - - ctx.start_examination() +if __name__ == "__main__": + argv = sys.argv[1:] + # force the example as the filename + argv.append("examples/30001393.bin") + args = process_arguments(parse_arguments(argv=argv)) + ctx = perform_excavation(args, ("excavator", ExampleExcavation)) ctx.produce_html() print("Now point your browser at", ctx.filename_for(ctx).link) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
a/tests/test_run_example.py b/tests/test_run_example.py new file mode 100644 index 0000000..41ed95d --- /dev/null +++ b/tests/test_run_example.py @@ -0,0 +1,119 @@ +import importlib +import os +import shutil +import sys +from types import SimpleNamespace +import unittest + +TESTS_DIR = os.path.dirname(os.path.abspath(__file__)) +SCRATCH_DIR = os.path.join(TESTS_DIR, "_scratch") +ROOT_DIR = os.path.normpath(os.path.join(TESTS_DIR, "..")) + +sys.path.append(TESTS_DIR) + +from run import perform_excavation +from run_example import ExampleExcavation +from autoarchaeologist.base.artifact import ArtifactBase, ArtifactStream + + +def example_arguments(output_dir): + example_arguments = SimpleNamespace() + example_arguments.dir = output_dir + example_arguments.filename = "examples/30001393.bin" + return example_arguments + + +class Test_RunExampleBasicHtml(unittest.TestCase): + """ + Ensure run_example produces expected HTML files for the example input. + """ + + DIR_TREE = None + + @classmethod + def setUpClass(cls): + args = example_arguments(SCRATCH_DIR) + shutil.rmtree(args.dir, ignore_errors=True) + os.makedirs(args.dir, exist_ok=True) + ctx = perform_excavation(args, ("excavator", ExampleExcavation)) + ctx.produce_html() + cls.DIR_TREE = list(os.walk(args.dir)) + + def toplevel(self): + return self.__class__.DIR_TREE[0] + + def toplevel_dirnames(self): + _, dirs, __ = self.toplevel() + dirs.sort() + return dirs + + def toplevel_filenames(self): + _, __, filenames = self.toplevel() + return filenames + + def test_produces_top_level_index(self): + toplevel_filenames = self.toplevel_filenames() + self.assertTrue("index.html" in toplevel_filenames) + self.assertTrue("index.css" in toplevel_filenames) + + def test_produces_digest_directories(self): + toplevel_dirnames = self.toplevel_dirnames() + self.assertEqual(toplevel_dirnames, ['08', '79', 'fa']) + + +class Test_RunExampleBasicArtifacts(unittest.TestCase): + """ + Ensure run_example excavates the expected artifacts 
for the example input. + """ + + CTX = None + + @classmethod + def setUpClass(cls): + args = example_arguments(SCRATCH_DIR) + shutil.rmtree(args.dir, ignore_errors=True) + os.makedirs(args.dir, exist_ok=True) + ctx = perform_excavation(args, ("excavator", ExampleExcavation)) + cls.CTX = ctx + + def assertArtifactIsChild(self, artifact, parent): + assert issubclass(artifact.__class__, ArtifactBase) + self.assertEqual(list(artifact.parents), [parent]) + + def excavation(self): + return self.__class__.CTX + + def test_excavated_three_total_artifacts(self): + arfifact_hash_keys = list(self.excavation().hashes.keys()) + self.assertEqual(len(arfifact_hash_keys), 3) + + def test_excavated_one_top_level_artifact(self): + excavatoin_child_count = len(self.excavation().children) + self.assertEqual(excavatoin_child_count, 1) + + def test_produces_top_level_artifact(self): + excavation = self.excavation() + artifact = self.excavation().children[0] + self.assertIsInstance(artifact, ArtifactStream) + self.assertEqual(artifact.digest, '083a3d5e3098aec38ee5d9bc9f9880d3026e120ff8f058782d49ee3ccafd2a6c') + self.assertTrue(artifact.digest in excavation.hashes) + + def test_produces_top_level_artifact_whose_parent_is_excavation(self): + artifact = self.excavation().children[0] + self.assertArtifactIsChild(artifact, self.excavation()) + + def test_produces_two_children_of_the_top_level(self): + excavation = self.excavation() + artifact = excavation.children[0] + artifact_children = sorted(artifact.children, key=lambda a: a.digest) + self.assertEqual(len(artifact_children), 2) + self.assertTrue(artifact_children[0].digest in excavation.hashes) + self.assertTrue(artifact_children[0].digest.startswith('79')) + self.assertArtifactIsChild(artifact_children[0], artifact) + self.assertTrue(artifact_children[1].digest in excavation.hashes) + self.assertTrue(artifact_children[1].digest.startswith('fa')) + self.assertArtifactIsChild(artifact_children[1], artifact) + + +if __name__ == '__main__': 
+ unittest.main()