diff --git a/poetry.lock b/poetry.lock index 4107552..e492af3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. [[package]] name = "argcomplete" @@ -6,6 +6,7 @@ version = "3.1.2" description = "Bash tab completion for argparse" optional = false python-versions = ">=3.6" +groups = ["dev"] files = [ {file = "argcomplete-3.1.2-py3-none-any.whl", hash = "sha256:d97c036d12a752d1079f190bc1521c545b941fda89ad85d15afa909b4d1b9a99"}, {file = "argcomplete-3.1.2.tar.gz", hash = "sha256:d5d1e5efd41435260b8f85673b74ea2e883affcbec9f4230c582689e8e78251b"}, @@ -20,6 +21,7 @@ version = "1.8.1" description = "Create and validate BagIt packages" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "bagit-1.8.1-py2.py3-none-any.whl", hash = "sha256:d14dd7e373dd24d41f6748c42f123f7db77098dfa4a0125dbacb4c8bdf767c09"}, {file = "bagit-1.8.1.tar.gz", hash = "sha256:37df1330d2e8640c8dee8ab6d0073ac701f0614d25f5252f9e05263409cee60c"}, @@ -31,6 +33,8 @@ version = "0.4.6" description = "Cross-platform colored terminal text." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["dev"] +markers = "sys_platform == \"win32\"" files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, @@ -42,6 +46,7 @@ version = "6.7.0" description = "Add colours to the output of Python's logging module." 
optional = false python-versions = ">=3.6" +groups = ["dev"] files = [ {file = "colorlog-6.7.0-py2.py3-none-any.whl", hash = "sha256:0d33ca236784a1ba3ff9c532d4964126d8a2c44f1f0cb1d2b0728196f512f662"}, {file = "colorlog-6.7.0.tar.gz", hash = "sha256:bd94bd21c1e13fac7bd3153f4bc3a7dc0eb0974b8bc2fdf1a989e474f6e582e5"}, @@ -59,17 +64,38 @@ version = "0.3.7" description = "Distribution utilities" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "distlib-0.3.7-py2.py3-none-any.whl", hash = "sha256:2e24928bc811348f0feb63014e97aaae3037f2cf48712d51ae61df7fd6075057"}, {file = "distlib-0.3.7.tar.gz", hash = "sha256:9dafe54b34a028eafd95039d5e5d4851a13734540f1331060d31c9916e7147a8"}, ] +[[package]] +name = "exceptiongroup" +version = "1.3.0" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +groups = ["dev"] +markers = "python_version < \"3.11\"" +files = [ + {file = "exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10"}, + {file = "exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4.6.0", markers = "python_version < \"3.13\""} + +[package.extras] +test = ["pytest (>=6)"] + [[package]] name = "filelock" version = "3.12.4" description = "A platform independent file lock." 
optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "filelock-3.12.4-py3-none-any.whl", hash = "sha256:08c21d87ded6e2b9da6728c3dff51baf1dcecf973b768ef35bcbc3447edb9ad4"}, {file = "filelock-3.12.4.tar.gz", hash = "sha256:2e6f249f1f3654291606e046b09f1fd5eac39b360664c27f5aad072012f8bcbd"}, @@ -78,7 +104,7 @@ files = [ [package.extras] docs = ["furo (>=2023.7.26)", "sphinx (>=7.1.2)", "sphinx-autodoc-typehints (>=1.24)"] testing = ["covdefaults (>=2.3)", "coverage (>=7.3)", "diff-cover (>=7.7)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)", "pytest-timeout (>=2.1)"] -typing = ["typing-extensions (>=4.7.1)"] +typing = ["typing-extensions (>=4.7.1) ; python_version < \"3.11\""] [[package]] name = "iniconfig" @@ -86,6 +112,7 @@ version = "2.0.0" description = "brain-dead simple config-ini parsing" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, @@ -97,6 +124,7 @@ version = "2023.4.22" description = "Flexible test automation." 
optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "nox-2023.4.22-py3-none-any.whl", hash = "sha256:0b1adc619c58ab4fa57d6ab2e7823fe47a32e70202f287d78474adcc7bda1891"}, {file = "nox-2023.4.22.tar.gz", hash = "sha256:46c0560b0dc609d7d967dc99e22cb463d3c4caf54a5fda735d6c11b5177e3a9f"}, @@ -117,6 +145,7 @@ version = "23.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, @@ -128,6 +157,7 @@ version = "3.11.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "platformdirs-3.11.0-py3-none-any.whl", hash = "sha256:e9d171d00af68be50e9202731309c4e658fd8bc76f55c11c7dd760d023bda68e"}, {file = "platformdirs-3.11.0.tar.gz", hash = "sha256:cf8ee52a3afdb965072dcc652433e0c7e3e40cf5ea1477cd4b3b1d2eb75495b3"}, @@ -143,6 +173,7 @@ version = "1.3.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, @@ -158,6 +189,7 @@ version = "7.4.2" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "pytest-7.4.2-py3-none-any.whl", hash = "sha256:1d881c6124e08ff0a1bb75ba3ec0bfd8b5354a01c194ddd5a0a870a48d99b002"}, {file = "pytest-7.4.2.tar.gz", hash = 
"sha256:a766259cfab564a2ad52cb1aae1b881a75c3eb7e34ca3779697c23ed47c47069"}, @@ -165,19 +197,78 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} iniconfig = "*" packaging = "*" pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +[[package]] +name = "tomli" +version = "2.2.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version < \"3.11\"" +files = [ + {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, + {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8"}, + {file = "tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff"}, + {file = "tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b"}, + {file = "tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea"}, + {file = "tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e"}, + {file = "tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98"}, + {file = "tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4"}, + {file = "tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7"}, + {file = "tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744"}, + {file = "tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec"}, + {file = "tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69"}, + {file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"}, + {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, +] + +[[package]] +name = "typing-extensions" +version = "4.13.2" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version < \"3.11\"" +files = [ + {file = "typing_extensions-4.13.2-py3-none-any.whl", 
hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c"}, + {file = "typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef"}, +] + [[package]] name = "virtualenv" version = "20.24.5" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "virtualenv-20.24.5-py3-none-any.whl", hash = "sha256:b80039f280f4919c77b30f1c23294ae357c4c8701042086e3fc005963e4e537b"}, {file = "virtualenv-20.24.5.tar.gz", hash = "sha256:e8361967f6da6fbdf1426483bfe9fca8287c242ac0bc30429905721cefbff752"}, @@ -190,9 +281,9 @@ platformdirs = ">=3.9.1,<4" [package.extras] docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] -test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""] [metadata] -lock-version = "2.0" -python-versions = "^3.11" -content-hash = "ecb57f3b9ebf9ccd94e4e6f7e32ec938ab4e1191b81fce097d0268c773fb9b26" +lock-version = "2.1" +python-versions = "^3.8" +content-hash = "cb559dd3ddb0bd623f5639384b4353b0c3aa3b16d19091ab4926fa9755d535f7" diff --git a/src/born_digital_docs_scripts/edit_reporter.py b/src/born_digital_docs_scripts/edit_reporter.py new file 
# === file: src/born_digital_docs_scripts/edit_reporter.py ===
"""Report on born-digital AMI packages: name, file/byte counts, edit masters."""
import argparse
import csv
import mimetypes
import re
from datetime import datetime
from pathlib import Path


def parse_args() -> argparse.Namespace:
    """Parse CLI arguments: one or more package dirs plus an output dir."""

    def main_dir(arg: str) -> Path:
        # Validate that the argument is an existing directory.
        path = Path(arg)
        if not path.is_dir():
            raise argparse.ArgumentTypeError(f"{path} is not a directory")
        return path

    def dir_of_dirs(arg: str) -> list[Path]:
        # Expand a directory into the list of its immediate subdirectories.
        path = main_dir(arg)
        return [child for child in path.iterdir() if child.is_dir()]

    parser = argparse.ArgumentParser(description="takes package directory")
    parser.add_argument(
        "-p", "--package",
        type=main_dir,
        help="input path to an ami package",
        dest="packages",
        action="append",
    )
    parser.add_argument(
        "-d", "--directory",
        type=dir_of_dirs,
        help="input path to a directory of ami packages",
        dest="packages",
        action="extend",
    )
    # BUG FIX: the option strings were passed twice in one call
    # ('-o', '--output', "-o", "--output"), which makes argparse raise
    # at startup with conflicting option strings.
    parser.add_argument(
        "-o", "--output",
        help="report destination directory",
        type=str,
        required=True,
    )

    args = parser.parse_args()
    # Neither -p nor -d is individually required; require at least one so
    # main() does not iterate over None.
    if not args.packages:
        parser.error("at least one of -p/--package or -d/--directory is required")
    return args


def get_name_bytes_count(source_dir: Path) -> tuple[str, int, int]:
    """Return (package name, number of files, total bytes) for one package.

    BUG FIX: the annotation was ``list[str, int, int]`` and the function
    silently returned None for non-directories; rglob on a non-directory
    simply yields nothing, so counts of zero are returned instead.
    """
    file_count = 0
    total_bytes = 0
    for item in source_dir.rglob("*"):
        if item.is_file():
            file_count += 1
            total_bytes += item.stat().st_size
    return source_dir.name, file_count, total_bytes


def get_metadata_from_title(source_dir: Path) -> tuple[str, str]:
    """Derive (division, classmark) from the package directory name.

    Classmark prefixes map one-to-one to curatorial divisions; either value
    falls back to "unknown" when no known prefix matches.
    """
    name = source_dir.name
    # classmark prefix -> curatorial division (one-to-one)
    codes = {
        "ncov": "theatre",
        "ncow": "theatre",
        "mgzdoh": "dance",
        "mgzidf": "dance",
    }
    division = "unknown"
    for prefix, div in codes.items():
        if prefix in name:
            division = div
            break

    classmark = "unknown"
    for prefix, div in codes.items():
        if div != division:
            continue
        match = re.search(rf".*({prefix}\w+)", name)
        if match:
            classmark = match.group(1)
            break
    return division, classmark


def get_edit_file_count(source_dir: Path) -> int:
    """Count audio/video edit-master files ('_em' in the name) in a package."""
    em_count = 0
    for item in source_dir.rglob("*"):
        if item.is_file() and "_em" in item.name:
            mime_type, _encoding = mimetypes.guess_type(item)
            # BUG FIX: guess_type returns (None, None) for unknown
            # extensions; the original called .startswith on None.
            if mime_type and (
                mime_type.startswith("video") or mime_type.startswith("audio")
            ):
                em_count += 1
    return em_count


def write_report(alldata: list[list], dest: str) -> None:
    """Write the collected package rows to a dated CSV in *dest*."""
    date_stamp = datetime.now().strftime("%Y-%m-%d")
    header = [
        "package_name", "total_files", "total_bytes_count",
        "edit_file_count", "division", "classmark",
    ]
    report_path = Path(dest) / f"editreport{date_stamp}.csv"
    with open(report_path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(alldata)


def main() -> None:
    """Build one report row per package and write the CSV."""
    args = parse_args()
    all_data = []
    for source in args.packages:
        # Row order must match the header in write_report().
        row = [
            *get_name_bytes_count(source),
            get_edit_file_count(source),
            *get_metadata_from_title(source),
        ]
        all_data.append(row)
    write_report(all_data, args.output)


if __name__ == "__main__":
    main()


# === file: src/born_digital_docs_scripts/lint_bdami.py ===
"""Lint born-digital AMI packages: bag validity, folder structure, naming."""
import argparse
import logging
from pathlib import Path

import bagit

LOGGER = logging.getLogger(__name__)


def lint_parse_args() -> argparse.Namespace:
    # NOTE: in the real module this is named parse_args(); renamed here only
    # because the patch's four files are shown concatenated.
    """Parse CLI arguments: one or more package directories."""

    def main_dir(arg: str) -> Path:
        path = Path(arg)
        if not path.is_dir():
            raise argparse.ArgumentTypeError(f"{path} is not a directory")
        return path

    def dir_of_dirs(arg: str) -> list[Path]:
        path = main_dir(arg)
        return [child for child in path.iterdir() if child.is_dir()]

    parser = argparse.ArgumentParser(description="takes package directory")
    parser.add_argument(
        "-p", "--package",
        type=main_dir,
        help="input path to an ami package",
        dest="packages",
        action="append",
    )
    parser.add_argument(
        "-d", "--directory",
        type=dir_of_dirs,
        help="input path to a directory of ami packages",
        dest="packages",
        action="extend",
    )

    args = parser.parse_args()
    if not args.packages:
        parser.error("at least one of -p/--package or -d/--directory is required")
    return args


def is_valid_bag(package: Path) -> bool:
    """Return True when *package* validates as a BagIt bag.

    NOTE(review): bagit.Bag raises BagError for a directory that is not a
    bag at all — callers may want to catch that; confirm desired behavior.
    """
    bag = bagit.Bag(str(package))
    return bag.validate()


def valid_structure(package: Path) -> bool:
    """Check that only the expected top-level folders are present."""
    # BUG FIX: the original used set("ArchiveOriginals", "EditMasters", ...)
    # which is a TypeError — set() takes a single iterable. A set literal
    # is what was intended.
    expected = {
        "ArchiveOriginals",
        "EditMasters",
        "ServiceCopies",
        "Images",
        "Transcripts",
        "Captions",
        "Releases",
        "Project Files",
    }
    found = {x.name for x in package.iterdir() if x.is_dir()}
    if found <= expected:
        return True
    LOGGER.error(
        "%s top-level folders should only be %s, found %s",
        package.name, ", ".join(sorted(expected)), found,
    )
    return False


def files_named_correctly(package: Path, dir_name: str, ending: str) -> bool:
    """Check every file under *dir_name* has a stem ending in *ending*."""
    target = package / dir_name
    if not target.exists():
        # BUG FIX: the original left `contents` unbound (NameError) when
        # the folder was missing; log and fail the check instead.
        LOGGER.error(
            "%s: cannot check %s, folder does not exist",
            package.name, dir_name,
        )
        return False

    # BUG FIX: the flag started False and was never set True, so the
    # function reported failure even when every file was named correctly.
    ok = True
    for item in target.rglob("*"):
        if item.is_file() and not item.stem.endswith(ending):
            LOGGER.error(
                "%s has item in %s missing expected %s, found %s",
                package.name, dir_name, ending, item.name,
            )
            ok = False
    return ok


def edits_named_correctly(package: Path) -> bool:
    """Edit masters must end in _em."""
    return files_named_correctly(package, "EditMasters", "_em")


def service_named_correctly(package: Path) -> bool:
    """Service copies must end in _sc."""
    return files_named_correctly(package, "ServiceCopies", "_sc")


def ao_named_correctly(package: Path) -> bool:
    """Archive originals must end in _ao."""
    return files_named_correctly(package, "ArchiveOriginals", "_ao")


# NOTE: the original patch left dead module-level code here (a `types`
# dict and loops over an undefined `files_dict`) that would raise
# NameError if reached; it has been removed.


def lint_main() -> None:
    # NOTE: in the real module this is named main(); renamed here only
    # because the patch's four files are shown concatenated.
    """Run every implemented check against each package and print results."""
    args = lint_parse_args()
    for source in args.packages:
        # BUG FIX: the original called get_structure/get_files/
        # validate_folder_content_types, which are commented out and do
        # not exist (NameError); run the implemented checks instead.
        checks = {
            "valid bag": is_valid_bag,
            "valid structure": valid_structure,
            "edit masters named correctly": edits_named_correctly,
            "service copies named correctly": service_named_correctly,
            "archive originals named correctly": ao_named_correctly,
        }
        for label, check in checks.items():
            passed = check(source)
            print(f"{source.name}: {label}: {'PASS' if passed else 'FAIL'}")


if __name__ == "__main__":
    lint_main()


# === file: src/born_digital_docs_scripts/make_sc.py ===
"""Make H.264 service copies from edit masters and rclone them to a dest."""
import argparse
import logging
import subprocess
from pathlib import Path


def sc_parse_args() -> argparse.Namespace:
    # NOTE: in the real module this is named parse_args(); renamed here only
    # because the patch's four files are shown concatenated.
    """Parse CLI args: a source directory of born-digital AMI, rclone dest."""

    def extant_path(p: str) -> Path:
        path = Path(p)
        if not path.exists():
            raise argparse.ArgumentTypeError(f"{path} does not exist")
        return path

    parser = argparse.ArgumentParser(
        description="path to a directory of born digital ami")
    parser.add_argument("--source", "-s", required=True, type=extant_path)
    parser.add_argument("--dest", "-d", required=True, type=str)
    return parser.parse_args()


def get_em(path: Path) -> list[Path]:
    """Collect edit-master files (*_em.mov) under *path*.

    Non-.mov edit masters are flagged to stdout for manual review.
    """
    ems = []
    for candidate in path.rglob("*_em.*"):
        if str(candidate).endswith("mov"):
            ems.append(candidate)
        else:
            print(f"is this okay?: {candidate}")
    return ems


def find_interlace(path: Path) -> tuple[Path, str]:
    """Return (path, scan type) for one file via mediainfo.

    BUG FIX: the annotations claimed a list of [path, str] pairs; the
    function handles a single path and returns one (path, scan type) pair.
    """
    scan_type = subprocess.check_output(
        ["mediainfo", "--Inform=Video;%ScanType%", path],
        encoding="utf-8",
    ).strip()
    return path, scan_type


def make_commands(file: tuple[Path, str]) -> list[str]:
    """Build the ffmpeg command for one (edit master, scan type) pair."""
    em_path, scan_type = file[0], file[1]
    # .../data/EditMasters/x_em.mov -> package root is two levels up.
    base = em_path.parent.parent
    dest = base / "ServiceCopies"
    # ffmpeg will not create the output directory itself.
    # BUG FIX: replaces subprocess.run(['mkdir', ...]) with Path.mkdir.
    dest.mkdir(exist_ok=True)
    sc_path = str(dest / em_path.name.replace("em.mov", "sc.mp4"))

    cmd = [
        "ffmpeg", "-i", str(em_path),
        "-map", "0:v", "-map", "0:a",
        "-c:v", "libx264", "-movflags", "+faststart",
        "-crf", "20", "-maxrate", "7.5M", "-bufsize", "7.5M",
    ]
    # BUG FIX: mediainfo reports "Interlaced" (capitalized); the original
    # compared against lowercase 'interlaced' and so never applied yadif.
    # NOTE(review): confirm against your mediainfo version's ScanType output.
    if scan_type.lower() == "interlaced":
        cmd += ["-vf", "yadif"]
    cmd += ["-c:a", "aac", "-b:a", "320000", "-ar", "48000", sc_path]
    return cmd


def make_sc(command: list[str]) -> str:
    """Run one ffmpeg command; return the output service-copy path."""
    subprocess.run(command)
    sc = command[-1]
    logging.info("%s created", sc)
    return sc


def make_rclone(file: str, dest: str) -> list[str]:
    """Build the rclone copyto command for one service copy."""
    fn = Path(file).name
    return ["rclone", "copyto", file, f"{dest}/{fn}", "-P"]


def run_rclone(command: list[str]) -> None:
    """Run one rclone command, logging before and after the transfer."""
    logging.info("transferring %s", command[2])
    subprocess.run(command)
    logging.info("%s has been transferred", command[2])


def sc_main() -> None:
    # NOTE: in the real module this is named main(); renamed here only
    # because the patch's four files are shown concatenated.
    """Transcode every qualifying edit master and transfer the result."""
    # BUG FIX: parse_args() was called twice, re-parsing sys.argv.
    args = sc_parse_args()
    for em in get_em(args.source):
        ff_cmd = make_commands(find_interlace(em))
        if Path(ff_cmd[-1]).exists():
            continue  # service copy already made
        # Deliberate filter: only process packages prefixed 'myd'.
        if not em.name.startswith("myd"):
            continue
        make_sc(ff_cmd)
        run_rclone(make_rclone(ff_cmd[-1], args.dest))


if __name__ == "__main__":
    sc_main()


# === file: tests/test_lint_bdami.py ===
# (tests/test_bd_validator.py's hunks only rename `bv` -> `bd`; its fixture
#  bodies are outside this patch. NOTE(review): it still calls
#  bd.get_structure, which no longer exists — it is superseded by the tests
#  below; verify against the full file.)
from pathlib import Path

import pytest

import born_digital_docs_scripts.lint_bdami as bd


@pytest.fixture
def good_package(tmp_path: Path):
    # BUG FIX: the parameter was misspelled "tm_path", so pytest could not
    # inject the tmp_path fixture.
    pkg = tmp_path / "fixtures" / "simple_bdami_pk"

    ao_folder = pkg / "data" / "ArchiveOriginals"
    em_folder = pkg / "data" / "EditMasters"
    sc_folder = pkg / "data" / "ServiceCopies"
    # BUG FIX: joinpath("/myd_...") discards everything before the absolute
    # component; use a relative segment instead.
    ao_clips = ao_folder / "myd_mgzidf123456_v01_ao" / "CLIPS"
    for folder in (ao_folder, em_folder, sc_folder, ao_clips):
        folder.mkdir(parents=True, exist_ok=True)

    # BUG FIX: removed the dangling `ao_bpav =` (SyntaxError) and gave the
    # file-creation loop a body — the original `for file in [...]` had no
    # colon and never touched the files.
    files = [
        ao_folder / "myd_mgzidf123456_v01_ao.mp4",
        ao_clips / "myd_mgzidf123456_v01_ao.mp4",
        ao_clips / "myd_mgzidf123456_v01_ao.xml",
        em_folder / "myd_mgzidf123456_v01_em.mov",
        sc_folder / "myd_mgzidf123456_v01_sc.mp4",
        pkg / "bagit.txt",
        pkg / "manifest-md5.txt",
    ]
    for file in files:
        file.touch()

    return pkg


def test_is_package_bag(good_package):
    # NOTE(review): a touched bagit.txt is not a real bag; this may need a
    # bagit.make_bag fixture to pass validation.
    result = bd.is_valid_bag(good_package)
    assert result is True


def test_expected_folders_present(good_package):
    # BUG FIX: valid_structure takes a Path, not the list the removed
    # get_structure fixture produced.
    assert bd.valid_structure(good_package / "data")


def test_warning_unexpected_folder(good_package):
    (good_package / "data" / "unknown_folder").mkdir()
    assert not bd.valid_structure(good_package / "data")


@pytest.mark.skip(reason="required-folder checks not implemented yet")
def test_required_folders_present(good_package):
    assert False


@pytest.mark.skip(reason="required-folder checks not implemented yet")
def test_warn_on_required_folders_missing(good_package):
    assert False