diff --git a/LabGym/logging.yaml b/LabGym/logging.yaml index ba646688..aea270e6 100644 --- a/LabGym/logging.yaml +++ b/LabGym/logging.yaml @@ -16,7 +16,10 @@ version: 1 formatters: myformat: datefmt: "%Y-%m-%d %H:%M:%S" - format: "%(asctime)s\t%(levelname)s\t[%(thread)d:%(name)s:%(module)s:%(lineno)d]\t%(message)s" + + # format: "%(asctime)s\t%(levelname)s\t[%(thread)d:%(name)s:%(module)s:%(lineno)d]\t%(message)s" + format: "%(asctime)s\t%(levelname)s\t[%(module)s:%(lineno)d]\t%(message)s" + mycompactformat: datefmt: "%H:%M:%S" format: "%(asctime)s\t%(levelname)s\t%(message)s" diff --git a/LabGym/myargparse.py b/LabGym/myargparse.py index 6b52f0a4..6a80088d 100644 --- a/LabGym/myargparse.py +++ b/LabGym/myargparse.py @@ -27,7 +27,7 @@ # (none) # Local application/library specific imports. -from LabGym import __version__ as version +from LabGym import __version__, pkghash # result is a dict with keys that are string, and vals that are @@ -176,7 +176,8 @@ def parse_args() -> ResultType: elif arg in ['--version']: # Print version msg to stdout and exit 0. - print(f'version: {version}') + version_with_hash = pkghash.labgym_version_with_hash() + print(f'version: {version_with_hash}') sys.exit() elif arg == '--': diff --git a/LabGym/pkghash/__init__.py b/LabGym/pkghash/__init__.py new file mode 100644 index 00000000..ccb0fd3b --- /dev/null +++ b/LabGym/pkghash/__init__.py @@ -0,0 +1,29 @@ +"""Support hash-included version reporting for LabGym. + +Provide functions to support hash-included version reporting for the +LabGym package. + +The LabGym package defines __version__ in LabGym/__init__.py. + +Example + from LabGym import __version__, pkghash + # __version__ is like '2.9.7' + + version_with_hash = pkghash.labgym_version_with_hash() + # version_with_hash is like '2.9.7 (a48c52287fc078897a30f19b05f1c12a)' + +Notes +* These formats were considered... 
@contextmanager
def chdir(path):
    """Run the body of a ``with`` statement with ``path`` as the cwd.

    The working directory in effect on entry is recorded first, and is
    reinstated on exit, whether the body finishes normally or raises.

    This is a stand-in for contextlib.chdir, which is only available in
    Python 3.11+.
    """
    previous_dir = os.getcwd()
    try:
        os.chdir(path)
        yield
    finally:
        # Always go back, even when the chdir above or the body failed.
        os.chdir(previous_dir)
def _walk_and_hash(folder: Path) -> str:
    """Walk a folder and return the accumulated MD5 hash of its py-files.

    The walk visits dirs and files in sorted order, so the same folder
    content always produces the same hexdigest string.

    * Skip dirs that don't have __init__.py (and everything below them).
      All files of interest are in package dirs, right?

      Actually, no.  Generally, a package dir might have source-code
      in package subdirs that are not actually traditional packages
      themselves.  (Implicit Namespace Packages)

      A different approach would be to create an empty file '.nohash'
      in dirs that should be skipped, and test for its existence
      during the walk.

    * Skip top-level dirs detectron2, detectors, models.

    * Skip non-py-files.

    Note that the process cwd is temporarily changed to folder for the
    duration of the walk, so that the file paths fed to the hash are
    relative to folder.
    """

    # MD5 is used here only as a content fingerprint, not as a security
    # measure.  Declaring that lets the constructor succeed on
    # FIPS-restricted Python builds, where a plain hashlib.md5() raises
    # ValueError.  The resulting digest is unchanged.
    hasher = hashlib.md5(usedforsecurity=False)

    skipped_toplevel_dirs = ('detectron2', 'detectors', 'models')

    with chdir(folder):
        for root, dirs, files in os.walk('.'):
            # Sort in place, so that os.walk descends in "sorted" order.
            dirs.sort()
            files.sort()

            if '__init__.py' not in files:
                # This is not a "traditional" package dir...
                # Skip further descent, and skip the files in this dir.
                dirs.clear()
                continue

            if root == '.':
                # Prune in place, so that os.walk doesn't descend into them.
                dirs[:] = [d for d in dirs if d not in skipped_toplevel_dirs]

            for file in files:
                if file.endswith('.py'):
                    _add_file_to_hash(hasher, os.path.join(root, file))

    hashval = hasher.hexdigest()
    logger.debug('%s: %r', 'hashval', hashval)
    return hashval
def _myreadlines(f, n=200):
    """Yield the lines of f in batches: lists holding up to n lines each.

    Every yielded list is non-empty.  Iteration ends as soon as f has no
    more lines to give.
    """
    # iter(callable, sentinel) keeps calling until an empty list comes back.
    yield from iter(lambda: list(itertools.islice(f, n)), [])


def _expand(line, n=4):
    """Return line with each leading tab replaced by n spaces.

    Only the run of tabs at the very start of the line is expanded.  A
    line that doesn't start with a tab is returned as-is.
    """
    remainder = line.lstrip('\t')
    tab_count = len(line) - len(remainder)
    if tab_count == 0:
        return line
    return ' ' * (tab_count * n) + remainder
def labgym_version_with_hash() -> str:
    """Return LabGym's version string followed by a hashval in parens.

    The hashval is obtained from get_hashval for the LabGym package
    folder.  The result is a str like
    '2.9.6 (be19e53c16ff24a33c48b517d870147b)'.
    """

    version_with_longhash = '{} ({})'.format(
        version, get_hashval(labgym_package_folder))

    logger.debug('%s: %r', 'version_with_longhash', version_with_longhash)
    return version_with_longhash
skip_pass_void = (reginfo is not None and reginfo.get('name') == 'skip' - and packaging.version.parse(version) + and packaging.version.parse(__version__) != packaging.version.parse(reginfo.get('version')) ) @@ -171,6 +171,8 @@ def get_context(anonymous: bool=False) -> dict: except Exception: reginfo_uuid = None + version_with_hash = pkghash.labgym_version_with_hash() + result = { 'schema': 'context 2025-08-10', @@ -180,7 +182,7 @@ def get_context(anonymous: bool=False) -> dict: 'python_version': platform.python_version(), # LabGym sw - 'version': version, # LabGym version + 'version': version_with_hash, # LabGym version # User info # 'username': getpass.getuser(), diff --git a/LabGym/registration.py b/LabGym/registration.py index 4009c6db..e8eb5087 100644 --- a/LabGym/registration.py +++ b/LabGym/registration.py @@ -83,7 +83,7 @@ import yaml # PyYAML, YAML parser and emitter for Python # Local application/library specific imports. -from LabGym import __version__ as version +from LabGym import __version__ from LabGym import central_logging from LabGym import config from LabGym import mywx @@ -338,7 +338,7 @@ def register(central_logger=None) -> None: 'platform': platform.platform(), 'node': platform.node(), - 'version': version, # LabGym version + 'version': __version__, # LabGym version }) try: diff --git a/LabGym/tests/test_myargparse.py b/LabGym/tests/test_myargparse.py index a4fcb9be..b4cc31d7 100644 --- a/LabGym/tests/test_myargparse.py +++ b/LabGym/tests/test_myargparse.py @@ -5,7 +5,7 @@ import pytest from LabGym import myargparse -from LabGym import __version__ as version +from LabGym import __version__ from .exitstatus import exitstatus @@ -89,13 +89,17 @@ def test_parse_args_version(monkeypatch, capsys): # Arrange monkeypatch.setattr(sys, 'argv', ['cmd', '--version']) + hashval = '4cf004ad24fce8272bfda213219707d5' + version_with_hash = f'{__version__} ({hashval})' + monkeypatch.setattr(myargparse.pkghash, 'labgym_version_with_hash', + lambda: 
def test_pkghash(capsys):
    """Check the form of the hash-embellished LabGym version string.

    The expected form is 'VERSION (HASHVAL)', where VERSION is
    LabGym.__version__ and HASHVAL is an MD5 hexdigest (32 lowercase
    hex digits).
    """
    # Local imports, to keep this test self-contained.
    import re
    from LabGym import __version__

    # Arrange
    prefix = f'{__version__} ('

    # Act
    version_with_hash = pkghash.labgym_version_with_hash()

    # Assert
    assert isinstance(version_with_hash, str)
    assert version_with_hash.startswith(prefix)
    assert re.fullmatch(r'[0-9a-f]{32}\)', version_with_hash[len(prefix):])

    # A repeat call reports the same value.
    assert pkghash.labgym_version_with_hash() == version_with_hash

    # Show the value in the pytest output, bypassing capture.
    with capsys.disabled():
        print('\n' f'version_with_hash: {version_with_hash}')
+ + Google AI Overview responds: + When running Nox in a GitHub Actions workflow on a PR commit, your + current working directory (cwd) is the root of your repository on + the runner's machine. + + The working directory in GitHub Actions is a specific path on the + runner, typically /home/runner/work/{repo}/{repo} on Linux, which is + where the actions/checkout action places your code by default. + + Nox itself also has a default behavior: it automatically changes its + working directory to the directory containing the noxfile.py script + before running any sessions. + + Therefore: + * The initial working directory for the GitHub Actions step + running Nox is the repository root. + * Once the nox command executes, the working directory for the + individual Nox sessions (the Python functions in your + noxfile.py) will be the directory where the noxfile.py is + located. + + This means you can generally rely on paths within your Nox sessions + being relative to the noxfile.py location, which is usually at the + root of your repository (unless you've specified a different + location in your workflow or using the --noxfile argument). + """ + assert os.path.isfile('noxfile.py') + assert os.getcwd() == os.path.dirname(os.path.dirname(LabGym.__file__)) + + def test_import_LabGym_package(): """Load LabGym.__init__.py and get a list of submodules.""" - import LabGym - - # Confirm the assumption that under pytest, cwd is LabGym package's - # parent dir (the repo dir). - assert os.getcwd() == os.path.dirname(os.path.dirname(LabGym.__file__)) # Prepare a list of all submodule py-files in LabGym dir, but not subdirs. pyfiles = glob.glob(os.path.join(os.path.dirname(LabGym.__file__), '*.py')) pyfiles.sort() # result from glob.glob() isn't sorted submodules.extend([os.path.basename(f).removesuffix('.py') for f in pyfiles]) - logging.debug('%s:\n%s', 'Milepost 0, submodules', - textwrap.indent(pprint.pformat(submodules), ' ')) # Remove __init__.py.
There's no need to challenge it, as it was # already loaded by the "import LabGym" statement at the beginning - # of this in this test. + # of this test file. And furthermore, as it is already loaded, + # importing it (again) would not reload it. OTOH, if it wasn't + # specifically removed here, there would be no harm done. submodules.remove('__init__') + # Add subpackages? This requires maintenance... find subdirs with + # py-files and load them? Loading the package doesn't necessarily + # load all of the package's py-files... (that's why we are + # attempting to load all of LabGym package's first-generation + # py-files) + submodules.extend(['detectron2', 'mywx', 'pkghash', 'selftest']) + def test_imports_with_sysargv_initialized(monkeypatch): """Test that some module imports don't raise exceptions. @@ -67,10 +118,10 @@ def test_imports_with_sysargv_initialized(monkeypatch): submodules.remove('myargparse') -def test_remainder(): +def test_remainder(capsys): """Test that imports for the remaining modules don't raise exceptions.""" - logging.debug('%s:\n%s', 'Milepost 1, submodules', - textwrap.indent(pprint.pformat(submodules), ' ')) + with capsys.disabled(): + print(f"submodules: {textwrap.indent(pprint.pformat(submodules), ' ')}") while len(submodules) > 0: submodule = submodules[0]