From f0826eaaf74167da2d7e4e95c50268ad6b74b515 Mon Sep 17 00:00:00 2001 From: ruck94301 Date: Thu, 1 Jan 2026 12:22:08 -0800 Subject: [PATCH 01/14] Implement and use pkghash.make_version(). --- LabGym/gui_main.py | 5 +++-- LabGym/myargparse.py | 5 +++-- LabGym/pkghash/__init__.py | 18 ++++++++++++++++++ LabGym/pkghash/hash.py | 2 ++ LabGym/pkghash/lookup.py | 15 +++++++++++++++ LabGym/probes.py | 8 +++++--- LabGym/registration.py | 4 ++-- 7 files changed, 48 insertions(+), 9 deletions(-) create mode 100644 LabGym/pkghash/__init__.py create mode 100644 LabGym/pkghash/hash.py create mode 100644 LabGym/pkghash/lookup.py diff --git a/LabGym/gui_main.py b/LabGym/gui_main.py index 8f32a4fc..3316818e 100644 --- a/LabGym/gui_main.py +++ b/LabGym/gui_main.py @@ -34,7 +34,7 @@ import wx.lib.agw.hyperlink as hl # Local application/library specific imports. -from LabGym import __version__ +from LabGym import __version__, pkghash from .gui_utils import add_or_select_notebook_page logger.debug('importing %s ...', '.gui_categorizer') from .gui_categorizer import PanelLv2_GenerateExamples,PanelLv2_TrainCategorizers,PanelLv2_SortBehaviors,PanelLv2_TestCategorizers @@ -367,7 +367,8 @@ class MainFrame(wx.Frame): """Main frame and its notebook.""" def __init__(self): - super().__init__(None, title=f'LabGym v{__version__}') + version_with_hash = pkghash.make_version(__version__) + super().__init__(None, title=f'LabGym v{version_with_hash}') self.SetSize((1000, 600)) diff --git a/LabGym/myargparse.py b/LabGym/myargparse.py index 6b52f0a4..abe82df5 100644 --- a/LabGym/myargparse.py +++ b/LabGym/myargparse.py @@ -27,7 +27,7 @@ # (none) # Local application/library specific imports. -from LabGym import __version__ as version +from LabGym import __version__, pkghash # result is a dict with keys that are string, and vals that are @@ -176,7 +176,8 @@ def parse_args() -> ResultType: elif arg in ['--version']: # Print version msg to stdout and exit 0. 
- print(f'version: {version}') + version_with_hash = pkghash.make_version(__version__) + print(f'version: {version_with_hash}') sys.exit() elif arg == '--': diff --git a/LabGym/pkghash/__init__.py b/LabGym/pkghash/__init__.py new file mode 100644 index 00000000..27ab070e --- /dev/null +++ b/LabGym/pkghash/__init__.py @@ -0,0 +1,18 @@ +""" +This package supports an enhanced "version" reporting. + +The LabGym package defines its __version__ in LabGym/__init__.py. +It's available to LabGym modules as + from . import __version__ + # or, from LabGym import __version__ + +To support enhanced version reporting that includes hash info for +integrity checking, + from . import __version__, pkghash + # or, from LabGym import __version__, pkghash + + version_with_hash = pkghash.make_version(__version__) +""" + +from .hash import get_hash +from .lookup import lookup, make_version diff --git a/LabGym/pkghash/hash.py b/LabGym/pkghash/hash.py new file mode 100644 index 00000000..ab36f397 --- /dev/null +++ b/LabGym/pkghash/hash.py @@ -0,0 +1,2 @@ +def get_hash(): + return '2718281828459045' diff --git a/LabGym/pkghash/lookup.py b/LabGym/pkghash/lookup.py new file mode 100644 index 00000000..35295e55 --- /dev/null +++ b/LabGym/pkghash/lookup.py @@ -0,0 +1,15 @@ +""" +The term "hash" is commonly used both as verb and as noun. +Here, to avoid confusion, + hash is the name of the package module implementing the function + get_hash is the function (the verb) + hash_value is the return value (the noun) +""" +from . import get_hash + +def lookup(): + pass + +def make_version(version): + hash_value = get_hash() + return f'{version}+{hash_value[:4]}' diff --git a/LabGym/probes.py b/LabGym/probes.py index 7e6d643a..e040f3c0 100644 --- a/LabGym/probes.py +++ b/LabGym/probes.py @@ -29,7 +29,7 @@ import packaging # Core utilities for Python packages # Local application/library specific imports. 
-from LabGym import __version__ as version +from LabGym import __version__, pkghash from LabGym import central_logging, registration from LabGym import config from LabGym import userdata_survey @@ -113,7 +113,7 @@ def probes() -> None: # then expire or void the "skip-henceforth" behavior. skip_pass_void = (reginfo is not None and reginfo.get('name') == 'skip' - and packaging.version.parse(version) + and packaging.version.parse(__version__) != packaging.version.parse(reginfo.get('version')) ) @@ -171,6 +171,8 @@ def get_context(anonymous: bool=False) -> dict: except Exception: reginfo_uuid = None + version_with_hash = pkghash.make_version(__version__) + result = { 'schema': 'context 2025-08-10', @@ -180,7 +182,7 @@ def get_context(anonymous: bool=False) -> dict: 'python_version': platform.python_version(), # LabGym sw - 'version': version, # LabGym version + 'version': version_with_hash, # LabGym version # User info # 'username': getpass.getuser(), diff --git a/LabGym/registration.py b/LabGym/registration.py index 4009c6db..e8eb5087 100644 --- a/LabGym/registration.py +++ b/LabGym/registration.py @@ -83,7 +83,7 @@ import yaml # PyYAML, YAML parser and emitter for Python # Local application/library specific imports. -from LabGym import __version__ as version +from LabGym import __version__ from LabGym import central_logging from LabGym import config from LabGym import mywx @@ -338,7 +338,7 @@ def register(central_logger=None) -> None: 'platform': platform.platform(), 'node': platform.node(), - 'version': version, # LabGym version + 'version': __version__, # LabGym version }) try: From e221efec65208272e9479dac23c27dc692eb21cb Mon Sep 17 00:00:00 2001 From: ruck94301 Date: Sat, 3 Jan 2026 11:47:47 -0800 Subject: [PATCH 02/14] Implement the hashing. 
--- LabGym/gui_main.py | 2 +- LabGym/myargparse.py | 2 +- LabGym/pkghash/__init__.py | 31 +++++---- LabGym/pkghash/hash.py | 128 ++++++++++++++++++++++++++++++++++- LabGym/pkghash/lookup.py | 118 +++++++++++++++++++++++++++++--- LabGym/pkghash/versions.toml | 5 ++ LabGym/probes.py | 3 +- 7 files changed, 260 insertions(+), 29 deletions(-) create mode 100644 LabGym/pkghash/versions.toml diff --git a/LabGym/gui_main.py b/LabGym/gui_main.py index 3316818e..0054a42a 100644 --- a/LabGym/gui_main.py +++ b/LabGym/gui_main.py @@ -367,7 +367,7 @@ class MainFrame(wx.Frame): """Main frame and its notebook.""" def __init__(self): - version_with_hash = pkghash.make_version(__version__) + version_with_hash = pkghash.labgym_version_with_hash() super().__init__(None, title=f'LabGym v{version_with_hash}') self.SetSize((1000, 600)) diff --git a/LabGym/myargparse.py b/LabGym/myargparse.py index abe82df5..6a80088d 100644 --- a/LabGym/myargparse.py +++ b/LabGym/myargparse.py @@ -176,7 +176,7 @@ def parse_args() -> ResultType: elif arg in ['--version']: # Print version msg to stdout and exit 0. - version_with_hash = pkghash.make_version(__version__) + version_with_hash = pkghash.labgym_version_with_hash() print(f'version: {version_with_hash}') sys.exit() diff --git a/LabGym/pkghash/__init__.py b/LabGym/pkghash/__init__.py index 27ab070e..e8914c99 100644 --- a/LabGym/pkghash/__init__.py +++ b/LabGym/pkghash/__init__.py @@ -1,18 +1,23 @@ -""" -This package supports an enhanced "version" reporting. +"""Support hash-included version reporting for LabGym. + +Provide functions to support hash-included version reporting for the +LabGym package. + +The LabGym package defines __version__ in LabGym/__init__.py. -The LabGym package defines its __version__ in LabGym/__init__.py. -It's available to LabGym modules as - from . 
import __version__ - # or, from LabGym import __version__ +Example + from LabGym import __version__, pkghash + # __version__ is like '2.9.6' -To support enhanced version reporting that includes hash info for -integrity checking, - from . import __version__, pkghash - # or, from LabGym import __version__, pkghash + version_with_hash = pkghash.labgym_version_with_hash() + # version_with_hash is like '2.9.6 (7b2c)' - version_with_hash = pkghash.make_version(__version__) +Development Notes + These formats were considered: + 2.9.6.7b2c + 2.9.6+7b2c + 2.9.6 (7b2c) + 2.9.6 (hash: 7b2c) """ -from .hash import get_hash -from .lookup import lookup, make_version +from .lookup import labgym_version_with_hash diff --git a/LabGym/pkghash/hash.py b/LabGym/pkghash/hash.py index ab36f397..55ef7488 100644 --- a/LabGym/pkghash/hash.py +++ b/LabGym/pkghash/hash.py @@ -1,2 +1,126 @@ -def get_hash(): - return '2718281828459045' +"""Support hash-included version reporting for LabGym. + +Provide functions to support hash-included version reporting for the +LabGym package. + +Public Functions + get_hashval -- Return a hashval (a signature) of the contents of the folder. +""" + +# Allow use of newer syntax Python 3.10 type hints in Python 3.9. +from __future__ import annotations + +# Standard library imports. +import hashlib +import logging +import os +from pathlib import Path + +# Related third party imports. +# None + +# Local application/library specific imports. +# None + + +logger = logging.getLogger(__name__) + +_cached_hashvals = {} + + +def get_hashval(folder: str) -> str: + """Return a hashval (a signature) of the contents of the folder. + + On first call to this function with this folder, compute the hash + value string, and cache it in a module-level dictionary variable. + On subsequent calls to this function with the same folder, return + the cached hash value string. 
+ """ + + folder_pathobj = Path(folder).resolve() + + hashval = _cached_hashvals.get(folder_pathobj) + + if hashval is not None: + return hashval + + hashval = _walk_and_hash(folder_pathobj) + _cached_hashvals.update({folder_pathobj: hashval}) + + return hashval + + +def _walk_and_hash(folder: Path) -> str: + """Walk a folder and return the accumulated the MD5 hash for files. + + * Skip dirs that don't have __init__.py. + All files of interest are in package dirs, right? + + Actually, no. Generally, a package dir might have source-code + in package subdirs that are not actually traditional packages + themselves. (Implicit Namespace Packages) + + A different approach would be to create an empty file '.nohash' + in dirs that should be skipped, and test for its existence + during the walk. + + * Skip top-level dirs detectron2, detectors, models. + + * Skip non-py-files. + """ + + hasher = hashlib.md5() + os.chdir(folder) + + for root, dirs, files in os.walk('.'): + # walk "sorted" + dirs.sort() + files.sort() + + # Skip dirs that don't have __init__.py. + if '__init__.py' not in files: + # this is not a "traditional" package dir... + # skip further descent + dirs.clear() + # skip processing files in this dir + continue + + # Skip top-level dirs detectron2, detectors, models. + if root == '.': + for name in ['detectron2', 'detectors', 'models']: + if name in dirs: + dirs.remove(name) + + for file in files: + # Skip non-py-files. + if not file.endswith('.py'): + continue + + file_path = os.path.join(root, file) + _add_file_to_hash(hasher, file_path) + + hashval = hasher.hexdigest() + logger.debug('%s: %r', 'hashval', hashval) + return hashval + + +def _add_file_to_hash(hasher, file_path: str) -> None: + """Add the filepath and the file content to the hash. 
+ + Hash is sensitive to + filename case -- foo.py is different from Foo.py + file rename -- foo.py is different from goo.py + """ + + filename = Path(file_path).as_posix() # forward slash, even on Windows + logger.debug('%s: %r', 'filename', filename) + + hasher.update(filename.encode('utf-8')) + + try: + with open(file_path, 'rb') as f: + # Read file in 8KB chunks to handle large files efficiently + while chunk := f.read(8192): + hasher.update(chunk) + except (OSError, IOError) as e: + logger.warning(f'Trouble...{e}') diff --git a/LabGym/pkghash/lookup.py b/LabGym/pkghash/lookup.py index 35295e55..1031204f 100644 --- a/LabGym/pkghash/lookup.py +++ b/LabGym/pkghash/lookup.py @@ -1,15 +1,111 @@ -""" +"""Support hash-included version reporting for LabGym. + +Provide functions to support hash-included version reporting for the +LabGym package. + +Public Functions + labgym_version_with_hash -- Return a hash-included version string + for LabGym. + The term "hash" is commonly used both as verb and as noun. -Here, to avoid confusion, - hash is the name of the package module implementing the function - get_hash is the function (the verb) - hash_value is the return value (the noun) +To reduce confusion, these terms may be used to improve clarity. + get_hashval -- the function (the verb) + hashval -- the return string value (the noun) from the function + +Examples + pkghash.labgym_version_with_hash() + returns a str like '2.9.6+be19', + or returns '2.9.6' if the version+hashval is in known_versions. + + pkghash.labgym_version_with_hash(maxlen=6) + returns a str like '2.9.6+be19e5', + or returns '2.9.6' if the version+hashval is in known_versions. + + pkghash.labgym_version_with_hash(maxlen=None, + suppress_if_known=False) + returns a str like '2.9.6+be19e53c16ff24a33c48b517d870147b' + even if version+hashval is in known_versions. + +Why? Isn't LabGym.__version__ sufficiently identifying? 
+The purpose of this "enhanced" version-string is to make obvious +when the user or developer is running customized/modified LabGym. + +This helps to +1. make developer usage scrubbable from LabGym from overall usage + stats. +2. avoid developer-investigation effort for behavior of a + customized LabGym. """ -from . import get_hash -def lookup(): - pass +# Allow use of newer syntax Python 3.10 type hints in Python 3.9. +from __future__ import annotations + +# Standard library imports. +import logging +from pathlib import Path +from typing import List + +# Related third party imports. +try: + # tomllib is included in the Python Standard Library since version 3.11 + import tomllib # type: ignore +except ModuleNotFoundError: + import tomli as tomllib # A lil' TOML parser + +# Local application/library specific imports. +from .hash import get_hashval +import LabGym + + +logger = logging.getLogger(__name__) + + +labgym_package_folder = str(Path(LabGym.__file__).parent) +version = LabGym.__version__ +versions_file = Path(__file__).parent / 'versions.toml' + + +def _get_known_versions() -> List[str]: + """Read list of known version+hash strings from toml-file, and return it.""" + + try: + with open(versions_file, 'rb') as f: + known_versions = tomllib.load(f).get('known_versions') + assert isinstance(known_versions, list) + except Exception as e: + # an unreadable toml-file produces an empty list + logger.warning(f'Trouble reading {versions_file}. {e}') + known_versions = [] + + logger.debug('%s: %r', 'known_versions', known_versions) + + return known_versions + + +def labgym_version_with_hash( + maxlen=4, + suppress_if_known=True, + ) -> str: + """Return a hash-included version string for LabGym. + + If this LabGym package's version_with_hash package matches a + reference value stored in versions.toml, then this LabGym is judged + genuine/unmodified, so the hashval suffix is suppressed (unless + suppress_if_known is True). 
+ """ + + hashval: str = get_hashval(labgym_package_folder) + + known_enhanced_versions = _get_known_versions() + + if (f'{version} ({hashval})' in known_enhanced_versions + and suppress_if_known == True): + result = version # without hashval + + elif maxlen is not None: + result = f'{version} ({hashval[:maxlen]})' + else: + result = f'{version} ({hashval})' -def make_version(version): - hash_value = get_hash() - return f'{version}+{hash_value[:4]}' + logger.debug('%s: %r', 'result', result) + return result diff --git a/LabGym/pkghash/versions.toml b/LabGym/pkghash/versions.toml new file mode 100644 index 00000000..fa39341a --- /dev/null +++ b/LabGym/pkghash/versions.toml @@ -0,0 +1,5 @@ +known_versions = [ + # d41d... is the md5hash of zero bytes, so only matches no-content. + '2.9.6 (d41d8cd98f00b204e9800998ecf8427e)', + '2.9.6 (dfe2c280cb0038f808f13eb2cde9adb0)', +] diff --git a/LabGym/probes.py b/LabGym/probes.py index e040f3c0..1a32cab6 100644 --- a/LabGym/probes.py +++ b/LabGym/probes.py @@ -171,7 +171,8 @@ def get_context(anonymous: bool=False) -> dict: except Exception: reginfo_uuid = None - version_with_hash = pkghash.make_version(__version__) + version_with_hash = pkghash.labgym_version_with_hash( + maxlen=None, suppress_if_known=False) result = { 'schema': 'context 2025-08-10', From a5ac208928c3f62ee484428fbc93518b49250676 Mon Sep 17 00:00:00 2001 From: ruck94301 Date: Sat, 3 Jan 2026 16:19:30 -0800 Subject: [PATCH 03/14] Set level for LabGym.pkghash.hash --- LabGym/logging.yaml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/LabGym/logging.yaml b/LabGym/logging.yaml index ba646688..d38b1375 100644 --- a/LabGym/logging.yaml +++ b/LabGym/logging.yaml @@ -16,7 +16,11 @@ version: 1 formatters: myformat: datefmt: "%Y-%m-%d %H:%M:%S" - format: "%(asctime)s\t%(levelname)s\t[%(thread)d:%(name)s:%(module)s:%(lineno)d]\t%(message)s" + + # format: "%(asctime)s\t%(levelname)s\t[%(thread)d:%(name)s:%(module)s:%(lineno)d]\t%(message)s" + # 
format: "%(asctime)s\t%(levelname)s\t[%(name)s:%(module)s:%(lineno)d]\t%(message)s" + format: "%(asctime)s\t%(levelname)s\t[%(module)s:%(lineno)d]\t%(message)s" + mycompactformat: datefmt: "%H:%M:%S" format: "%(asctime)s\t%(levelname)s\t%(message)s" @@ -40,4 +44,7 @@ loggers: urllib3.connectionpool: level: INFO + LabGym.pkghash.hash: + level: INFO + disable_existing_loggers: false From f336012e0b252e7d90bae147b024fa369b719762 Mon Sep 17 00:00:00 2001 From: ruck94301 Date: Tue, 6 Jan 2026 12:55:22 -0800 Subject: [PATCH 04/14] Improved robustness wrt CRLF and indent-style. --- LabGym/logging.yaml | 3 -- LabGym/pkghash/__init__.py | 42 +++++++++++++++++++++++++-- LabGym/pkghash/hash.py | 54 +++++++++++++++++++++++++++++++---- LabGym/pkghash/lookup.py | 25 ++++++++-------- LabGym/pkghash/versions.toml | 2 -- docs/contributing/internal.md | 11 +++++++ 6 files changed, 111 insertions(+), 26 deletions(-) diff --git a/LabGym/logging.yaml b/LabGym/logging.yaml index d38b1375..a66edc38 100644 --- a/LabGym/logging.yaml +++ b/LabGym/logging.yaml @@ -44,7 +44,4 @@ loggers: urllib3.connectionpool: level: INFO - LabGym.pkghash.hash: - level: INFO - disable_existing_loggers: false diff --git a/LabGym/pkghash/__init__.py b/LabGym/pkghash/__init__.py index e8914c99..d0b8a567 100644 --- a/LabGym/pkghash/__init__.py +++ b/LabGym/pkghash/__init__.py @@ -12,12 +12,50 @@ version_with_hash = pkghash.labgym_version_with_hash() # version_with_hash is like '2.9.6 (7b2c)' -Development Notes - These formats were considered: +Notes +* These formats were considered... 2.9.6.7b2c 2.9.6+7b2c 2.9.6 (7b2c) 2.9.6 (hash: 7b2c) + +* One way to silence the LabGym.pkghash.hash debug messages is to set + the logger's level to INFO, by modifying ~/.labgym/logging.yaml, in + loggers, like + LabGym.pkghash.hash: + level: INFO + +* As a final step before releasing a new version, the developer should + update versions.toml with the reference version+hashval. + + 1. 
run + LabGym --debug --version + which reports like + ... + DEBUG:LabGym.pkghash.lookup:version_with_longhash: '2.9.7 (a48c52287fc078897a30f19b05f1c12a)' + DEBUG:LabGym.pkghash.lookup:known_versions: ['2.9.6 (d41d8cd98f00b204e9800998ecf8427e)'] + DEBUG:LabGym.pkghash.lookup:result: '2.9.7 (a48c)' + version: 2.9.7 (a48c) + + 2. update versions.toml from old, like + known_versions = [ + '2.9.6 (d41d8cd98f00b204e9800998ecf8427e)', + ] + to new like + known_versions = [ + # '2.9.6 (d41d8cd98f00b204e9800998ecf8427e)', + '2.9.7 (a48c52287fc078897a30f19b05f1c12a)', + ] + + Should we keep old known_values in the list defined in versions.toml? + They won't be matched, because LabGym.__version__ is updated, but + they have some value, so, yes preserve them as a comment. + + What if this final step (update versions.toml) is inadvertently + skipped during a release? + Only that the user-visible id will contain the hashval even when + LabGym is clean... if there's no reference value to match, then it + can't be suppressed. In other words, a little extra visual noise. """ from .lookup import labgym_version_with_hash diff --git a/LabGym/pkghash/hash.py b/LabGym/pkghash/hash.py index 55ef7488..77957f4d 100644 --- a/LabGym/pkghash/hash.py +++ b/LabGym/pkghash/hash.py @@ -12,9 +12,13 @@ # Standard library imports. import hashlib +import itertools import logging import os from pathlib import Path +import re +import sys +import time # Related third party imports. # None @@ -73,7 +77,7 @@ def _walk_and_hash(folder: Path) -> str: os.chdir(folder) for root, dirs, files in os.walk('.'): - # walk "sorted" + # walk in "sorted" order dirs.sort() files.sort() @@ -110,17 +114,55 @@ def _add_file_to_hash(hasher, file_path: str) -> None: Hash is sensitive to filename case -- foo.py is different from Foo.py file rename -- foo.py is different from goo.py + + * replace leading tabs with 4-spaces, + replace trailing \r\n with \n + Why? 
To normalize the content, as developers might run a genuine, + but smudge-filtered copy. """ filename = Path(file_path).as_posix() # forward slash, even on Windows - logger.debug('%s: %r', 'filename', filename) hasher.update(filename.encode('utf-8')) try: - with open(file_path, 'rb') as f: - # Read file in 8KB chunks to handle large files efficiently - while chunk := f.read(8192): - hasher.update(chunk) + # with open(file_path, 'rb') as f: + # # Read file in 8KB chunks to handle large files efficiently + # while chunk := f.read(8192): + # hasher.update(chunk) + + with open(file_path, 'r') as f: + # Read file in 200-line chunks to handle large files efficiently + for chunk in _myreadlines(f): + for i, line in enumerate(chunk): + line = _expand(line) # expand leading tabs to 4 spaces + # replace trailing space, incl LF or CRLF, with LF + line = line.rstrip() + '\n' + chunk[i] = line + + hasher.update(''.join(chunk).encode('utf-8')) + except (OSError, IOError) as e: logger.warning(f'Trouble...{e}') + + logger.debug('%s: %r', 'filename, hasher.hexdigest()', + (filename, hasher.hexdigest())) + + +def _myreadlines(f, n=200): + """Read n lines from f, and yield a list of the n strings.""" + while True: + nline_chunk = list(itertools.islice(f, n)) + if not nline_chunk: + break + yield nline_chunk + + +def _expand(line, n=4): + """Expand leading tabs to n spaces.""" + match = re.match(r'^(\t+)', line) + if match: + leading_tabs = match.group(0) + spaces = ' ' * len(leading_tabs) * n + return spaces + line[len(leading_tabs):] + return line diff --git a/LabGym/pkghash/lookup.py b/LabGym/pkghash/lookup.py index 1031204f..c4c7b11b 100644 --- a/LabGym/pkghash/lookup.py +++ b/LabGym/pkghash/lookup.py @@ -14,16 +14,16 @@ Examples pkghash.labgym_version_with_hash() - returns a str like '2.9.6+be19', + returns a str like '2.9.6 (be19)', or returns '2.9.6' if the version+hashval is in known_versions. 
pkghash.labgym_version_with_hash(maxlen=6) - returns a str like '2.9.6+be19e5', + returns a str like '2.9.6 (be19e5)', or returns '2.9.6' if the version+hashval is in known_versions. pkghash.labgym_version_with_hash(maxlen=None, suppress_if_known=False) - returns a str like '2.9.6+be19e53c16ff24a33c48b517d870147b' + returns a str like '2.9.6 (be19e53c16ff24a33c48b517d870147b)' even if version+hashval is in known_versions. Why? Isn't LabGym.__version__ sufficiently identifying? @@ -31,10 +31,9 @@ when the user or developer is running customized/modified LabGym. This helps to -1. make developer usage scrubbable from LabGym from overall usage - stats. -2. avoid developer-investigation effort for behavior of a - customized LabGym. +1. make developer usage scrubbable from overall LabGym usage stats. +2. avoid developer-investigation-effort for behavior in a customized + LabGym. """ # Allow use of newer syntax Python 3.10 type hints in Python 3.9. @@ -96,16 +95,16 @@ def labgym_version_with_hash( hashval: str = get_hashval(labgym_package_folder) - known_enhanced_versions = _get_known_versions() - - if (f'{version} ({hashval})' in known_enhanced_versions - and suppress_if_known == True): - result = version # without hashval + version_with_longhash = f'{version} ({hashval})' + logger.debug('%s: %r', 'version_with_longhash', version_with_longhash) + if (suppress_if_known == True + and version_with_longhash in _get_known_versions()): + result = version # without hashval elif maxlen is not None: result = f'{version} ({hashval[:maxlen]})' else: - result = f'{version} ({hashval})' + result = version_with_longhash logger.debug('%s: %r', 'result', result) return result diff --git a/LabGym/pkghash/versions.toml b/LabGym/pkghash/versions.toml index fa39341a..df83357b 100644 --- a/LabGym/pkghash/versions.toml +++ b/LabGym/pkghash/versions.toml @@ -1,5 +1,3 @@ known_versions = [ - # d41d... is the md5hash of zero bytes, so only matches no-content. 
'2.9.6 (d41d8cd98f00b204e9800998ecf8427e)', - '2.9.6 (dfe2c280cb0038f808f13eb2cde9adb0)', ] diff --git a/docs/contributing/internal.md b/docs/contributing/internal.md index 2f1407d3..49a010ef 100644 --- a/docs/contributing/internal.md +++ b/docs/contributing/internal.md @@ -57,6 +57,17 @@ environment](./setup.md) set up properly. Then, follow these instructions. 4. Update the changelog in `docs/changelog.md`. After updating the changelog, run `nox -s docs` to ensure that the docs are built properly. +?. Ensure the working dir is "clean", in that there are no uncommitted + py-file changes, because py-file content impacts hash. + + Compute the hash for the LabGym package, and store it in + LabGym/pkghash/versions.toml. + (See instructions in the docstring in LabGym/pkghash/__init__.py.) + + Commit + `$ git add LabGym/pkghash/versions.toml` + `$ git commit -m "Add reference version+hashval to versions.toml."` + 5. Create a tag for the new version. Note that the Git tag DOES contain the leading "v". For example, if the new From f42e569b23114b7813ec3e47c87e457b5e2cdb9a Mon Sep 17 00:00:00 2001 From: ruck94301 Date: Tue, 6 Jan 2026 13:11:42 -0800 Subject: [PATCH 05/14] Fixed instructions for updating versions.toml. --- LabGym/pkghash/__init__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/LabGym/pkghash/__init__.py b/LabGym/pkghash/__init__.py index d0b8a567..8847dd48 100644 --- a/LabGym/pkghash/__init__.py +++ b/LabGym/pkghash/__init__.py @@ -29,13 +29,13 @@ update versions.toml with the reference version+hashval. 1. run - LabGym --debug --version + LabGym --debug which reports like ... 
- DEBUG:LabGym.pkghash.lookup:version_with_longhash: '2.9.7 (a48c52287fc078897a30f19b05f1c12a)' - DEBUG:LabGym.pkghash.lookup:known_versions: ['2.9.6 (d41d8cd98f00b204e9800998ecf8427e)'] - DEBUG:LabGym.pkghash.lookup:result: '2.9.7 (a48c)' - version: 2.9.7 (a48c) + DEBUG:lookup:version_with_longhash: '2.9.7 (a48c52287fc078897a30f19b05f1c12a)' + DEBUG:lookup:known_versions: ['2.9.6 (d41d8cd98f00b204e9800998ecf8427e)'] + DEBUG:lookup:result: '2.9.7 (a48c)' + ... 2. update versions.toml from old, like known_versions = [ From 9b82abe43a80490597cea46097f58b34c0b4940c Mon Sep 17 00:00:00 2001 From: ruck94301 Date: Thu, 8 Jan 2026 14:25:51 -0800 Subject: [PATCH 06/14] Revert hash-embellishment on user-facing version string * revert the extra release-step of updating the lookup table. * revert using embellished version in main frame title. --- LabGym/gui_main.py | 5 ++--- docs/contributing/internal.md | 11 ----------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/LabGym/gui_main.py b/LabGym/gui_main.py index 0054a42a..8f32a4fc 100644 --- a/LabGym/gui_main.py +++ b/LabGym/gui_main.py @@ -34,7 +34,7 @@ import wx.lib.agw.hyperlink as hl # Local application/library specific imports. 
-from LabGym import __version__, pkghash +from LabGym import __version__ from .gui_utils import add_or_select_notebook_page logger.debug('importing %s ...', '.gui_categorizer') from .gui_categorizer import PanelLv2_GenerateExamples,PanelLv2_TrainCategorizers,PanelLv2_SortBehaviors,PanelLv2_TestCategorizers @@ -367,8 +367,7 @@ class MainFrame(wx.Frame): """Main frame and its notebook.""" def __init__(self): - version_with_hash = pkghash.labgym_version_with_hash() - super().__init__(None, title=f'LabGym v{version_with_hash}') + super().__init__(None, title=f'LabGym v{__version__}') self.SetSize((1000, 600)) diff --git a/docs/contributing/internal.md b/docs/contributing/internal.md index 49a010ef..2f1407d3 100644 --- a/docs/contributing/internal.md +++ b/docs/contributing/internal.md @@ -57,17 +57,6 @@ environment](./setup.md) set up properly. Then, follow these instructions. 4. Update the changelog in `docs/changelog.md`. After updating the changelog, run `nox -s docs` to ensure that the docs are built properly. -?. Ensure the working dir is "clean", in that there are no uncommitted - py-file changes, because py-file content impacts hash. - - Compute the hash for the LabGym package, and store it in - LabGym/pkghash/versions.toml. - (See instructions in the docstring in LabGym/pkghash/__init__.py.) - - Commit - `$ git add LabGym/pkghash/versions.toml` - `$ git commit -m "Add reference version+hashval to versions.toml."` - 5. Create a tag for the new version. Note that the Git tag DOES contain the leading "v". For example, if the new From 64260c7190682e6ed81abcc7eff530e8b7191dc5 Mon Sep 17 00:00:00 2001 From: ruck94301 Date: Thu, 8 Jan 2026 15:34:51 -0800 Subject: [PATCH 07/14] Simplify feature -- no more lookup for reference value. 
--- LabGym/logging.yaml | 1 - LabGym/pkghash/__init__.py | 36 ++------------------- LabGym/pkghash/lookup.py | 63 ++++-------------------------------- LabGym/pkghash/versions.toml | 3 -- LabGym/probes.py | 3 +- 5 files changed, 9 insertions(+), 97 deletions(-) delete mode 100644 LabGym/pkghash/versions.toml diff --git a/LabGym/logging.yaml b/LabGym/logging.yaml index a66edc38..aea270e6 100644 --- a/LabGym/logging.yaml +++ b/LabGym/logging.yaml @@ -18,7 +18,6 @@ formatters: datefmt: "%Y-%m-%d %H:%M:%S" # format: "%(asctime)s\t%(levelname)s\t[%(thread)d:%(name)s:%(module)s:%(lineno)d]\t%(message)s" - # format: "%(asctime)s\t%(levelname)s\t[%(name)s:%(module)s:%(lineno)d]\t%(message)s" format: "%(asctime)s\t%(levelname)s\t[%(module)s:%(lineno)d]\t%(message)s" mycompactformat: diff --git a/LabGym/pkghash/__init__.py b/LabGym/pkghash/__init__.py index 8847dd48..ccb0fd3b 100644 --- a/LabGym/pkghash/__init__.py +++ b/LabGym/pkghash/__init__.py @@ -7,10 +7,10 @@ Example from LabGym import __version__, pkghash - # __version__ is like '2.9.6' + # __version__ is like '2.9.7' version_with_hash = pkghash.labgym_version_with_hash() - # version_with_hash is like '2.9.6 (7b2c)' + # version_with_hash is like '2.9.7 (a48c52287fc078897a30f19b05f1c12a)' Notes * These formats were considered... @@ -24,38 +24,6 @@ loggers, like LabGym.pkghash.hash: level: INFO - -* As a final step before releasing a new version, the developer should - update versions.toml with the reference version+hashval. - - 1. run - LabGym --debug - which reports like - ... - DEBUG:lookup:version_with_longhash: '2.9.7 (a48c52287fc078897a30f19b05f1c12a)' - DEBUG:lookup:known_versions: ['2.9.6 (d41d8cd98f00b204e9800998ecf8427e)'] - DEBUG:lookup:result: '2.9.7 (a48c)' - ... - - 2. 
update versions.toml from old, like - known_versions = [ - '2.9.6 (d41d8cd98f00b204e9800998ecf8427e)', - ] - to new like - known_versions = [ - # '2.9.6 (d41d8cd98f00b204e9800998ecf8427e)', - '2.9.7 (a48c52287fc078897a30f19b05f1c12a)', - ] - - Should we keep old known_values in the list defined in versions.toml? - They won't be matched, because LabGym.__version__ is updated, but - they have some value, so, yes preserve them as a comment. - - What if this final step (update versions.toml) is inadvertently - skipped during a release? - Only that the user-visible id will contain the hashval even when - LabGym is clean... if there's no reference value to match, then it - can't be suppressed. In other words, a little extra visual noise. """ from .lookup import labgym_version_with_hash diff --git a/LabGym/pkghash/lookup.py b/LabGym/pkghash/lookup.py index c4c7b11b..fbb80329 100644 --- a/LabGym/pkghash/lookup.py +++ b/LabGym/pkghash/lookup.py @@ -4,7 +4,7 @@ LabGym package. Public Functions - labgym_version_with_hash -- Return a hash-included version string + labgym_version_with_hash -- Return a hash-embellished version string for LabGym. The term "hash" is commonly used both as verb and as noun. @@ -14,26 +14,11 @@ Examples pkghash.labgym_version_with_hash() - returns a str like '2.9.6 (be19)', - or returns '2.9.6' if the version+hashval is in known_versions. - - pkghash.labgym_version_with_hash(maxlen=6) - returns a str like '2.9.6 (be19e5)', - or returns '2.9.6' if the version+hashval is in known_versions. - - pkghash.labgym_version_with_hash(maxlen=None, - suppress_if_known=False) returns a str like '2.9.6 (be19e53c16ff24a33c48b517d870147b)' - even if version+hashval is in known_versions. Why? Isn't LabGym.__version__ sufficiently identifying? -The purpose of this "enhanced" version-string is to make obvious -when the user or developer is running customized/modified LabGym. - -This helps to -1. make developer usage scrubbable from overall LabGym usage stats. -2. 
avoid developer-investigation-effort for behavior in a customized - LabGym. +The purpose of this "enhanced" version-string is to make it possible +to discern when the user or developer is running customized/modified LabGym. """ # Allow use of newer syntax Python 3.10 type hints in Python 3.9. @@ -61,50 +46,14 @@ labgym_package_folder = str(Path(LabGym.__file__).parent) version = LabGym.__version__ -versions_file = Path(__file__).parent / 'versions.toml' - - -def _get_known_versions() -> List[str]: - """Read list of known version+hash strings from toml-file, and return it.""" - try: - with open(versions_file, 'rb') as f: - known_versions = tomllib.load(f).get('known_versions') - assert isinstance(known_versions, list) - except Exception as e: - # an unreadable toml-file produces an empty list - logger.warning(f'Trouble reading {versions_file}. {e}') - known_versions = [] - logger.debug('%s: %r', 'known_versions', known_versions) - - return known_versions - - -def labgym_version_with_hash( - maxlen=4, - suppress_if_known=True, - ) -> str: - """Return a hash-included version string for LabGym. - - If this LabGym package's version_with_hash package matches a - reference value stored in versions.toml, then this LabGym is judged - genuine/unmodified, so the hashval suffix is suppressed (unless - suppress_if_known is True). 
- """ +def labgym_version_with_hash() -> str: + """Return a hash-embellished version string for LabGym.""" hashval: str = get_hashval(labgym_package_folder) version_with_longhash = f'{version} ({hashval})' logger.debug('%s: %r', 'version_with_longhash', version_with_longhash) - if (suppress_if_known == True - and version_with_longhash in _get_known_versions()): - result = version # without hashval - elif maxlen is not None: - result = f'{version} ({hashval[:maxlen]})' - else: - result = version_with_longhash - - logger.debug('%s: %r', 'result', result) - return result + return version_with_longhash diff --git a/LabGym/pkghash/versions.toml b/LabGym/pkghash/versions.toml deleted file mode 100644 index df83357b..00000000 --- a/LabGym/pkghash/versions.toml +++ /dev/null @@ -1,3 +0,0 @@ -known_versions = [ - '2.9.6 (d41d8cd98f00b204e9800998ecf8427e)', -] diff --git a/LabGym/probes.py b/LabGym/probes.py index 1a32cab6..7ff92f83 100644 --- a/LabGym/probes.py +++ b/LabGym/probes.py @@ -171,8 +171,7 @@ def get_context(anonymous: bool=False) -> dict: except Exception: reginfo_uuid = None - version_with_hash = pkghash.labgym_version_with_hash( - maxlen=None, suppress_if_known=False) + version_with_hash = pkghash.labgym_version_with_hash() result = { 'schema': 'context 2025-08-10', From a8f10f759ce33138dd6e30974f26c2fa8e7f9d8a Mon Sep 17 00:00:00 2001 From: ruck94301 Date: Thu, 8 Jan 2026 20:27:50 -0800 Subject: [PATCH 08/14] Update a myargparse.py unit test. 
--- LabGym/tests/test_myargparse.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/LabGym/tests/test_myargparse.py b/LabGym/tests/test_myargparse.py index a4fcb9be..b4cc31d7 100644 --- a/LabGym/tests/test_myargparse.py +++ b/LabGym/tests/test_myargparse.py @@ -5,7 +5,7 @@ import pytest from LabGym import myargparse -from LabGym import __version__ as version +from LabGym import __version__ from .exitstatus import exitstatus @@ -89,13 +89,17 @@ def test_parse_args_version(monkeypatch, capsys): # Arrange monkeypatch.setattr(sys, 'argv', ['cmd', '--version']) + hashval = '4cf004ad24fce8272bfda213219707d5' + version_with_hash = f'{__version__} ({hashval})' + monkeypatch.setattr(myargparse.pkghash, 'labgym_version_with_hash', + lambda: version_with_hash) # Act, and assert raises(SystemExit) with pytest.raises(SystemExit) as e: result = myargparse.parse_args() # Assert - assert capsys.readouterr().out == f'version: {version}\n' + assert capsys.readouterr().out == f'version: {version_with_hash}\n' assert exitstatus(e.value) == 0 From 5c36cc663e48f4726e4b5f4032a2752b9f3de6df Mon Sep 17 00:00:00 2001 From: ruck94301 Date: Fri, 9 Jan 2026 10:19:23 -0800 Subject: [PATCH 09/14] Instrument test_load.py and remove cwd assumption. --- tests/test_load.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/test_load.py b/tests/test_load.py index 16696ff1..786bac75 100644 --- a/tests/test_load.py +++ b/tests/test_load.py @@ -30,9 +30,17 @@ def test_import_LabGym_package(): """Load LabGym.__init__.py and get a list of submodules.""" import LabGym + # ?! + print(f'os.getcwd(): {os.getcwd()}') + print(f'LabGym.__file__: {LabGym.__file__!r}') + print(f'os.path.dirname(LabGym.__file__): {os.path.dirname(LabGym.__file__)!r}') + print(f'os.path.dirname(os.path.dirname(LabGym.__file__)): {os.path.dirname(os.path.dirname(LabGym.__file__))!r}') + # is cwd not relevant for this test? + # is cwd not repeatable for this test? 
+ # Confirm the assumption that under pytest, cwd is LabGym package's # parent dir (the repo dir). - assert os.getcwd() == os.path.dirname(os.path.dirname(LabGym.__file__)) + # assert os.getcwd() == os.path.dirname(os.path.dirname(LabGym.__file__)) # Prepare a list of all submodule py-files in LabGym dir, but not subdirs. pyfiles = glob.glob(os.path.join(os.path.dirname(LabGym.__file__), '*.py')) @@ -47,6 +55,9 @@ def test_import_LabGym_package(): # of this in this test. submodules.remove('__init__') + # Add subpackages? + submodules.extend(['detectron2', 'mywx', 'pkghash', 'selftest']) + def test_imports_with_sysargv_initialized(monkeypatch): """Test that some module imports don't raise exceptions. From ed558eec7a961c60966b2c0febf30ad894ce3284 Mon Sep 17 00:00:00 2001 From: ruck94301 Date: Fri, 9 Jan 2026 10:55:16 -0800 Subject: [PATCH 10/14] Instrument unit test... where am I? --- tests/test_load.py | 62 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 11 deletions(-) diff --git a/tests/test_load.py b/tests/test_load.py index 786bac75..491fb45e 100644 --- a/tests/test_load.py +++ b/tests/test_load.py @@ -18,6 +18,7 @@ import importlib import logging import os +from pathlib import Path import pprint import sys import textwrap @@ -26,29 +27,65 @@ submodules = [] -def test_import_LabGym_package(): +def list_dir_and_first_children(directory_path): + """ + Lists the contents of a directory and the contents of its first-generation children. + + Args: + directory_path (str or Path): The path to the directory to inspect. 
+ """ + base_path = Path(directory_path) + if not base_path.is_dir(): + print(f"Error: {directory_path} is not a valid directory.") + return + + print(f"--- Contents of Directory: {base_path} ---") + + # List immediate contents of the base directory + immediate_contents = list(base_path.iterdir()) + for item in immediate_contents: + print(f"- {item.name} ({'Directory' if item.is_dir() else 'File'})") + + print(f"\n--- Contents of First-Generation Children ---") + + # Iterate through immediate contents and list their children if they are directories + for item in immediate_contents: + if item.is_dir(): + print(f"\n --- Subdirectory: {item.name} ---") + for sub_item in item.iterdir(): + print(f" - {sub_item.name} ({'Directory' if sub_item.is_dir() else 'File'})") + + +def test_import_LabGym_package(capsys): """Load LabGym.__init__.py and get a list of submodules.""" import LabGym # ?! - print(f'os.getcwd(): {os.getcwd()}') - print(f'LabGym.__file__: {LabGym.__file__!r}') - print(f'os.path.dirname(LabGym.__file__): {os.path.dirname(LabGym.__file__)!r}') - print(f'os.path.dirname(os.path.dirname(LabGym.__file__)): {os.path.dirname(os.path.dirname(LabGym.__file__))!r}') + with capsys.disabled(): + print(f'os.getcwd(): {os.getcwd()}') + print(f'LabGym.__file__: {LabGym.__file__!r}') + print(f'os.path.dirname(LabGym.__file__): {os.path.dirname(LabGym.__file__)!r}') + print(f'os.path.dirname(os.path.dirname(LabGym.__file__)): {os.path.dirname(os.path.dirname(LabGym.__file__))!r}') # is cwd not relevant for this test? # is cwd not repeatable for this test? # Confirm the assumption that under pytest, cwd is LabGym package's # parent dir (the repo dir). - # assert os.getcwd() == os.path.dirname(os.path.dirname(LabGym.__file__)) + try: + assert os.getcwd() == os.path.dirname(os.path.dirname(LabGym.__file__)) + except: + # if I'm not in LabGymRepo, where am I? LabGymRepo's parent dir? 
+ with capsys.disabled(): + list_dir_and_first_children(os.getcwd()) + # Prepare a list of all submodule py-files in LabGym dir, but not subdirs. pyfiles = glob.glob(os.path.join(os.path.dirname(LabGym.__file__), '*.py')) pyfiles.sort() # result from glob.glob() isn't sorted submodules.extend([os.path.basename(f).removesuffix('.py') for f in pyfiles]) - logging.debug('%s:\n%s', 'Milepost 0, submodules', - textwrap.indent(pprint.pformat(submodules), ' ')) + # logging.debug('%s:\n%s', 'Milepost 0, submodules', + # textwrap.indent(pprint.pformat(submodules), ' ')) # Remove __init__.py. There's no need to challenge it, as it was # already loaded by the "import LabGym" statement at the beginning @@ -58,6 +95,9 @@ def test_import_LabGym_package(): # Add subpackages? submodules.extend(['detectron2', 'mywx', 'pkghash', 'selftest']) + with capsys.disabled(): + print(f"Milepost 0, submodules: {textwrap.indent(pprint.pformat(submodules), ' ')}") + def test_imports_with_sysargv_initialized(monkeypatch): """Test that some module imports don't raise exceptions. @@ -78,10 +118,10 @@ def test_imports_with_sysargv_initialized(monkeypatch): submodules.remove('myargparse') -def test_remainder(): +def test_remainder(capsys): """Test that imports for the remaining modules don't raise exceptions.""" - logging.debug('%s:\n%s', 'Milepost 1, submodules', - textwrap.indent(pprint.pformat(submodules), ' ')) + with capsys.disabled(): + print(f"Milepost 1, submodules: {textwrap.indent(pprint.pformat(submodules), ' ')}") while len(submodules) > 0: submodule = submodules[0] From a3e44837dd1196a6f52b294b5c539898dc3fd7ff Mon Sep 17 00:00:00 2001 From: ruck94301 Date: Fri, 9 Jan 2026 11:36:34 -0800 Subject: [PATCH 11/14] Employ a context-manager to make chdir temporary. 
--- LabGym/pkghash/hash.py | 66 +++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 26 deletions(-) diff --git a/LabGym/pkghash/hash.py b/LabGym/pkghash/hash.py index 77957f4d..6c8cd818 100644 --- a/LabGym/pkghash/hash.py +++ b/LabGym/pkghash/hash.py @@ -11,6 +11,7 @@ from __future__ import annotations # Standard library imports. +from contextlib import contextmanager import hashlib import itertools import logging @@ -32,6 +33,19 @@ _cached_hashvals = {} +# from contextlib import chdir # available in Python 3.11+ +# To support earlier Python, implement chdir +@contextmanager +def chdir(path): + """Temporarily chdir.""" + old_cwd = os.getcwd() + try: + os.chdir(path) + yield + finally: + os.chdir(old_cwd) # restore + + def get_hashval(folder: str) -> str: """Return a hashval (a signature) of the contents of the folder. @@ -74,34 +88,34 @@ def _walk_and_hash(folder: Path) -> str: """ hasher = hashlib.md5() - os.chdir(folder) - - for root, dirs, files in os.walk('.'): - # walk in "sorted" order - dirs.sort() - files.sort() - - # Skip dirs that don't have __init__.py. - if '__init__.py' not in files: - # this is not a "traditional" package dir... - # skip further descent - dirs.clear() - # skip processing files in this dir - continue - - # Skip top-level dirs detectron2, detectors, models. - if root == '.': - for name in ['detectron2', 'detectors', 'models']: - if name in dirs: - dirs.remove(name) - - for file in files: - # Skip non-py-files. - if not file.endswith('.py'): + + with chdir(folder): + for root, dirs, files in os.walk('.'): + # walk in "sorted" order + dirs.sort() + files.sort() + + # Skip dirs that don't have __init__.py. + if '__init__.py' not in files: + # this is not a "traditional" package dir... + # skip further descent + dirs.clear() + # skip processing files in this dir continue - file_path = os.path.join(root, file) - _add_file_to_hash(hasher, file_path) + # Skip top-level dirs detectron2, detectors, models. 
+ if root == '.': + for name in ['detectron2', 'detectors', 'models']: + if name in dirs: + dirs.remove(name) + + for file in files: + # Skip non-py-files. + if not file.endswith('.py'): + continue + + file_path = os.path.join(root, file) + _add_file_to_hash(hasher, file_path) hashval = hasher.hexdigest() logger.debug('%s: %r', 'hashval', hashval) From 029c52f432cd4946c6410d2b8c3126b53398ec29 Mon Sep 17 00:00:00 2001 From: ruck94301 Date: Fri, 9 Jan 2026 11:58:56 -0800 Subject: [PATCH 12/14] In hash.py, open files for hash with encoding specified, for Windows! --- LabGym/pkghash/hash.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LabGym/pkghash/hash.py b/LabGym/pkghash/hash.py index 6c8cd818..08cf2e05 100644 --- a/LabGym/pkghash/hash.py +++ b/LabGym/pkghash/hash.py @@ -145,7 +145,7 @@ def _add_file_to_hash(hasher, file_path: str) -> None: # while chunk := f.read(8192): # hasher.update(chunk) - with open(file_path, 'r') as f: + with open(file_path, 'r', encoding='utf-8') as f: # Read file in 200-line chunks to handle large files efficiently for chunk in _myreadlines(f): for i, line in enumerate(chunk): From de49a7cc82e57e58e892f6d519d632323091f377 Mon Sep 17 00:00:00 2001 From: ruck94301 Date: Fri, 9 Jan 2026 12:23:20 -0800 Subject: [PATCH 13/14] Scrub excess instrumentation from test_load.py. --- tests/test_load.py | 68 ++++++++++------------------------------------ 1 file changed, 15 insertions(+), 53 deletions(-) diff --git a/tests/test_load.py b/tests/test_load.py index 491fb45e..25f9fdda 100644 --- a/tests/test_load.py +++ b/tests/test_load.py @@ -27,77 +27,39 @@ submodules = [] -def list_dir_and_first_children(directory_path): - """ - Lists the contents of a directory and the contents of its first-generation children. - - Args: - directory_path (str or Path): The path to the directory to inspect. 
- """ - base_path = Path(directory_path) - if not base_path.is_dir(): - print(f"Error: {directory_path} is not a valid directory.") - return - - print(f"--- Contents of Directory: {base_path} ---") - - # List immediate contents of the base directory - immediate_contents = list(base_path.iterdir()) - for item in immediate_contents: - print(f"- {item.name} ({'Directory' if item.is_dir() else 'File'})") - - print(f"\n--- Contents of First-Generation Children ---") - - # Iterate through immediate contents and list their children if they are directories - for item in immediate_contents: - if item.is_dir(): - print(f"\n --- Subdirectory: {item.name} ---") - for sub_item in item.iterdir(): - print(f" - {sub_item.name} ({'Directory' if sub_item.is_dir() else 'File'})") - - def test_import_LabGym_package(capsys): """Load LabGym.__init__.py and get a list of submodules.""" import LabGym - # ?! - with capsys.disabled(): - print(f'os.getcwd(): {os.getcwd()}') - print(f'LabGym.__file__: {LabGym.__file__!r}') - print(f'os.path.dirname(LabGym.__file__): {os.path.dirname(LabGym.__file__)!r}') - print(f'os.path.dirname(os.path.dirname(LabGym.__file__)): {os.path.dirname(os.path.dirname(LabGym.__file__))!r}') - # is cwd not relevant for this test? - # is cwd not repeatable for this test? - # Confirm the assumption that under pytest, cwd is LabGym package's # parent dir (the repo dir). - try: - assert os.getcwd() == os.path.dirname(os.path.dirname(LabGym.__file__)) - except: - # if I'm not in LabGymRepo, where am I? LabGymRepo's parent dir? - with capsys.disabled(): - list_dir_and_first_children(os.getcwd()) - + # The assumption isn't strictly necessary for these unit tests, but + # a failed assert would indicate either (a) noxfile.py was moved + # (the starting cwd is same as location of noxfile.py), or, (b) some + # other unit test in the session did a chdir (possibly by exercising + # code that used an os.chdir instead of a contextlib.chdir). 
+ assert os.getcwd() == os.path.dirname(os.path.dirname(LabGym.__file__)) # Prepare a list of all submodule py-files in LabGym dir, but not subdirs. pyfiles = glob.glob(os.path.join(os.path.dirname(LabGym.__file__), '*.py')) pyfiles.sort() # result from glob.glob() isn't sorted submodules.extend([os.path.basename(f).removesuffix('.py') for f in pyfiles]) - # logging.debug('%s:\n%s', 'Milepost 0, submodules', - # textwrap.indent(pprint.pformat(submodules), ' ')) # Remove __init__.py. There's no need to challenge it, as it was # already loaded by the "import LabGym" statement at the beginning - # of this in this test. + # of this in this test. And furthermore, as it is already loaded, + # importing it (again) would not reload it. OTOH, if it wasn't + # specifically removed here, there would be no harm done. submodules.remove('__init__') - # Add subpackages? + # Add subpackages? This requires maintenance... find subdirs with + # py-files and load them? Loading the package doesn't necessarily + # load all of the package's py-files... (that's why we are + # attempting to load all of LabGym package's first-generation + # py-files) submodules.extend(['detectron2', 'mywx', 'pkghash', 'selftest']) - with capsys.disabled(): - print(f"Milepost 0, submodules: {textwrap.indent(pprint.pformat(submodules), ' ')}") - def test_imports_with_sysargv_initialized(monkeypatch): """Test that some module imports don't raise exceptions. 
@@ -121,7 +83,7 @@ def test_imports_with_sysargv_initialized(monkeypatch): def test_remainder(capsys): """Test that imports for the remaining modules don't raise exceptions.""" with capsys.disabled(): - print(f"Milepost 1, submodules: {textwrap.indent(pprint.pformat(submodules), ' ')}") + print(f"submodules: {textwrap.indent(pprint.pformat(submodules), ' ')}") while len(submodules) > 0: submodule = submodules[0] From d235c0f1725c5f272c6464759e36adf52d98cd00 Mon Sep 17 00:00:00 2001 From: ruck94301 Date: Fri, 9 Jan 2026 13:52:23 -0800 Subject: [PATCH 14/14] Add unit-test of pkghash --- LabGym/tests/test_pkghash.py | 10 ++++++ tests/test_load.py | 62 +++++++++++++++++++++++++++++------- 2 files changed, 60 insertions(+), 12 deletions(-) create mode 100644 LabGym/tests/test_pkghash.py diff --git a/LabGym/tests/test_pkghash.py b/LabGym/tests/test_pkghash.py new file mode 100644 index 00000000..26563c2b --- /dev/null +++ b/LabGym/tests/test_pkghash.py @@ -0,0 +1,10 @@ +from LabGym import pkghash + +def test_pkghash(capsys): + # Arrange + # Act + version_with_hash = pkghash.labgym_version_with_hash() + # Assert + + with capsys.disabled(): + print('\n' f'version_with_hash: {version_with_hash}') diff --git a/tests/test_load.py b/tests/test_load.py index 25f9fdda..2333da76 100644 --- a/tests/test_load.py +++ b/tests/test_load.py @@ -9,9 +9,10 @@ The final test, test_remainder(), imports each of the remaining top- level py-files. + The strength of this catch-all final test is that if a new top-level py-file is introduced, full coverage is preserved and this test-file -may still be suitable. +may not require updating. """ import glob @@ -23,23 +24,60 @@ import sys import textwrap +import LabGym submodules = [] -def test_import_LabGym_package(capsys): - """Load LabGym.__init__.py and get a list of submodules.""" - import LabGym - - # Confirm the assumption that under pytest, cwd is LabGym package's - # parent dir (the repo dir). 
- # The assumption isn't strictly necessary for these unit tests, but - # a failed assert would indicate either (a) noxfile.py was moved - # (the starting cwd is same as location of noxfile.py), or, (b) some - # other unit test in the session did a chdir (possibly by exercising - # code that used an os.chdir instead of a contextlib.chdir). +def test_cwd(): + """ + Test that cwd is the LabGym package's parent dir (the repo dir). + + This condition isn't strictly necessary for these unit tests, but + a failed assert would indicate either (a) noxfile.py was moved (the + starting cwd is same as location of noxfile.py), or, (b) some other + unit test in the session did a chdir (possibly by exercising code + that used an os.chdir instead of a contextlib.chdir). + + As currently organized, the repo dir contains noxfile.py and the + LabGym package dir. + + "In github running nox on a PR commit, what is my cwd? Is it my + repo dir?" + + Google AI Overview responds: + When running Nox in a GitHub Actions workflow on a PR commit, your + current working directory (cwd) is the root of your repository on + the runner's machine. + + The working directory in GitHub Actions is a specific path on the + runner, typically /home/runner/work/<repo>/<repo>/ on Linux, which is + where the actions/checkout action places your code by default. + + Nox itself also has a default behavior: it automatically changes its + working directory to the directory containing the noxfile.py script + before running any sessions. + + Therefore: + * The initial working directory for the GitHub Actions step + running Nox is the repository root. + * Once the nox command executes, the working directory for the + individual Nox sessions (the Python functions in your + noxfile.py) will be the directory where the noxfile.py is + located. 
+ + This means you can generally rely on paths within your Nox sessions + being relative to the noxfile.py location, which is usually at the + root of your repository (unless you've specified a different + location in your workflow or using the --noxfile argument). + """ + assert os.path.isfile('noxfile.py') assert os.getcwd() == os.path.dirname(os.path.dirname(LabGym.__file__)) + +def test_import_LabGym_package(): + """Load LabGym.__init__.py and get a list of submodules.""" + # Prepare a list of all submodule py-files in LabGym dir, but not subdirs. pyfiles = glob.glob(os.path.join(os.path.dirname(LabGym.__file__), '*.py')) pyfiles.sort() # result from glob.glob() isn't sorted