Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion LabGym/logging.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ version: 1
formatters:
myformat:
datefmt: "%Y-%m-%d %H:%M:%S"
format: "%(asctime)s\t%(levelname)s\t[%(thread)d:%(name)s:%(module)s:%(lineno)d]\t%(message)s"

# format: "%(asctime)s\t%(levelname)s\t[%(thread)d:%(name)s:%(module)s:%(lineno)d]\t%(message)s"
format: "%(asctime)s\t%(levelname)s\t[%(module)s:%(lineno)d]\t%(message)s"

mycompactformat:
datefmt: "%H:%M:%S"
format: "%(asctime)s\t%(levelname)s\t%(message)s"
Expand Down
5 changes: 3 additions & 2 deletions LabGym/myargparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
# (none)

# Local application/library specific imports.
from LabGym import __version__ as version
from LabGym import __version__, pkghash


# result is a dict with keys that are string, and vals that are
Expand Down Expand Up @@ -176,7 +176,8 @@ def parse_args() -> ResultType:

elif arg in ['--version']:
# Print version msg to stdout and exit 0.
print(f'version: {version}')
version_with_hash = pkghash.labgym_version_with_hash()
print(f'version: {version_with_hash}')
sys.exit()

elif arg == '--':
Expand Down
29 changes: 29 additions & 0 deletions LabGym/pkghash/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Support hash-included version reporting for LabGym.

Provide functions to support hash-included version reporting for the
LabGym package.

The LabGym package defines __version__ in LabGym/__init__.py.

Example
from LabGym import __version__, pkghash
# __version__ is like '2.9.7'

version_with_hash = pkghash.labgym_version_with_hash()
# version_with_hash is like '2.9.7 (a48c52287fc078897a30f19b05f1c12a)'

Notes
* These formats were considered...
2.9.6.7b2c
2.9.6+7b2c
2.9.6 (7b2c)
2.9.6 (hash: 7b2c)

* One way to silence the LabGym.pkghash.hash debug messages is to set
the logger's level to INFO, by modifying ~/.labgym/logging.yaml, in
loggers, like
LabGym.pkghash.hash:
level: INFO
"""

from .lookup import labgym_version_with_hash
182 changes: 182 additions & 0 deletions LabGym/pkghash/hash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
"""Support hash-included version reporting for LabGym.

Provide functions to support hash-included version reporting for the
LabGym package.

Public Functions
get_hashval -- Return a hashval (a signature) of the contents of the folder.
"""

# Allow use of newer syntax Python 3.10 type hints in Python 3.9.
from __future__ import annotations

# Standard library imports.
from contextlib import contextmanager
import hashlib
import itertools
import logging
import os
from pathlib import Path
import re
import sys
import time

# Related third party imports.
# None

# Local application/library specific imports.
# None


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Cache filled by get_hashval(): resolved folder Path -> hash value string.
_cached_hashvals: dict[Path, str] = {}


# from contextlib import chdir # available in Python 3.11+
# To support earlier Python, implement chdir
@contextmanager
def chdir(path):
    """Run the with-block with *path* as the current working directory.

    The working directory that was in effect on entry is restored on
    exit, whether the with-block finishes normally or raises.
    """
    previous_dir = os.getcwd()
    try:
        os.chdir(path)
        yield
    finally:
        # Always go back to where we started.
        os.chdir(previous_dir)


def get_hashval(folder: str) -> str:
    """Return a hashval (a signature) of the contents of the folder.

    Results are memoized in the module-level _cached_hashvals dict,
    keyed by the resolved folder path.  The first call for a folder
    walks and hashes it; later calls for the same folder return the
    stored hash value string.
    """
    cache_key = Path(folder).resolve()

    cached = _cached_hashvals.get(cache_key)
    if cached is None:
        # Not seen yet: compute once and remember it.
        cached = _walk_and_hash(cache_key)
        _cached_hashvals[cache_key] = cached

    return cached


def _walk_and_hash(folder: Path) -> str:
    """Walk a folder and return the accumulated MD5 hex digest of its files.

    The walk runs with the working directory temporarily set to folder,
    so the file paths fed to the hash are relative to folder.  Dirs and
    files are visited in sorted order.

    * Skip dirs that don't have __init__.py (and don't descend into them).
      All files of interest are in package dirs, right?

      Actually, no.  Generally, a package dir might have source-code
      in package subdirs that are not actually traditional packages
      themselves.  (Implicit Namespace Packages)

      A different approach would be to create an empty file '.nohash'
      in dirs that should be skipped, and test for its existence
      during the walk.

    * Skip top-level dirs detectron2, detectors, models.

    * Skip non-py-files.

    Args:
        folder: The folder to walk.

    Returns:
        The hex digest string of the accumulated hash.
    """

    # MD5 is used as a content fingerprint here, not for security.
    # Declaring that lets hashlib provide MD5 on builds where it is
    # otherwise restricted (the digest value itself is unchanged).
    hasher = hashlib.md5(usedforsecurity=False)

    skipped_top_level_dirs = ('detectron2', 'detectors', 'models')

    with chdir(folder):
        for root, dirs, files in os.walk('.'):
            # Sort in place: os.walk descends using the (mutated) dirs
            # list, so this makes the traversal order deterministic.
            dirs.sort()
            files.sort()

            # Skip dirs that don't have __init__.py.
            if '__init__.py' not in files:
                # This is not a "traditional" package dir:
                # don't descend further, and don't hash its files.
                dirs.clear()
                continue

            # Skip top-level dirs detectron2, detectors, models.
            if root == '.':
                dirs[:] = [name for name in dirs
                           if name not in skipped_top_level_dirs]

            for file in files:
                # Skip non-py-files.
                if not file.endswith('.py'):
                    continue

                _add_file_to_hash(hasher, os.path.join(root, file))

    hashval = hasher.hexdigest()
    logger.debug('%s: %r', 'hashval', hashval)
    return hashval


def _add_file_to_hash(hasher, file_path: str) -> None:
    """Add the filepath and the normalized file content to the hash.

    Hash is sensitive to
        filename case -- foo.py is different from Foo.py
        file rename -- foo.py is different from goo.py

    The content is normalized line by line before hashing:
        * leading tabs are expanded to 4 spaces each,
        * trailing whitespace, including the LF or CRLF line ending,
          is replaced with a single LF.
    Why?  To normalize the content, as developers might run a genuine,
    but smudge-filtered copy.

    A file that can't be read, or can't be decoded as UTF-8, is reported
    with a warning instead of raising.  Its filename, and any content
    read before the failure, has already been added to the hash.

    Args:
        hasher: A hashlib-style object providing update() and hexdigest().
        file_path: Path of the file to open and add.
    """

    filename = Path(file_path).as_posix()  # forward slash, even on Windows

    hasher.update(filename.encode('utf-8'))

    try:
        # Read as text (not bytes) so each line can be normalized.
        with open(file_path, 'r', encoding='utf-8') as f:
            # Read file in 200-line chunks to handle large files efficiently
            for chunk in _myreadlines(f):
                normalized = [_expand(line).rstrip() + '\n'
                              for line in chunk]
                hasher.update(''.join(normalized).encode('utf-8'))

    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError: a non-UTF-8 file must not crash version
        # reporting; treat it like any other unreadable file.
        logger.warning('Trouble...%s', e)

    # Only compute the running digest when it will actually be logged.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug('%s: %r', 'filename, hasher.hexdigest()',
                     (filename, hasher.hexdigest()))


def _myreadlines(f, n=200):
"""Read n lines from f, and yield a list of the n strings."""
while True:
nline_chunk = list(itertools.islice(f, n))
if not nline_chunk:
break
yield nline_chunk


def _expand(line, n=4):
"""Expand leading tabs to n spaces."""
match = re.match(r'^(\t+)', line)
if match:
leading_tabs = match.group(0)
spaces = ' ' * len(leading_tabs) * n
return spaces + line[len(leading_tabs):]
return line
59 changes: 59 additions & 0 deletions LabGym/pkghash/lookup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"""Support hash-included version reporting for LabGym.

Provide functions to support hash-included version reporting for the
LabGym package.

Public Functions
labgym_version_with_hash -- Return a hash-embellished version string
for LabGym.

The term "hash" is commonly used both as a verb and as a noun.
To avoid that ambiguity, these more specific terms are used:
get_hashval -- the function (the verb)
hashval -- the return string value (the noun) from the function

Examples
pkghash.labgym_version_with_hash()
returns a str like '2.9.6 (be19e53c16ff24a33c48b517d870147b)'

Why? Isn't LabGym.__version__ sufficiently identifying?
The purpose of this "enhanced" version-string is to make it possible
to discern when the user or developer is running customized/modified LabGym.
"""

# Allow use of newer syntax Python 3.10 type hints in Python 3.9.
from __future__ import annotations

# Standard library imports.
import logging
from pathlib import Path
from typing import List

# Related third party imports.
try:
# tomllib is included in the Python Standard Library since version 3.11
import tomllib # type: ignore
except ModuleNotFoundError:
import tomli as tomllib # A lil' TOML parser

# Local application/library specific imports.
from .hash import get_hashval
import LabGym


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Folder (as a str) containing the file the LabGym package was loaded from.
labgym_package_folder = str(Path(LabGym.__file__).parent)
# LabGym's version string, taken from LabGym.__version__.
version = LabGym.__version__


def labgym_version_with_hash() -> str:
    """Return LabGym's version string followed by the package hashval.

    The result has the form '<version> (<hashval>)', where hashval is
    get_hashval() applied to the LabGym package folder.
    """
    decorated_version = f'{version} ({get_hashval(labgym_package_folder)})'
    logger.debug('%s: %r', 'version_with_longhash', decorated_version)
    return decorated_version
8 changes: 5 additions & 3 deletions LabGym/probes.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
import packaging # Core utilities for Python packages

# Local application/library specific imports.
from LabGym import __version__ as version
from LabGym import __version__, pkghash
from LabGym import central_logging, registration
from LabGym import config
from LabGym import userdata_survey
Expand Down Expand Up @@ -113,7 +113,7 @@ def probes() -> None:
# then expire or void the "skip-henceforth" behavior.
skip_pass_void = (reginfo is not None
and reginfo.get('name') == 'skip'
and packaging.version.parse(version)
and packaging.version.parse(__version__)
!= packaging.version.parse(reginfo.get('version'))
)

Expand Down Expand Up @@ -171,6 +171,8 @@ def get_context(anonymous: bool=False) -> dict:
except Exception:
reginfo_uuid = None

version_with_hash = pkghash.labgym_version_with_hash()

result = {
'schema': 'context 2025-08-10',

Expand All @@ -180,7 +182,7 @@ def get_context(anonymous: bool=False) -> dict:
'python_version': platform.python_version(),

# LabGym sw
'version': version, # LabGym version
'version': version_with_hash, # LabGym version

# User info
# 'username': getpass.getuser(),
Expand Down
4 changes: 2 additions & 2 deletions LabGym/registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
import yaml # PyYAML, YAML parser and emitter for Python

# Local application/library specific imports.
from LabGym import __version__ as version
from LabGym import __version__
from LabGym import central_logging
from LabGym import config
from LabGym import mywx
Expand Down Expand Up @@ -338,7 +338,7 @@ def register(central_logger=None) -> None:

'platform': platform.platform(),
'node': platform.node(),
'version': version, # LabGym version
'version': __version__, # LabGym version
})

try:
Expand Down
8 changes: 6 additions & 2 deletions LabGym/tests/test_myargparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest

from LabGym import myargparse
from LabGym import __version__ as version
from LabGym import __version__
from .exitstatus import exitstatus


Expand Down Expand Up @@ -89,13 +89,17 @@ def test_parse_args_version(monkeypatch, capsys):
# Arrange
monkeypatch.setattr(sys, 'argv',
['cmd', '--version'])
hashval = '4cf004ad24fce8272bfda213219707d5'
version_with_hash = f'{__version__} ({hashval})'
monkeypatch.setattr(myargparse.pkghash, 'labgym_version_with_hash',
lambda: version_with_hash)

# Act, and assert raises(SystemExit)
with pytest.raises(SystemExit) as e:
result = myargparse.parse_args()

# Assert
assert capsys.readouterr().out == f'version: {version}\n'
assert capsys.readouterr().out == f'version: {version_with_hash}\n'
assert exitstatus(e.value) == 0


Expand Down
10 changes: 10 additions & 0 deletions LabGym/tests/test_pkghash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from LabGym import pkghash

def test_pkghash(capsys):
    """Check the shape of the hash-embellished version string.

    The exact hash depends on the package contents, so only the
    '<version> (<32 hex chars>)' format is asserted.
    """
    import re

    # Arrange
    # Act
    version_with_hash = pkghash.labgym_version_with_hash()

    # Assert
    assert isinstance(version_with_hash, str)
    assert re.fullmatch(r'.+ \([0-9a-f]{32}\)', version_with_hash)

    # Show the actual value in the test output, bypassing capture.
    with capsys.disabled():
        print('\n' f'version_with_hash: {version_with_hash}')
Loading