From 539cedc1b481a91bac9fbd30765351137159c705 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Tue, 29 Apr 2025 16:28:26 -0400 Subject: [PATCH 01/73] Working prototype of neighbor list implemented within Project class --- signac/project.py | 119 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 107 insertions(+), 12 deletions(-) diff --git a/signac/project.py b/signac/project.py index 18bc80030..e9c4b9427 100644 --- a/signac/project.py +++ b/signac/project.py @@ -12,6 +12,7 @@ import shutil import time import warnings +import functools from collections import defaultdict from collections.abc import Iterable from contextlib import contextmanager @@ -32,8 +33,8 @@ _raise_if_older_schema, _read_config_file, ) -from ._search_indexer import _SearchIndexer -from ._utility import _mkdir_p, _nested_dicts_to_dotted_keys +from ._search_indexer import _SearchIndexer, _DictPlaceholder +from ._utility import _mkdir_p, _nested_dicts_to_dotted_keys, _to_hashable from .errors import ( DestinationExistsError, IncompatibleSchemaVersion, @@ -651,6 +652,24 @@ def detect_schema(self, exclude_const=False, subset=None): The detected project schema. """ + statepoint_index = self.detect_schema_index(exclude_const, subset) + def _collect_by_type(values): + """Construct a mapping of types to a set of elements drawn from the input values.""" + values_by_type = defaultdict(set) + for v in values: + values_by_type[type(v)].add(v) + return values_by_type + + return ProjectSchema( + {key: _collect_by_type(value) for key, value in statepoint_index} + ) + + + def detect_schema_index(self, exclude_const=False, subset=None): + """Return just the state point index not collected by type. + + """ + from .schema import _build_job_statepoint_index index = _SearchIndexer(self._build_index(include_job_document=False)) @@ -661,16 +680,9 @@ def detect_schema(self, exclude_const=False, subset=None): exclude_const=exclude_const, index=index ) - def _collect_by_type(values): - """Construct a mapping of types to a set of elements drawn from the input values.""" - values_by_type = defaultdict(set) - for v in values: - values_by_type[type(v)].add(v) - return values_by_type + return statepoint_index + - return ProjectSchema( - {key: _collect_by_type(value) for key, value in statepoint_index} - ) def _find_job_ids(self, filter=None): """Find the job ids of all jobs matching the filter. @@ -1653,6 +1665,90 @@ def __setstate__(self, state): state["_lock"] = RLock() self.__dict__.update(state) + def _search_cache_for_val(self, sp_dict, cache, key, other_val): + sp_dict.update({key: other_val}) + other_job_id = calc_id(sp_dict) + if other_job_id in cache: + return other_job_id + else: + return None + + def _search_out(self, direction_multiplier, values, current_index, boundary, search_fun): + """Search values towards boundary from current_index using search_fun. + + :param direction_multiplier: 1 means search in the positive direction from the index + :param values: iterator of values to index into + :param current_index: index in values to start searching from. + The value at this index is not accessed directly. + :param search_fun: function taking 1 argument returning jobid if there is a match + :param boundary: the index at which to stop + :param search_fun: function that decides if value exists in project + + """ + + query_index = current_index + direction_multiplier + # search either query_index >= low_boundary or query_index <= high_boundary + while direction_multiplier * query_index <= boundary * direction_multiplier: + val = values[query_index] + jobid = search_fun(val) + if jobid is None: + query_index += direction_multiplier + else: + break + else: + jobid = None + val = None + return jobid, val + + def neighbors_of_job(self, jobid, my_map, my_cache, _sorted_schema): + _sp = my_cache[jobid] + nearby_entry = {} + for key, schema_values in _sorted_schema.items(): # from project + # allow comparison with output of schema, which is hashable + value = _to_hashable(_sp.get(key, _DictPlaceholder)) + if value is _DictPlaceholder: + # Possible if schema is heterogeneous + continue + + value_index = schema_values.index(value) + # need to pass _sp by copy + search_fun = functools.partial(self._search_cache_for_val, dict(_sp), my_cache, key) + previous_job = self._search_out(-1, schema_values, value_index, 0, search_fun) + next_job = self._search_out(1, schema_values, value_index, len(schema_values) - 1, search_fun) + + this_d = {} + if next_job[0] is not None: + this_d.update({next_job[1]: my_map[next_job[0]]}) + if previous_job[0] is not None: + this_d.update({previous_job[1]: my_map[previous_job[0]]}) + nearby_entry.update({key: this_d}) + return nearby_entry + + def make_neighbor_list(self, my_map, my_cache, _sorted_schema): + nearby_jobs = {} + for _id in my_cache: + nearby_jobs[my_map[_id]] = self.neighbors_of_job(_id, my_map, my_cache, _sorted_schema) + return nearby_jobs + + def get_neighbors(self, ignore=None): + if ignore is not None: + pass + # _map, _cache = prepare_shadow_project() + else: + _cache = self._sp_cache + _map = {k: k for k in _cache} + schema_index = self.detect_schema_index() + + def _collect_by_value(values): + out_vals = set() + for v in values: + out_vals.add(v) + return sorted(out_vals) + sorted_schema = {k: _collect_by_value(v) for k,v in schema_index} + + #sorted_schema = dict(sorted(sorted_schema, key=lambda t: t[0])) + + return self.make_neighbor_list(_map, _cache, sorted_schema) @contextmanager def TemporaryProject(cls=None, **kwargs): @@ -2145,7 +2241,6 @@ def _repr_html_(self): """ return repr(self) + self._repr_html_jobs() - def init_project(path=None): """Initialize a project. From 65a93d492e63e0c59002ba160f7c7419a910bd8d Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 21 May 2025 22:55:53 -0400 Subject: [PATCH 02/73] Support nested keys & keys with different types, improve readability Internal functions now have to take dotted keys to work with the output of detect_schema. Allow moving between neighboring jobs of different types by sorting values within each type, then joining these in order of alphabetized type name. --- signac/project.py | 147 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 103 insertions(+), 44 deletions(-) diff --git a/signac/project.py b/signac/project.py index e9c4b9427..1a1ab255b 100644 --- a/signac/project.py +++ b/signac/project.py @@ -34,7 +34,7 @@ _read_config_file, ) from ._search_indexer import _SearchIndexer, _DictPlaceholder -from ._utility import _mkdir_p, _nested_dicts_to_dotted_keys, _to_hashable +from ._utility import _mkdir_p, _nested_dicts_to_dotted_keys, _to_hashable, _dotted_dict_to_nested_dicts from .errors import ( DestinationExistsError, IncompatibleSchemaVersion, @@ -663,8 +663,8 @@ def _collect_by_type(values): return ProjectSchema( {key: _collect_by_type(value) for key, value in statepoint_index} ) - - + + def detect_schema_index(self, exclude_const=False, subset=None): """Return just the state point index not collected by type. @@ -1665,34 +1665,72 @@ def __setstate__(self, state): state["_lock"] = RLock() self.__dict__.update(state) + # key and other_val provided separately to be used with functools.partial def _search_cache_for_val(self, sp_dict, cache, key, other_val): + """Return job id of similar job if present in cache. + + The similar job is obtained by modifying sp_dict to + include {key: other_val}. + + Internally converts sp_dict from dotted keys to nested dicts format. + + Parameters + ---------- + sp_dict : dict + sp_dict must not be a reference to a state point because it will be + modified in this function + cache : dict + state point cache + key : str + The key whose value to change + other_val + The new value of key to search for + + Returns + ------- + job id of similar job + None, if not present + """ sp_dict.update({key: other_val}) + # schema output not compatible with dotted key notation + sp_dict = _dotted_dict_to_nested_dicts(sp_dict) other_job_id = calc_id(sp_dict) if other_job_id in cache: return other_job_id else: return None - def _search_out(self, direction_multiplier, values, current_index, boundary, search_fun): - """Search values towards boundary from current_index using search_fun. + def _search_out(self, search_direction, values, current_index, boundary_index, search_fun): + """Search in values towards boundary_index from current_index using search_fun. - :param direction_multiplier: 1 means search in the positive direction from the index - :param values: iterator of values to index into - :param current_index: index in values to start searching from. - The value at this index is not accessed directly. - :param search_fun: function taking 1 argument returning jobid if there is a match - :param boundary: the index at which to stop - :param search_fun: function that decides if value exists in project + Parameters + ---------- + search_direction : int, 1 or -1 + 1 means search in the positive direction from the index + values : iterable + values to index into when searching + current_index : int + index into values to start searching from. + The value at this index is not accessed directly. + search_fun : function + unary function returning jobid if it exists and None otherwise + boundary_index : int + the index at which to stop + Returns + ------- + Tuple of (jobid, val) + jobid : str + job id of the nearest job in the search_direction + val : value of the key at the neighbor jobid """ - - query_index = current_index + direction_multiplier + query_index = current_index + search_direction # search either query_index >= low_boundary or query_index <= high_boundary - while direction_multiplier * query_index <= boundary * direction_multiplier: + while search_direction * query_index <= boundary_index * search_direction: val = values[query_index] jobid = search_fun(val) if jobid is None: - query_index += direction_multiplier + query_index += search_direction else: break else: @@ -1700,34 +1738,52 @@ def _search_out(self, direction_multiplier, values, current_index, boundary, sea val = None return jobid, val - def neighbors_of_job(self, jobid, my_map, my_cache, _sorted_schema): - _sp = my_cache[jobid] + def neighbors_of_job(self, jobid, shadow_map, dotted_sp_cache, sorted_schema): + """Return neighbor list of job with jobid. + + dotted_sp_cache must be in dotted key format, which is accessed by calling + _nested_dicts_to_dotted_keys on each state point in the cache. + + Parameters + ---------- + jobid : str + Job id of job of which to find neighbors + shadow_map : dict + Map from job id to shadow job id if a key is ignored, used when + user provides `ignore` to get_neighbors. Otherwise, it is the identity map + dotted_sp_cache : dict + Map from job id to state point **in dotted keys format** + sorted_schema : dict + Map from key (in dotted notation) to sorted values of the key to search over + """ + + _sp = dotted_sp_cache[jobid] + nearby_entry = {} - for key, schema_values in _sorted_schema.items(): # from project + for key, schema_values in sorted_schema.items(): # from project # allow comparison with output of schema, which is hashable value = _to_hashable(_sp.get(key, _DictPlaceholder)) if value is _DictPlaceholder: # Possible if schema is heterogeneous continue - value_index = schema_values.index(value) # need to pass _sp by copy - search_fun = functools.partial(self._search_cache_for_val, dict(_sp), my_cache, key) - previous_job = self._search_out(-1, schema_values, value_index, 0, search_fun) - next_job = self._search_out(1, schema_values, value_index, len(schema_values) - 1, search_fun) + search_fun = functools.partial(self._search_cache_for_val, dict(_sp), dotted_sp_cache, key) + previous_jobid, previous_val = self._search_out(-1, schema_values, value_index, 0, search_fun) + next_jobid, next_val = self._search_out(1, schema_values, value_index, len(schema_values) - 1, search_fun) this_d = {} - if next_job[0] is not None: - this_d.update({next_job[1]: my_map[next_job[0]]}) - if previous_job[0] is not None: - this_d.update({previous_job[1]: my_map[previous_job[0]]}) + if next_jobid is not None: + this_d.update({next_val: shadow_map[next_jobid]}) + if previous_jobid is not None: + this_d.update({previous_val: shadow_map[previous_jobid]}) nearby_entry.update({key: this_d}) return nearby_entry - - def make_neighbor_list(self, my_map, my_cache, _sorted_schema): + + def make_neighbor_list(self, shadow_map, dotted_sp_cache, sorted_schema): nearby_jobs = {} - for _id in my_cache: - nearby_jobs[my_map[_id]] = self.neighbors_of_job(_id, my_map, my_cache, _sorted_schema) + for _id in dotted_sp_cache: + nearby_jobs[shadow_map[_id]] = self.neighbors_of_job(_id, shadow_map, dotted_sp_cache, sorted_schema) return nearby_jobs def get_neighbors(self, ignore=None): @@ -1735,19 +1791,22 @@ def get_neighbors(self, ignore=None): pass # _map, _cache = prepare_shadow_project() else: - _cache = self._sp_cache - _map = {k: k for k in _cache} - schema_index = self.detect_schema_index() - - def _collect_by_value(values): - out_vals = set() - for v in values: - out_vals.add(v) - return sorted(out_vals) - sorted_schema = {k: _collect_by_value(v) for k,v in schema_index} - - #sorted_schema = dict(sorted(sorted_schema, key=lambda t: t[0])) - + _cache = dict(self._sp_cache) # copy + # the state point cache is incompatible with nested key notation + for _id, _sp in _cache.items(): + _cache[_id] = {k : v for k, v in _nested_dicts_to_dotted_keys(_sp)} + _map = {k : k for k in _cache} + schema = self.detect_schema() + sorted_schema = {} + for key in schema: + # sort values by the names of the types + tuples_to_sort = [] + for typ in schema[key]: + tuples_to_sort.append((typ.__name__, sorted(schema[key][typ]))) + combined_values = [] + for _, v in sorted(tuples_to_sort, key = lambda x: x[0]): + combined_values.extend(v) + sorted_schema[key] = combined_values return self.make_neighbor_list(_map, _cache, sorted_schema) @contextmanager From b40db7b50c7ff416fab5423f80c2b7900f9456bc Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Fri, 6 Jun 2025 10:48:42 -0400 Subject: [PATCH 03/73] Update docstring --- signac/project.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/signac/project.py b/signac/project.py index 1a1ab255b..4c1bc2035 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1712,10 +1712,10 @@ def _search_out(self, search_direction, values, current_index, boundary_index, s current_index : int index into values to start searching from. The value at this index is not accessed directly. - search_fun : function - unary function returning jobid if it exists and None otherwise boundary_index : int the index at which to stop + search_fun : function + unary function returning jobid if it exists and None otherwise Returns ------- @@ -1781,6 +1781,17 @@ def neighbors_of_job(self, jobid, shadow_map, dotted_sp_cache, sorted_schema): return nearby_entry def make_neighbor_list(self, shadow_map, dotted_sp_cache, sorted_schema): + """Iterate over jobs and get neighbors of each job. + + Parameters + ---------- + shadow_map : dict + Map from shadow job id to job id if ignoring certain keys, otherwise the identity map. + dotted_sp_cache : dict + Map from job id OR shadow job id to state point OR shadow state point in dotted key format + sorted_schema : dict + Map of keys to their values to search over + """ nearby_jobs = {} for _id in dotted_sp_cache: nearby_jobs[shadow_map[_id]] = self.neighbors_of_job(_id, shadow_map, dotted_sp_cache, sorted_schema) From 290c9044a06c6b70d8048c3265561ec0cb86adf2 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Fri, 6 Jun 2025 10:48:56 -0400 Subject: [PATCH 04/73] Add idea for type of return value --- signac/project.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/signac/project.py b/signac/project.py index 4c1bc2035..6ef8eee65 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1736,6 +1736,8 @@ def _search_out(self, search_direction, values, current_index, boundary_index, s else: jobid = None val = None + # todo + # return {val: jobid} return jobid, val def neighbors_of_job(self, jobid, shadow_map, dotted_sp_cache, sorted_schema): From b91eb34cffe0f362c48c4bfc23a4ab67e131787e Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Fri, 6 Jun 2025 14:39:21 -0400 Subject: [PATCH 05/73] Pass statepoint, not job id and dotted_sp_cache to neighbors_of_job The shadow map will be applied outside this function --- signac/project.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/signac/project.py b/signac/project.py index 6ef8eee65..f32534c12 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1740,7 +1740,8 @@ def _search_out(self, search_direction, values, current_index, boundary_index, s # return {val: jobid} return jobid, val - def neighbors_of_job(self, jobid, shadow_map, dotted_sp_cache, sorted_schema): + + def neighbors_of_job(self, statepoint, dotted_sp_cache, sorted_schema): """Return neighbor list of job with jobid. dotted_sp_cache must be in dotted key format, which is accessed by calling @@ -1748,37 +1749,32 @@ def neighbors_of_job(self, jobid, shadow_map, dotted_sp_cache, sorted_schema): Parameters ---------- - jobid : str - Job id of job of which to find neighbors - shadow_map : dict - Map from job id to shadow job id if a key is ignored, used when - user provides `ignore` to get_neighbors. Otherwise, it is the identity map + statepoint : dict + Place to search from. Could be the shadow state point. dotted_sp_cache : dict - Map from job id to state point **in dotted keys format** + Map from job id OR shadow job id to state point OR shadow state point in dotted key format sorted_schema : dict Map from key (in dotted notation) to sorted values of the key to search over """ - _sp = dotted_sp_cache[jobid] - nearby_entry = {} for key, schema_values in sorted_schema.items(): # from project # allow comparison with output of schema, which is hashable - value = _to_hashable(_sp.get(key, _DictPlaceholder)) + value = _to_hashable(statepoint.get(key, _DictPlaceholder)) if value is _DictPlaceholder: # Possible if schema is heterogeneous continue value_index = schema_values.index(value) - # need to pass _sp by copy - search_fun = functools.partial(self._search_cache_for_val, dict(_sp), dotted_sp_cache, key) + # need to pass statepoint by copy + search_fun = functools.partial(self._search_cache_for_val, dict(statepoint), dotted_sp_cache, key) previous_jobid, previous_val = self._search_out(-1, schema_values, value_index, 0, search_fun) next_jobid, next_val = self._search_out(1, schema_values, value_index, len(schema_values) - 1, search_fun) this_d = {} if next_jobid is not None: - this_d.update({next_val: shadow_map[next_jobid]}) + this_d.update({next_val: next_jobid}) if previous_jobid is not None: - this_d.update({previous_val: shadow_map[previous_jobid]}) + this_d.update({previous_val: previous_jobid}) nearby_entry.update({key: this_d}) return nearby_entry @@ -1795,8 +1791,9 @@ def make_neighbor_list(self, shadow_map, dotted_sp_cache, sorted_schema): Map of keys to their values to search over """ nearby_jobs = {} - for _id in dotted_sp_cache: - nearby_jobs[shadow_map[_id]] = self.neighbors_of_job(_id, shadow_map, dotted_sp_cache, sorted_schema) + for _id, _sp in dotted_sp_cache.items(): + shadow_job_neighbors = self.neighbors_of_job(_sp, dotted_sp_cache, sorted_schema) + nearby_jobs[shadow_map[_id]] = {key: shadow_map[shadow_id] for key, shadow_id in shadow_job_neighbors} return nearby_jobs def get_neighbors(self, ignore=None): From 371ddeaf7e60f1139ed0ffe208da8223fbd54093 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Fri, 6 Jun 2025 14:42:02 -0400 Subject: [PATCH 06/73] Add API entry points --- signac/__main__.py | 9 +++++++++ signac/job.py | 4 ++++ signac/project.py | 13 +++++++++++++ 3 files changed, 26 insertions(+) diff --git a/signac/__main__.py b/signac/__main__.py index d6b9d0cfb..19b29ba2c 100644 --- a/signac/__main__.py +++ b/signac/__main__.py @@ -196,6 +196,15 @@ def main_statepoint(args): else: print(json.dumps(job.statepoint(), indent=args.indent, sort_keys=args.sort)) +def main_neigbors(args): + # TODO + project = get_project() + if args.job_id: + jobs = (_open_job_by_id(project, jid) for jid in args.job_id) + for job in jobs: + print(job.get_neighbors()) + pass + def main_document(args): """Handle document subcommand.""" diff --git a/signac/job.py b/signac/job.py index 6a92038b5..7668e285f 100644 --- a/signac/job.py +++ b/signac/job.py @@ -1012,3 +1012,7 @@ def __deepcopy__(self, memo): setattr(result, key, deepcopy(value, memo)) result._lock = RLock() return result + + def neighbors(self, ignore=None): + # TODO, do we expose this to each job? + pass diff --git a/signac/project.py b/signac/project.py index f32534c12..133ae3da1 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1740,6 +1740,11 @@ def _search_out(self, search_direction, values, current_index, boundary_index, s # return {val: jobid} return jobid, val + def job_my_neighbor(self, ignore, sorted_schema): + """Prototype going from job to neighbor with minimal mess""" + nl = self.neighbors_of_job() + for key, value in nl: + pass def neighbors_of_job(self, statepoint, dotted_sp_cache, sorted_schema): """Return neighbor list of job with jobid. @@ -1778,6 +1783,14 @@ def neighbors_of_job(self, statepoint, dotted_sp_cache, sorted_schema): nearby_entry.update({key: this_d}) return nearby_entry + def shadow_neighbor_list_to_neighbor_list(self, shadow_neighbor_list, shadow_map): + """Replace shadow job ids with actual job ids.""" + neighbor_list = dict() + for jobid, neighbors in shadow_neighbor_list: + for neighbor_key, neighbor_vals in neighbors: + neighbor_list[jobid] = {neighbor_key: {k: shadow_map[i] for k,i in neighbor_vals.items()}} + return neighbor_list + def make_neighbor_list(self, shadow_map, dotted_sp_cache, sorted_schema): """Iterate over jobs and get neighbors of each job. From a891c8df228c0879dcf50a2e8cab07087705726e Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Fri, 6 Jun 2025 14:43:07 -0400 Subject: [PATCH 07/73] Prototype code --- signac/_search_indexer.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/signac/_search_indexer.py b/signac/_search_indexer.py index 0f561a615..c2f0894ef 100644 --- a/signac/_search_indexer.py +++ b/signac/_search_indexer.py @@ -254,7 +254,22 @@ class _SearchIndexer(dict): ``_SearchIndexer(iterable, **kwargs)``. """ - + def build_all_index(self): + # figure out keys from all jobs + + # go through jobs, getting keys of each job + for _id, spdoc in self.items(): + v = spdoc["sp"] + if type(v) is list: + index[_to_hashable(v)].add(_id) + elif type(v) is dict: + index[_DictPlaceholder].add(_id) + else: + index[v].add(_id) + + def get_index(self, key): + pass + def build_index(self, key): """Build index for a given key. @@ -279,7 +294,7 @@ def build_index(self, key): logger.debug(f"Building index for key '{key}'...") nodes = key.split(".") index = _TypedSetDefaultDict() - + # breakpoint() for _id, doc in self.items(): try: v = doc From a3e7d5b35dc88cdfdac084b7b9e634e0977a41ae Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 2 Jul 2025 23:44:37 -0400 Subject: [PATCH 08/73] Add prepare_shadow_project from dashboard navigator and fix bug --- signac/project.py | 138 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 131 insertions(+), 7 deletions(-) diff --git a/signac/project.py b/signac/project.py index 133ae3da1..17bc1149e 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1665,6 +1665,107 @@ def __setstate__(self, state): state["_lock"] = RLock() self.__dict__.update(state) + def prepare_shadow_project(self, ignore): + """Detect neighbors and build cache for shadow project which comes from ignored keys. + + Ignoring a key creates a subset of jobs, now identified with different job ids. + Call it shadow job id because we're making a projection of the project. + + We can map from the shadow job id to the actual job id in the use cases identified. + Raise ValueError if this mapping is ill defined. + + We can detect the neighbor list on the shadow project then map it back + to the real project. + + TODO: Belongs in signac core eventually. + + Returns shadow_map, shadow_cache + + shadow_map is a map from shadow job id to project job id. + + shadow_cache is an in-memory state point cache for the shadow project + mapping job id --> shadow state point + + + Use cases: + + 1) Seed that is different for every job. + + 2) State point key that changes in sync with another key. + + Case 1: + + {"a": 1, "b": 2, "seed": 0} -> jobid1 + {"a": 1, "b": 3, "seed": 1} -> jobid2 + {"a": 1, "b": 2} -> shadowid1 + {"a": 1, "b": 3} -> shadowid2 + + shadowid1 <---> jobid1 + shadowid2 <---> jobid2 + + Breaking case 1 with repeated shadow jobs + {"a": 1, "b": 2, "seed": 0} -> jobid1 + {"a": 1, "b": 3, "seed": 1} -> jobid2 + {"a": 1, "b": 3, "seed": 2} -> jobid3 + + {"a": 1, "b": 2} -> shadowid1 + {"a": 1, "b": 3} -> shadowid2 + {"a": 1, "b": 3} -> shadowid2 *conflict* No longer bijection. Maybe we can just keep track of these? Should be few cases. + Now we have shadowid2 .---> jobid2 + \\--> jobid3 + + Case 2: + + {"a1": 10, "a2": 20} -> jobid1 + {"a1": 2, "a2": 4} -> jobid2 + + {"a1": 10} -> shadowid1 + {"a1": 2} -> shadowid2 + + Can still make the mapping between ids. + + Breaking case 2: + {"a1": 10, "a2": 20} -> jobid1 + {"a1": 2, "a2": 4} -> jobid2 + {"a1": 2, "a2": 5} -> jobid3 + + {"a1": 10} -> shadowid1 + {"a1": 2} -> shadowid2 + {"a1": 2} -> shadowid2 -- + Now we have shadowid2 .---> jobid2 + \\--> jobid3 + """ + + shadow_cache = {} # like a state point cache + job_to_shadow = {} # goes from job id to shadow. Call it the projection? + for job in self: + shadow_sp = dict(job.cached_statepoint) + for ig in ignore: + shadow_sp.pop(ig, None) + shadow_id = calc_id(shadow_sp) + shadow_cache[shadow_id] = shadow_sp + job_to_shadow[job.id] = shadow_id + + if len(set(job_to_shadow.values())) != len(job_to_shadow): + # map that has duplicates + duplicate_map = {} + for k,v in job_to_shadow.items(): + try: + duplicate_map[v].append(k) + except KeyError: + duplicate_map[v] = [k] + # one of the breaking cases + # figure out who breaks + counts = Counter(job_to_shadow.values()) + bads = [] + for k,v in counts.items(): + if v>1: + bads.append(k) + err_str = "\n".join(f"Job ids: {', '.join(duplicate_map[b])}." for b in bads) + raise ValueError(f"Ignoring {ignore} makes it impossible to distinguish some jobs:\n{err_str}") + shadow_map = {v: k for k, v in job_to_shadow.items()} + return shadow_map, shadow_cache + # key and other_val provided separately to be used with functools.partial def _search_cache_for_val(self, sp_dict, cache, key, other_val): """Return job id of similar job if present in cache. @@ -1786,9 +1887,12 @@ def neighbors_of_job(self, statepoint, dotted_sp_cache, sorted_schema): def shadow_neighbor_list_to_neighbor_list(self, shadow_neighbor_list, shadow_map): """Replace shadow job ids with actual job ids.""" neighbor_list = dict() - for jobid, neighbors in shadow_neighbor_list: - for neighbor_key, neighbor_vals in neighbors: - neighbor_list[jobid] = {neighbor_key: {k: shadow_map[i] for k,i in neighbor_vals.items()}} + for jobid, neighbors in shadow_neighbor_list.items(): + this_d = {} + for neighbor_key, neighbor_vals in neighbors.items(): + # neighbor_list.update({shadow_map[jobid]: {neighbor_key :{k: shadow_map[i] for k,i in neighbor_vals.items()}}}) + this_d[neighbor_key] = {k: shadow_map[i] for k,i in neighbor_vals.items()} + neighbor_list[shadow_map[jobid]] = this_d return neighbor_list def make_neighbor_list(self, shadow_map, dotted_sp_cache, sorted_schema): @@ -1805,15 +1909,30 @@ def make_neighbor_list(self, shadow_map, dotted_sp_cache, sorted_schema): """ nearby_jobs = {} for _id, _sp in dotted_sp_cache.items(): - shadow_job_neighbors = self.neighbors_of_job(_sp, dotted_sp_cache, sorted_schema) - nearby_jobs[shadow_map[_id]] = {key: shadow_map[shadow_id] for key, shadow_id in shadow_job_neighbors} + # shadow_job_neighbors = self.neighbors_of_job(_sp, dotted_sp_cache, sorted_schema) + # print(shadow_job_neighbors) + nearby_jobs[_id] = self.neighbors_of_job(_sp, dotted_sp_cache, sorted_schema) + # {key: shadow_map[shadow_id] for key, shadow_id in shadow_job_neighbors.items()} + # breakpoint() + nearby_jobs = self.shadow_neighbor_list_to_neighbor_list(nearby_jobs, shadow_map) + # print(f"neighbors of {_id} are {shadow_job_neighbors}") + # this_d = {} + # #{key: shadow_map[shadow_id] for key, shadow_id in shadow_job_neighbors.items()} + # for key, shadow_id in shadow_job_neighbors.items(): + # if shadow_id != dict(): + # print(f"shadow_id: {shadow_id}") + # this_d[key] = shadow_map[shadow_id] + # nearby_jobs[shadow_map[_id]] = this_d return nearby_jobs def get_neighbors(self, ignore=None): if ignore is not None: - pass - # _map, _cache = prepare_shadow_project() + if not isinstance(ignore, list): + ignore = [ignore] + _map, _cache = self.prepare_shadow_project(ignore = ignore) else: + ignore = [None] + self.update_cache() _cache = dict(self._sp_cache) # copy # the state point cache is incompatible with nested key notation for _id, _sp in _cache.items(): @@ -1830,6 +1949,11 @@ def get_neighbors(self, ignore=None): for _, v in sorted(tuples_to_sort, key = lambda x: x[0]): combined_values.extend(v) sorted_schema[key] = combined_values + need_to_ignore = [sorted_schema.pop(ig, None) for ig in ignore] + if any(a is None for a in need_to_ignore): + import warnings + ignore = [None] + warnings.warn("Ignored key not present in project.", RuntimeWarning) return self.make_neighbor_list(_map, _cache, sorted_schema) @contextmanager From 500934d2a78ecf5c3cddac36bce6b96172b4ff94 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Mon, 28 Jul 2025 14:10:33 -0400 Subject: [PATCH 09/73] Make ignore an empty list by default --- signac/project.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/signac/project.py b/signac/project.py index 17bc1149e..6ea9bb712 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1665,7 +1665,7 @@ def __setstate__(self, state): state["_lock"] = RLock() self.__dict__.update(state) - def prepare_shadow_project(self, ignore): + def prepare_shadow_project(self, ignore: list): """Detect neighbors and build cache for shadow project which comes from ignored keys. Ignoring a key creates a subset of jobs, now identified with different job ids. @@ -1925,13 +1925,12 @@ def make_neighbor_list(self, shadow_map, dotted_sp_cache, sorted_schema): # nearby_jobs[shadow_map[_id]] = this_d return nearby_jobs - def get_neighbors(self, ignore=None): - if ignore is not None: - if not isinstance(ignore, list): - ignore = [ignore] + def get_neighbors(self, ignore = []): + if not isinstance(ignore, list): + ignore = [ignore] + if len(ignore) > 0: _map, _cache = self.prepare_shadow_project(ignore = ignore) else: - ignore = [None] self.update_cache() _cache = dict(self._sp_cache) # copy # the state point cache is incompatible with nested key notation @@ -1949,10 +1948,10 @@ def get_neighbors(self, ignore=None): for _, v in sorted(tuples_to_sort, key = lambda x: x[0]): combined_values.extend(v) sorted_schema[key] = combined_values - need_to_ignore = [sorted_schema.pop(ig, None) for ig in ignore] - if any(a is None for a in need_to_ignore): - import warnings - ignore = [None] + need_to_ignore = [sorted_schema.pop(ig, _DictPlaceholder) for ig in ignore] + if any(a is _DictPlaceholder for a in need_to_ignore): + # reset back to default + ignore = [] warnings.warn("Ignored key not present in project.", RuntimeWarning) return self.make_neighbor_list(_map, _cache, sorted_schema) From ad8395b541537555467facf27ec3cb5df9bf3ebf Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Mon, 28 Jul 2025 14:11:08 -0400 Subject: [PATCH 10/73] Update comments --- signac/project.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/signac/project.py b/signac/project.py index 6ea9bb712..a5a442b7d 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1677,14 +1677,12 @@ def prepare_shadow_project(self, ignore: list): We can detect the neighbor list on the shadow project then map it back to the real project. - TODO: Belongs in signac core eventually. - Returns shadow_map, shadow_cache shadow_map is a map from shadow job id to project job id. shadow_cache is an in-memory state point cache for the shadow project - mapping job id --> shadow state point + mapping shadow job id --> shadow state point Use cases: @@ -1703,14 +1701,15 @@ def prepare_shadow_project(self, ignore: list): shadowid1 <---> jobid1 shadowid2 <---> jobid2 - Breaking case 1 with repeated shadow jobs + Breaking case 1 with repeated shadow jobs: + {"a": 1, "b": 2, "seed": 0} -> jobid1 {"a": 1, "b": 3, "seed": 1} -> jobid2 {"a": 1, "b": 3, "seed": 2} -> jobid3 {"a": 1, "b": 2} -> shadowid1 {"a": 1, "b": 3} -> shadowid2 - {"a": 1, "b": 3} -> shadowid2 *conflict* No longer bijection. Maybe we can just keep track of these? Should be few cases. + {"a": 1, "b": 3} -> shadowid2 *conflict* No longer bijection. Now we have shadowid2 .---> jobid2 \\--> jobid3 @@ -1736,8 +1735,8 @@ def prepare_shadow_project(self, ignore: list): \\--> jobid3 """ - shadow_cache = {} # like a state point cache - job_to_shadow = {} # goes from job id to shadow. Call it the projection? + shadow_cache = {} # like a state point cache, but for the shadow project + job_to_shadow = {} # goes from job id to shadow id. Call it the projection? for job in self: shadow_sp = dict(job.cached_statepoint) for ig in ignore: @@ -1763,6 +1762,8 @@ def prepare_shadow_project(self, ignore: list): bads.append(k) err_str = "\n".join(f"Job ids: {', '.join(duplicate_map[b])}." for b in bads) raise ValueError(f"Ignoring {ignore} makes it impossible to distinguish some jobs:\n{err_str}") + + # map from shadow job id to project job id shadow_map = {v: k for k, v in job_to_shadow.items()} return shadow_map, shadow_cache From 9275818adfd17dbeb5f35c82bdf4be726f6c789b Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Mon, 28 Jul 2025 14:11:19 -0400 Subject: [PATCH 11/73] Import Counter --- signac/project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signac/project.py b/signac/project.py index a5a442b7d..2aab32d7d 100644 --- a/signac/project.py +++ b/signac/project.py @@ -13,7 +13,7 @@ import time import warnings import functools -from collections import defaultdict +from collections import defaultdict, Counter from collections.abc import Iterable from contextlib import contextmanager from copy import deepcopy From 16520d26edbb0174b46fc49fc026fd0a282816e0 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Mon, 28 Jul 2025 14:24:08 -0400 Subject: [PATCH 12/73] Add tests --- tests/test_project.py | 125 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/tests/test_project.py b/tests/test_project.py index 46957a392..378426911 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -12,6 +12,7 @@ import sys import textwrap from contextlib import contextmanager, redirect_stderr +from itertools import product from tarfile import TarFile from tempfile import TemporaryDirectory from time import time @@ -836,6 +837,127 @@ def test_schema_format(self): assert s_format2 == S_FORMAT2 + def test_neighbors(self): + a_vals = [1, 2] + b_vals = [3, 4, 5] + for a,b in product(a_vals, b_vals): + self.project.open_job({"a": a, "b": b}).init() + + neighbors = self.project.get_neighbors() + + for a,b in product(a_vals, b_vals): + job = self.project.open_job({"a": a, "b": b}) + this_neighbors = neighbors[job.id] + + # a neighbors + if a == 1: + assert this_neighbors["a"][2] == self.project.open_job({"a": 2, "b": b}).id + elif a == 2: + assert this_neighbors["a"][1] == self.project.open_job({"a": 1, "b": b}).id + + # b neighbors + if b == 3: + assert this_neighbors["b"][4] == self.project.open_job({"a": a, "b": 4}).id + elif b == 4: + assert this_neighbors["b"][3] == self.project.open_job({"a": a, "b": 3}).id + assert this_neighbors["b"][5] == self.project.open_job({"a": a, "b": 5}).id + elif b == 5: + assert this_neighbors["b"][4] == self.project.open_job({"a": a, "b": 4}).id + + def test_neighbors_ignore(self): + b_vals = [3, 4, 5] + for b in b_vals: + self.project.open_job({"b": b, "2b": 2 * b}).init() + + neighbors = self.project.get_neighbors(ignore = "2b") + + for b in b_vals: + job = self.project.open_job({"b": b, "2b": 2 * b}) + this_neighbors = neighbors[job.id] + + if b == 3: + assert this_neighbors["b"][4] == self.project.open_job({"b": 4, "2b": 8}).id + elif b == 4: + assert this_neighbors["b"][3] == self.project.open_job({"b": 3, "2b": 6}).id + assert this_neighbors["b"][5] == self.project.open_job({"b": 5, "2b": 10}).id + elif b == 5: + assert this_neighbors["b"][4] == self.project.open_job({"b": 4, "2b": 8}).id + + def test_neighbors_nested(self): + a_vals = [{"c": 2}, {"c": 3}, {"c": 4}] + for a in a_vals: + self.project.open_job({"a": a}).init() + + neighbors = self.project.get_neighbors() + + for a in a_vals: + job = self.project.open_job({"a": a}) + this_neighbors = neighbors[job.id] + # note how the inconsistency in neighborlist access syntax comes from schema + if a == 2: + assert this_neighbors["a.c"][3] == self.project.open_job({"a": {"c": 3}}).id + elif a == 3: + assert this_neighbors["a.c"][2] == self.project.open_job({"a": {"c": 2}}).id + assert this_neighbors["a.c"][4] == self.project.open_job({"a": {"c": 4}}).id + elif a == 4: + assert this_neighbors["a.c"][3] == self.project.open_job({"a": {"c": 3}}).id + + @pytest.mark.xfail(reason = "Schema doesn't distinguish truthy values. Asserting False til fix.") + def test_neighbors_varied_types(self): + a_vals = [True, None, False, 1, "1", "2", 1.2, 1.3, 2, "x", "y", {"c": 2}, [3,4], [5,6]] + a_types = [type(a) for a in a_vals] + + for a in a_vals: + self.project.open_job({"a": a}).init() + + neighbors = self.project.get_neighbors() + print(neighbors) + print(a_types) + + for job in self.project: + this_neighbors = neighbors[job.id] + print(f"{job.sp.a=} has neighbors {this_neighbors}") + print("Schema doesn't distinguish 1 and True, 0 and False") + assert False +# I manually sorted the output here: +# job.sp.a=None has neighbors {'a': {False: '260210482a322cd86398136bd3f79f96'}} +# job.sp.a=False has neighbors {'a': {1.2: '26c562927aa486aa8029af535ec39645', None: '542bac9c870e9cd102c3909922945a4d'}} +# job.sp.a=1.2 has neighbors {'a': {1: '42b7b4f2921788ea14dac5566e6f06d0', False: '260210482a322cd86398136bd3f79f96'}} +# job.sp.a=1 has neighbors {'a': {2: '9f8a8e5ba8c70c774d410a9107e2a32b', 1.2: '26c562927aa486aa8029af535ec39645'}} +# job.sp.a=2 has neighbors {'a': {'1': '44550aefb0b85d9db968d11e4fdfa6bc', 1: '42b7b4f2921788ea14dac5566e6f06d0'}} +# job.sp.a='1' has neighbors {'a': {'2': '7f73bfec07cbee1bda5fbaab4b45acd6', 2: '9f8a8e5ba8c70c774d410a9107e2a32b'}} +# job.sp.a='2' has neighbors {'a': {'x': 'f5239c9772076e520bcbef45c51aae76', '1': '44550aefb0b85d9db968d11e4fdfa6bc'}} +# job.sp.a='x' has neighbors {'a': {'y': 'e9257974c07297468e235b1ec5a98174', '2': '7f73bfec07cbee1bda5fbaab4b45acd6'}} +# job.sp.a='y' has neighbors {'a': {(3, 4): '8ddd8542fd23352d5987ab4d73337e52', 'x': 'f5239c9772076e520bcbef45c51aae76'}} +# job.sp.a=[3, 4] has neighbors {'a': {(5, 6): '015c092e565ba53f0c2d9630db3a13ec', 'y': 'e9257974c07297468e235b1ec5a98174'}} +# job.sp.a=[5, 6] has neighbors {'a': {(3, 4): '8ddd8542fd23352d5987ab4d73337e52'}} + +# job.sp.a={'c': 2} has neighbors {'a.c': {}} + +# problem: Not in the main sequence. It takes the place of 1 +# job.sp.a=True has neighbors {'a': {2: '9f8a8e5ba8c70c774d410a9107e2a32b', 1.2: '26c562927aa486aa8029af535ec39645'}} + +# schema doesn't distinguish these + + def test_neighbors_no(self): + self.project.open_job({"a": 1}).init() + self.project.open_job({"b": 1}).init() + neighbors = self.project.get_neighbors() + + for job in self.project: + for v in neighbors[job.id].values(): + assert len(v) == 0 + + def test_neighbors_ignore_dups(self): + a_vals = [1,2] + b_vals = [3,4,5] + for a,b in product(a_vals, b_vals): + self.project.open_job({"a": a, "b": b}).init() + with pytest.raises(ValueError): + self.project.get_neighbors(ignore = "a") + with pytest.raises(ValueError): + self.project.get_neighbors(ignore = "b") + def test_jobs_groupby(self): def get_sp(i): return {"a": i, "b": i % 2, "c": i % 3} @@ -2373,6 +2495,9 @@ def test_no_migration(self): assert len(migrations) == 0 +class TestProjectNeighbors(TestProjectBase): + pass + def _initialize_v1_project(dirname, with_workspace=True, with_other_files=True): # Create v1 config file. cfg_fn = os.path.join(dirname, "signac.rc") From 2715d4652977e5f9c9003c140370282da4f4e9a0 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Tue, 29 Jul 2025 11:33:06 -0400 Subject: [PATCH 13/73] Avoid 0 and 1 in neighborlist test because conflated with bools --- tests/test_project.py | 46 ++++++++++++++----------------------------- 1 file changed, 15 insertions(+), 31 deletions(-) diff --git a/tests/test_project.py b/tests/test_project.py index 378426911..de1ae713e 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -902,42 +902,26 @@ def test_neighbors_nested(self): elif a == 4: assert this_neighbors["a.c"][3] == self.project.open_job({"a": {"c": 3}}).id - @pytest.mark.xfail(reason = "Schema doesn't distinguish truthy values. Asserting False til fix.") def test_neighbors_varied_types(self): - a_vals = [True, None, False, 1, "1", "2", 1.2, 1.3, 2, "x", "y", {"c": 2}, [3,4], [5,6]] - a_types = [type(a) for a in a_vals] + # in sort order + # NoneType is first because it's capitalized + a_vals = [None, False, True, 1.2, 1.3, 2, "1", "2", "x", "y", (3,4), (5,6)] + job_ids = [] for a in a_vals: - self.project.open_job({"a": a}).init() + job = self.project.open_job({"a": a}).init() + job_ids.append(job.id) neighbors = self.project.get_neighbors() - print(neighbors) - print(a_types) - for job in self.project: - this_neighbors = neighbors[job.id] - print(f"{job.sp.a=} has neighbors {this_neighbors}") - print("Schema doesn't distinguish 1 and True, 0 and False") - assert False -# I manually sorted the output here: -# job.sp.a=None has neighbors {'a': {False: '260210482a322cd86398136bd3f79f96'}} -# job.sp.a=False has neighbors {'a': {1.2: '26c562927aa486aa8029af535ec39645', None: '542bac9c870e9cd102c3909922945a4d'}} -# job.sp.a=1.2 has neighbors {'a': {1: '42b7b4f2921788ea14dac5566e6f06d0', False: '260210482a322cd86398136bd3f79f96'}} -# job.sp.a=1 has neighbors {'a': {2: '9f8a8e5ba8c70c774d410a9107e2a32b', 1.2: '26c562927aa486aa8029af535ec39645'}} -# job.sp.a=2 has neighbors {'a': {'1': '44550aefb0b85d9db968d11e4fdfa6bc', 1: '42b7b4f2921788ea14dac5566e6f06d0'}} -# job.sp.a='1' has neighbors {'a': {'2': '7f73bfec07cbee1bda5fbaab4b45acd6', 2: '9f8a8e5ba8c70c774d410a9107e2a32b'}} -# job.sp.a='2' has neighbors {'a': {'x': 'f5239c9772076e520bcbef45c51aae76', '1': '44550aefb0b85d9db968d11e4fdfa6bc'}} -# job.sp.a='x' has neighbors {'a': {'y': 'e9257974c07297468e235b1ec5a98174', '2': '7f73bfec07cbee1bda5fbaab4b45acd6'}} -# job.sp.a='y' has neighbors {'a': {(3, 4): '8ddd8542fd23352d5987ab4d73337e52', 'x': 'f5239c9772076e520bcbef45c51aae76'}} -# job.sp.a=[3, 4] has neighbors {'a': {(5, 6): '015c092e565ba53f0c2d9630db3a13ec', 'y': 'e9257974c07297468e235b1ec5a98174'}} -# job.sp.a=[5, 6] has neighbors {'a': {(3, 4): '8ddd8542fd23352d5987ab4d73337e52'}} - -# job.sp.a={'c': 2} has neighbors {'a.c': {}} - -# problem: Not in the main sequence. It takes the place of 1 -# job.sp.a=True has neighbors {'a': {2: '9f8a8e5ba8c70c774d410a9107e2a32b', 1.2: '26c562927aa486aa8029af535ec39645'}} - -# schema doesn't distinguish these + for i,a in enumerate(a_vals): + jobid = job_ids[i] + if i > 0: + prev_val = a_vals[i-1] + assert neighbors[jobid]["a"][prev_val] == job_ids[i-1] + if i < len(a_vals) - 1: + next_val = a_vals[i+1] + assert neighbors[jobid]["a"][next_val] == job_ids[i+1] def test_neighbors_no(self): self.project.open_job({"a": 1}).init() @@ -2497,7 +2481,7 @@ def test_no_migration(self): class TestProjectNeighbors(TestProjectBase): pass - + def _initialize_v1_project(dirname, with_workspace=True, with_other_files=True): # Create v1 config file. cfg_fn = os.path.join(dirname, "signac.rc") From 6b2a66e7b7902d5a07e0a181f44083a009ccfbfe Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 30 Jul 2025 13:05:49 -0400 Subject: [PATCH 14/73] Clean up code that gives duplicates error message --- signac/project.py | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/signac/project.py b/signac/project.py index 2aab32d7d..6ff4a02a8 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1734,37 +1734,28 @@ def prepare_shadow_project(self, ignore: list): Now we have shadowid2 .---> jobid2 \\--> jobid3 """ - shadow_cache = {} # like a state point cache, but for the shadow project - job_to_shadow = {} # goes from job id to shadow id. Call it the projection? + job_projection = {} # goes from job id to shadow id for job in self: shadow_sp = dict(job.cached_statepoint) for ig in ignore: shadow_sp.pop(ig, None) shadow_id = calc_id(shadow_sp) shadow_cache[shadow_id] = shadow_sp - job_to_shadow[job.id] = shadow_id - - if len(set(job_to_shadow.values())) != len(job_to_shadow): - # map that has duplicates - duplicate_map = {} - for k,v in job_to_shadow.items(): - try: - duplicate_map[v].append(k) - except KeyError: - duplicate_map[v] = [k] - # one of the breaking cases - # figure out who breaks - counts = Counter(job_to_shadow.values()) - bads = [] - for k,v in counts.items(): - if v>1: - bads.append(k) - err_str = "\n".join(f"Job ids: {', '.join(duplicate_map[b])}." for b in bads) + job_projection[job.id] = shadow_id + + if len(set(job_projection.values())) != len(job_projection): + # Make a helpful error message for map that has duplicates + shadow_to_job = defaultdict(list) + counts = defaultdict(int) + for job_id, shadow_id in job_projection.items(): + shadow_to_job[shadow_id].append(job_id) + counts[shadow_id] += 1 + bad_jobids = [shadow_to_job[shadow_id] for shadow_id, num in counts.items() if num > 1] + err_str = "\n".join(f"Job ids: {', '.join(j)}." for j in bad_jobids) raise ValueError(f"Ignoring {ignore} makes it impossible to distinguish some jobs:\n{err_str}") - # map from shadow job id to project job id - shadow_map = {v: k for k, v in job_to_shadow.items()} + shadow_map = {v: k for k, v in job_projection.items()} return shadow_map, shadow_cache # key and other_val provided separately to be used with functools.partial From 53c4fab14b858aad7fa0c7761984999f4568c11c Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 30 Jul 2025 13:07:35 -0400 Subject: [PATCH 15/73] Code cleanup --- signac/project.py | 60 +++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/signac/project.py b/signac/project.py index 6ff4a02a8..c58f11f6d 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1665,8 +1665,8 @@ def __setstate__(self, state): state["_lock"] = RLock() self.__dict__.update(state) - def prepare_shadow_project(self, ignore: list): - """Detect neighbors and build cache for shadow project which comes from ignored keys. + def _prepare_shadow_project(self, ignore: list): + """Build cache and mapping for shadow project, which comes from ignored keys. Ignoring a key creates a subset of jobs, now identified with different job ids. Call it shadow job id because we're making a projection of the project. @@ -1840,7 +1840,7 @@ def job_my_neighbor(self, ignore, sorted_schema): pass def neighbors_of_job(self, statepoint, dotted_sp_cache, sorted_schema): - """Return neighbor list of job with jobid. + """Return neighbor list of given state point. dotted_sp_cache must be in dotted key format, which is accessed by calling _nested_dicts_to_dotted_keys on each state point in the cache. @@ -1877,7 +1877,7 @@ def neighbors_of_job(self, statepoint, dotted_sp_cache, sorted_schema): return nearby_entry def shadow_neighbor_list_to_neighbor_list(self, shadow_neighbor_list, shadow_map): - """Replace shadow job ids with actual job ids.""" + """Replace shadow job ids with actual job ids in the neighbor list.""" neighbor_list = dict() for jobid, neighbors in shadow_neighbor_list.items(): this_d = {} @@ -1898,53 +1898,53 @@ def make_neighbor_list(self, shadow_map, dotted_sp_cache, sorted_schema): Map from job id OR shadow job id to state point OR shadow state point in dotted key format sorted_schema : dict Map of keys to their values to search over + + Returns + ------- + neighborlist : dict + """ nearby_jobs = {} for _id, _sp in dotted_sp_cache.items(): - # shadow_job_neighbors = self.neighbors_of_job(_sp, dotted_sp_cache, sorted_schema) - # print(shadow_job_neighbors) nearby_jobs[_id] = self.neighbors_of_job(_sp, dotted_sp_cache, sorted_schema) # {key: shadow_map[shadow_id] for key, shadow_id in shadow_job_neighbors.items()} - # breakpoint() nearby_jobs = self.shadow_neighbor_list_to_neighbor_list(nearby_jobs, shadow_map) - # print(f"neighbors of {_id} are {shadow_job_neighbors}") - # this_d = {} - # #{key: shadow_map[shadow_id] for key, shadow_id in shadow_job_neighbors.items()} - # for key, shadow_id in shadow_job_neighbors.items(): - # if shadow_id != dict(): - # print(f"shadow_id: {shadow_id}") - # this_d[key] = shadow_map[shadow_id] - # nearby_jobs[shadow_map[_id]] = this_d return nearby_jobs def get_neighbors(self, ignore = []): if not isinstance(ignore, list): ignore = [ignore] - if len(ignore) > 0: - _map, _cache = self.prepare_shadow_project(ignore = ignore) - else: - self.update_cache() - _cache = dict(self._sp_cache) # copy - # the state point cache is incompatible with nested key notation - for _id, _sp in _cache.items(): - _cache[_id] = {k : v for k, v in _nested_dicts_to_dotted_keys(_sp)} - _map = {k : k for k in _cache} + # For each state point parameter, make a flat list sorted by values it takes in the project. + # This is almost like schema, but the schema separates items by type. + # The schema also uses dotted keys. + # To sort between different types, put in order of the name of the type schema = self.detect_schema() sorted_schema = {} - for key in schema: - # sort values by the names of the types + for key, schema_values in schema.items(): tuples_to_sort = [] - for typ in schema[key]: - tuples_to_sort.append((typ.__name__, sorted(schema[key][typ]))) + for type_name in schema_values: + tuples_to_sort.append((type_name.__name__, sorted(schema_values[type_name]))) combined_values = [] for _, v in sorted(tuples_to_sort, key = lambda x: x[0]): combined_values.extend(v) sorted_schema[key] = combined_values need_to_ignore = [sorted_schema.pop(ig, _DictPlaceholder) for ig in ignore] if any(a is _DictPlaceholder for a in need_to_ignore): - # reset back to default - ignore = [] warnings.warn("Ignored key not present in project.", RuntimeWarning) + + if len(ignore) > 0: + _map, _cache = self._prepare_shadow_project(ignore = ignore) + # nl = make_neighbor_list(_map, _cache, sorted_schema) + # return self.shadow_neighbor_list_to_neighbor_list(nl, shadow_map) + else: + self.update_cache() + _cache = dict(self._sp_cache) # copy + # the state point cache is incompatible with nested key notation + for _id, _sp in _cache.items(): + _cache[_id] = {k : v for k, v in _nested_dicts_to_dotted_keys(_sp)} + _map = {k : k for k in _cache} + + return self.make_neighbor_list(_map, _cache, sorted_schema) @contextmanager From 43a7872fd8357fc508faf4ac2433c6df113dee74 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 30 Jul 2025 14:29:22 -0400 Subject: [PATCH 16/73] Only convert from shadow job ids if needed (if ignoring keys) --- signac/project.py | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/signac/project.py b/signac/project.py index c58f11f6d..6ce29bb47 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1882,34 +1882,29 @@ def shadow_neighbor_list_to_neighbor_list(self, shadow_neighbor_list, shadow_map for jobid, neighbors in shadow_neighbor_list.items(): this_d = {} for neighbor_key, neighbor_vals in neighbors.items(): - # neighbor_list.update({shadow_map[jobid]: {neighbor_key :{k: shadow_map[i] for k,i in neighbor_vals.items()}}}) this_d[neighbor_key] = {k: shadow_map[i] for k,i in neighbor_vals.items()} neighbor_list[shadow_map[jobid]] = this_d return neighbor_list - def make_neighbor_list(self, shadow_map, dotted_sp_cache, sorted_schema): + def build_neighbor_list(self, dotted_sp_cache, sorted_schema): """Iterate over jobs and get neighbors of each job. Parameters ---------- - shadow_map : dict - Map from shadow job id to job id if ignoring certain keys, otherwise the identity map. dotted_sp_cache : dict - Map from job id OR shadow job id to state point OR shadow state point in dotted key format + Map from job id to state point OR shadow job id to shadow state point in dotted key format sorted_schema : dict Map of keys to their values to search over Returns ------- - neighborlist : dict - + neighbor_list : dict + {jobid: {state_point_key: {prev_value: neighbor_id, next_value: neighbor_id}}} """ - nearby_jobs = {} + neighbor_list = {} for _id, _sp in dotted_sp_cache.items(): - nearby_jobs[_id] = self.neighbors_of_job(_sp, dotted_sp_cache, sorted_schema) - # {key: shadow_map[shadow_id] for key, shadow_id in shadow_job_neighbors.items()} - nearby_jobs = self.shadow_neighbor_list_to_neighbor_list(nearby_jobs, shadow_map) - return nearby_jobs + neighbor_list[_id] = self.neighbors_of_job(_sp, dotted_sp_cache, sorted_schema) + return neighbor_list def get_neighbors(self, ignore = []): if not isinstance(ignore, list): @@ -1933,19 +1928,16 @@ def get_neighbors(self, ignore = []): warnings.warn("Ignored key not present in project.", RuntimeWarning) if len(ignore) > 0: - _map, _cache = self._prepare_shadow_project(ignore = ignore) - # nl = make_neighbor_list(_map, _cache, sorted_schema) - # return self.shadow_neighbor_list_to_neighbor_list(nl, shadow_map) + shadow_map, shadow_cache = self._prepare_shadow_project(ignore = ignore) + nl = self.build_neighbor_list(shadow_cache, sorted_schema) + return self.shadow_neighbor_list_to_neighbor_list(nl, shadow_map) else: self.update_cache() _cache = dict(self._sp_cache) # copy # the state point cache is incompatible with nested key notation for _id, _sp in _cache.items(): _cache[_id] = {k : v for k, v in _nested_dicts_to_dotted_keys(_sp)} - _map = {k : k for k in _cache} - - - return self.make_neighbor_list(_map, _cache, sorted_schema) + return self.build_neighbor_list(_cache, sorted_schema) @contextmanager def TemporaryProject(cls=None, **kwargs): From 9bb408703ce90fb79d7969bfecc91304c309798f Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Thu, 31 Jul 2025 12:27:50 -0400 Subject: [PATCH 17/73] Streamline output from search function --- signac/project.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/signac/project.py b/signac/project.py index 6ce29bb47..e5975ca9c 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1825,13 +1825,8 @@ def _search_out(self, search_direction, values, current_index, boundary_index, s if jobid is None: query_index += search_direction else: - break - else: - jobid = None - val = None - # todo - # return {val: jobid} - return jobid, val + return {val: jobid} + return None def job_my_neighbor(self, ignore, sorted_schema): """Prototype going from job to neighbor with minimal mess""" @@ -1865,14 +1860,14 @@ def neighbors_of_job(self, statepoint, dotted_sp_cache, sorted_schema): value_index = schema_values.index(value) # need to pass statepoint by copy search_fun = functools.partial(self._search_cache_for_val, dict(statepoint), dotted_sp_cache, key) - previous_jobid, previous_val = self._search_out(-1, schema_values, value_index, 0, search_fun) - next_jobid, next_val = self._search_out(1, schema_values, value_index, len(schema_values) - 1, search_fun) + prev_neighbor = self._search_out(-1, schema_values, value_index, 0, search_fun) + next_neighbor = self._search_out(1, schema_values, value_index, len(schema_values) - 1, search_fun) this_d = {} - if next_jobid is not None: - this_d.update({next_val: next_jobid}) - if previous_jobid is not None: - this_d.update({previous_val: previous_jobid}) + if next_neighbor is not None: + this_d.update(next_neighbor) + if prev_neighbor is not None: + this_d.update(prev_neighbor) nearby_entry.update({key: this_d}) return nearby_entry From df9a0bb14ac67c2022ec1135e01afd74e85b77e9 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Thu, 31 Jul 2025 12:33:47 -0400 Subject: [PATCH 18/73] Improve function names --- signac/project.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/signac/project.py b/signac/project.py index e5975ca9c..6a13c3aee 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1830,11 +1830,11 @@ def _search_out(self, search_direction, values, current_index, boundary_index, s def job_my_neighbor(self, ignore, sorted_schema): """Prototype going from job to neighbor with minimal mess""" - nl = self.neighbors_of_job() + nl = self.neighbors_of_sp() for key, value in nl: pass - def neighbors_of_job(self, statepoint, dotted_sp_cache, sorted_schema): + def neighbors_of_sp(self, statepoint, dotted_sp_cache, sorted_schema): """Return neighbor list of given state point. dotted_sp_cache must be in dotted key format, which is accessed by calling @@ -1882,7 +1882,7 @@ def shadow_neighbor_list_to_neighbor_list(self, shadow_neighbor_list, shadow_map return neighbor_list def build_neighbor_list(self, dotted_sp_cache, sorted_schema): - """Iterate over jobs and get neighbors of each job. + """Iterate over cached state points and get neighbors of each state point. Parameters ---------- @@ -1898,7 +1898,7 @@ def build_neighbor_list(self, dotted_sp_cache, sorted_schema): """ neighbor_list = {} for _id, _sp in dotted_sp_cache.items(): - neighbor_list[_id] = self.neighbors_of_job(_sp, dotted_sp_cache, sorted_schema) + neighbor_list[_id] = self.neighbors_of_sp(_sp, dotted_sp_cache, sorted_schema) return neighbor_list def get_neighbors(self, ignore = []): From ae89ad875ab052304b121a779babc01bea0d9881 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Thu, 31 Jul 2025 12:51:50 -0400 Subject: [PATCH 19/73] Add old neighbor code --- signac/neighbor.py | 163 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 signac/neighbor.py diff --git a/signac/neighbor.py b/signac/neighbor.py new file mode 100644 index 000000000..c062e1f55 --- /dev/null +++ b/signac/neighbor.py @@ -0,0 +1,163 @@ +import signac + +def prepare_shadow_project(project, ignore): + """Detect neighbors and build cache for shadow project which comes from ignored keys. + + Ignoring a key creates a subset of jobs, now identified with different job ids. + Call it shadow job id because we're making a projection of the project. + + We can map from the shadow job id to the actual job id in the use cases identified. + Raise ValueError if this mapping is ill defined. + + We can detect the neighbor list on the shadow project then map it back + to the real project. + + TODO: Belongs in signac core eventually. + + Returns shadow_map, shadow_cache + + shadow_map is a map from shadow job id to project job id. + + shadow_cache is an in-memory state point cache for the shadow project + mapping job id --> shadow state point + + + Use cases: + + 1) Seed that is different for every job. + + 2) State point key that changes in sync with another key. + + Case 1: + + {"a": 1, "b": 2, "seed": 0} -> jobid1 + {"a": 1, "b": 3, "seed": 1} -> jobid2 + {"a": 1, "b": 2} -> shadowid1 + {"a": 1, "b": 3} -> shadowid2 + + shadowid1 <---> jobid1 + shadowid2 <---> jobid2 + + Breaking case 1 with repeated shadow jobs + {"a": 1, "b": 2, "seed": 0} -> jobid1 + {"a": 1, "b": 3, "seed": 1} -> jobid2 + {"a": 1, "b": 3, "seed": 2} -> jobid3 + + {"a": 1, "b": 2} -> shadowid1 + {"a": 1, "b": 3} -> shadowid2 + {"a": 1, "b": 3} -> shadowid2 *conflict* No longer bijection. Maybe we can just keep track of these? Should be few cases. + Now we have shadowid2 .---> jobid2 + \\--> jobid3 + + Case 2: + + {"a1": 10, "a2": 20} -> jobid1 + {"a1": 2, "a2": 4} -> jobid2 + + {"a1": 10} -> shadowid1 + {"a1": 2} -> shadowid2 + + Can still make the mapping between ids. + + Breaking case 2: + {"a1": 10, "a2": 20} -> jobid1 + {"a1": 2, "a2": 4} -> jobid2 + {"a1": 2, "a2": 5} -> jobid3 + + {"a1": 10} -> shadowid1 + {"a1": 2} -> shadowid2 + {"a1": 2} -> shadowid2 -- + Now we have shadowid2 .---> jobid2 + \\--> jobid3 + """ + + shadow_cache = {} # like a state point cache + job_to_shadow = {} # goes from job id to shadow. Call it the projection? + for job in project: + shadow_sp = dict(job.cached_statepoint) + for ig in ignore: + shadow_sp.pop(ig, None) + shadow_id = calc_id(shadow_sp) + shadow_cache[shadow_id] = shadow_sp + job_to_shadow[job.id] = shadow_id + + if len(set(job_to_shadow.values())) != len(job_to_shadow): + # map that has duplicates + duplicate_map = {} + for k,v in job_to_shadow.items(): + try: + duplicate_map[v].append(k) + except KeyError: + duplicate_map[v] = [k] + # one of the breaking cases + # figure out who breaks + counts = Counter(job_to_shadow.values()) + bads = [] + for k,v in counts.items(): + if v>1: + bads.append(k) + err_str = "\n".join(f"Job ids: {', '.join(duplicate_map[b])}." for b in bads) + raise ValueError(f"Ignoring key '{self.ignore}' makes it impossible to distinguish some jobs:\n{err_str}") + shadow_map = {v: k for k, v in job_to_shadow.items()} + return shadow_map, shadow_cache + +def make_neighbor_list(self, my_map, my_cache, _sorted_schema): + nearby_jobs = {} + for _id, _sp in my_cache.items(): + nearby_entry = {} + for key, schema_values in _sorted_schema.items(): # from project + # allow comparison with output of schema, which is hashable + value = _to_hashable(_sp.get(key, _DictPlaceholder)) + if value is _DictPlaceholder: + # Possible if schema is heterogeneous + continue + + value_index = schema_values.index(value) + # need to pass _sp by copy + search_fun = functools.partial(self._search_cache_for_val, dict(_sp), my_cache, key) + previous_job = self._search_out(-1, schema_values, value_index, 0, search_fun) + next_job = self._search_out(1, schema_values, value_index, len(schema_values) - 1, search_fun) + + this_d = {} + if next_job[0] is not None: + this_d.update({next_job[1]: my_map[next_job[0]]}) + if previous_job[0] is not None: + this_d.update({previous_job[1]: my_map[previous_job[0]]}) + nearby_entry.update({key: this_d}) + nearby_jobs[my_map[_id]] = nearby_entry + return nearby_jobs + +def _search_out(self, direction_multiplier, values, current_index, boundary, search_fun): + """Search values towards boundary from current_index using search_fun. + + :param direction_multiplier: 1 means search in the positive direction from the index + :param values: iterator of values to index into + :param current_index: index in values to start searching from. + The value at this index is not accessed directly. + :param search_fun: function taking 1 argument returning jobid if there is a match + :param boundary: the index at which to stop + :param search_fun: function that decides if value exists in project + + """ + + query_index = current_index + direction_multiplier + # search either query_index >= low_boundary or query_index <= high_boundary + while direction_multiplier * query_index <= boundary * direction_multiplier: + val = values[query_index] + jobid = search_fun(val) + if jobid is None: + query_index += direction_multiplier + else: + break + else: + jobid = None + val = None + return jobid, val + +def _search_cache_for_val(self, sp_dict, cache, key, other_val): + sp_dict.update({key: other_val}) + other_job_id = calc_id(sp_dict) + if other_job_id in cache: + return other_job_id + else: + return None From 30a0478037fd1266d33a7418e3c6c8ee7356da63 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Thu, 31 Jul 2025 12:52:00 -0400 Subject: [PATCH 20/73] Move neighbor code to separate module --- signac/neighbor.py | 242 +++++++++++++++++++++++++++++---------------- 1 file changed, 157 insertions(+), 85 deletions(-) diff --git a/signac/neighbor.py b/signac/neighbor.py index c062e1f55..7170ff12e 100644 --- a/signac/neighbor.py +++ b/signac/neighbor.py @@ -1,7 +1,5 @@ -import signac - -def prepare_shadow_project(project, ignore): - """Detect neighbors and build cache for shadow project which comes from ignored keys. +def _prepare_shadow_project(self, ignore: list): + """Build cache and mapping for shadow project, which comes from ignored keys. Ignoring a key creates a subset of jobs, now identified with different job ids. Call it shadow job id because we're making a projection of the project. @@ -12,14 +10,12 @@ def prepare_shadow_project(project, ignore): We can detect the neighbor list on the shadow project then map it back to the real project. - TODO: Belongs in signac core eventually. - Returns shadow_map, shadow_cache shadow_map is a map from shadow job id to project job id. shadow_cache is an in-memory state point cache for the shadow project - mapping job id --> shadow state point + mapping shadow job id --> shadow state point Use cases: @@ -38,14 +34,15 @@ def prepare_shadow_project(project, ignore): shadowid1 <---> jobid1 shadowid2 <---> jobid2 - Breaking case 1 with repeated shadow jobs + Breaking case 1 with repeated shadow jobs: + {"a": 1, "b": 2, "seed": 0} -> jobid1 {"a": 1, "b": 3, "seed": 1} -> jobid2 {"a": 1, "b": 3, "seed": 2} -> jobid3 {"a": 1, "b": 2} -> shadowid1 {"a": 1, "b": 3} -> shadowid2 - {"a": 1, "b": 3} -> shadowid2 *conflict* No longer bijection. Maybe we can just keep track of these? Should be few cases. + {"a": 1, "b": 3} -> shadowid2 *conflict* No longer bijection. Now we have shadowid2 .---> jobid2 \\--> jobid3 @@ -70,94 +67,169 @@ def prepare_shadow_project(project, ignore): Now we have shadowid2 .---> jobid2 \\--> jobid3 """ - - shadow_cache = {} # like a state point cache - job_to_shadow = {} # goes from job id to shadow. Call it the projection? - for job in project: + shadow_cache = {} # like a state point cache, but for the shadow project + job_projection = {} # goes from job id to shadow id + for job in self: shadow_sp = dict(job.cached_statepoint) for ig in ignore: shadow_sp.pop(ig, None) shadow_id = calc_id(shadow_sp) shadow_cache[shadow_id] = shadow_sp - job_to_shadow[job.id] = shadow_id - - if len(set(job_to_shadow.values())) != len(job_to_shadow): - # map that has duplicates - duplicate_map = {} - for k,v in job_to_shadow.items(): - try: - duplicate_map[v].append(k) - except KeyError: - duplicate_map[v] = [k] - # one of the breaking cases - # figure out who breaks - counts = Counter(job_to_shadow.values()) - bads = [] - for k,v in counts.items(): - if v>1: - bads.append(k) - err_str = "\n".join(f"Job ids: {', '.join(duplicate_map[b])}." for b in bads) - raise ValueError(f"Ignoring key '{self.ignore}' makes it impossible to distinguish some jobs:\n{err_str}") - shadow_map = {v: k for k, v in job_to_shadow.items()} + job_projection[job.id] = shadow_id + + if len(set(job_projection.values())) != len(job_projection): + # Make a helpful error message for map that has duplicates + shadow_to_job = defaultdict(list) + counts = defaultdict(int) + for job_id, shadow_id in job_projection.items(): + shadow_to_job[shadow_id].append(job_id) + counts[shadow_id] += 1 + bad_jobids = [shadow_to_job[shadow_id] for shadow_id, num in counts.items() if num > 1] + err_str = "\n".join(f"Job ids: {', '.join(j)}." for j in bad_jobids) + raise ValueError(f"Ignoring {ignore} makes it impossible to distinguish some jobs:\n{err_str}") + # map from shadow job id to project job id + shadow_map = {v: k for k, v in job_projection.items()} return shadow_map, shadow_cache - -def make_neighbor_list(self, my_map, my_cache, _sorted_schema): - nearby_jobs = {} - for _id, _sp in my_cache.items(): - nearby_entry = {} - for key, schema_values in _sorted_schema.items(): # from project - # allow comparison with output of schema, which is hashable - value = _to_hashable(_sp.get(key, _DictPlaceholder)) - if value is _DictPlaceholder: - # Possible if schema is heterogeneous - continue - - value_index = schema_values.index(value) - # need to pass _sp by copy - search_fun = functools.partial(self._search_cache_for_val, dict(_sp), my_cache, key) - previous_job = self._search_out(-1, schema_values, value_index, 0, search_fun) - next_job = self._search_out(1, schema_values, value_index, len(schema_values) - 1, search_fun) - - this_d = {} - if next_job[0] is not None: - this_d.update({next_job[1]: my_map[next_job[0]]}) - if previous_job[0] is not None: - this_d.update({previous_job[1]: my_map[previous_job[0]]}) - nearby_entry.update({key: this_d}) - nearby_jobs[my_map[_id]] = nearby_entry - return nearby_jobs - -def _search_out(self, direction_multiplier, values, current_index, boundary, search_fun): - """Search values towards boundary from current_index using search_fun. - - :param direction_multiplier: 1 means search in the positive direction from the index - :param values: iterator of values to index into - :param current_index: index in values to start searching from. - The value at this index is not accessed directly. - :param search_fun: function taking 1 argument returning jobid if there is a match - :param boundary: the index at which to stop - :param search_fun: function that decides if value exists in project +# key and other_val provided separately to be used with functools.partial +def _search_cache_for_val(self, sp_dict, cache, key, other_val): + """Return job id of similar job if present in cache. + + The similar job is obtained by modifying sp_dict to + include {key: other_val}. + + Internally converts sp_dict from dotted keys to nested dicts format. + + Parameters + ---------- + sp_dict : dict + sp_dict must not be a reference to a state point because it will be + modified in this function + cache : dict + state point cache + key : str + The key whose value to change + other_val + The new value of key to search for + + Returns + ------- + job id of similar job + None, if not present """ + sp_dict.update({key: other_val}) + # schema output not compatible with dotted key notation + sp_dict = _dotted_dict_to_nested_dicts(sp_dict) + other_job_id = calc_id(sp_dict) + if other_job_id in cache: + return other_job_id + else: + return None - query_index = current_index + direction_multiplier +def _search_out(self, search_direction, values, current_index, boundary_index, search_fun): + """Search in values towards boundary_index from current_index using search_fun. + + Parameters + ---------- + search_direction : int, 1 or -1 + 1 means search in the positive direction from the index + values : iterable + values to index into when searching + current_index : int + index into values to start searching from. + The value at this index is not accessed directly. + boundary_index : int + the index at which to stop + search_fun : function + unary function returning jobid if it exists and None otherwise + + Returns + ------- + Tuple of (jobid, val) + jobid : str + job id of the nearest job in the search_direction + val : value of the key at the neighbor jobid + """ + query_index = current_index + search_direction # search either query_index >= low_boundary or query_index <= high_boundary - while direction_multiplier * query_index <= boundary * direction_multiplier: + while search_direction * query_index <= boundary_index * search_direction: val = values[query_index] jobid = search_fun(val) if jobid is None: - query_index += direction_multiplier + query_index += search_direction else: - break - else: - jobid = None - val = None - return jobid, val + return {val: jobid} + return None + +def job_my_neighbor(self, ignore, sorted_schema): + """Prototype going from job to neighbor with minimal mess""" + nl = self.neighbors_of_sp() + for key, value in nl: + pass + +def neighbors_of_sp(self, statepoint, dotted_sp_cache, sorted_schema): + """Return neighbor list of given state point. + + dotted_sp_cache must be in dotted key format, which is accessed by calling + _nested_dicts_to_dotted_keys on each state point in the cache. + + Parameters + ---------- + statepoint : dict + Place to search from. Could be the shadow state point. + dotted_sp_cache : dict + Map from job id OR shadow job id to state point OR shadow state point in dotted key format + sorted_schema : dict + Map from key (in dotted notation) to sorted values of the key to search over + """ -def _search_cache_for_val(self, sp_dict, cache, key, other_val): - sp_dict.update({key: other_val}) - other_job_id = calc_id(sp_dict) - if other_job_id in cache: - return other_job_id - else: - return None + nearby_entry = {} + for key, schema_values in sorted_schema.items(): # from project + # allow comparison with output of schema, which is hashable + value = _to_hashable(statepoint.get(key, _DictPlaceholder)) + if value is _DictPlaceholder: + # Possible if schema is heterogeneous + continue + value_index = schema_values.index(value) + # need to pass statepoint by copy + search_fun = functools.partial(self._search_cache_for_val, dict(statepoint), dotted_sp_cache, key) + prev_neighbor = self._search_out(-1, schema_values, value_index, 0, search_fun) + next_neighbor = self._search_out(1, schema_values, value_index, len(schema_values) - 1, search_fun) + + this_d = {} + if next_neighbor is not None: + this_d.update(next_neighbor) + if prev_neighbor is not None: + this_d.update(prev_neighbor) + nearby_entry.update({key: this_d}) + return nearby_entry + +def shadow_neighbor_list_to_neighbor_list(self, shadow_neighbor_list, shadow_map): + """Replace shadow job ids with actual job ids in the neighbor list.""" + neighbor_list = dict() + for jobid, neighbors in shadow_neighbor_list.items(): + this_d = {} + for neighbor_key, neighbor_vals in neighbors.items(): + this_d[neighbor_key] = {k: shadow_map[i] for k,i in neighbor_vals.items()} + neighbor_list[shadow_map[jobid]] = this_d + return neighbor_list + +def build_neighbor_list(self, dotted_sp_cache, sorted_schema): + """Iterate over cached state points and get neighbors of each state point. + + Parameters + ---------- + dotted_sp_cache : dict + Map from job id to state point OR shadow job id to shadow state point in dotted key format + sorted_schema : dict + Map of keys to their values to search over + + Returns + ------- + neighbor_list : dict + {jobid: {state_point_key: {prev_value: neighbor_id, next_value: neighbor_id}}} + """ + neighbor_list = {} + for _id, _sp in dotted_sp_cache.items(): + neighbor_list[_id] = self.neighbors_of_sp(_sp, dotted_sp_cache, sorted_schema) + return neighbor_list From 2e04fe4f42420cd0b963b6311f9ca573eec2bbfc Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Thu, 31 Jul 2025 14:41:59 -0400 Subject: [PATCH 21/73] Remove internal neighbor code from Project class --- signac/neighbor.py | 37 ++++--- signac/project.py | 255 +++------------------------------------------ 2 files changed, 36 insertions(+), 256 deletions(-) diff --git a/signac/neighbor.py b/signac/neighbor.py index 7170ff12e..a5ca8046d 100644 --- a/signac/neighbor.py +++ b/signac/neighbor.py @@ -1,4 +1,11 @@ -def _prepare_shadow_project(self, ignore: list): +from functools import partial +from collections import defaultdict + +from .job import calc_id +from ._utility import _to_hashable, _dotted_dict_to_nested_dicts +from ._search_indexer import _DictPlaceholder + +def prepare_shadow_project(sp_cache, ignore: list): """Build cache and mapping for shadow project, which comes from ignored keys. Ignoring a key creates a subset of jobs, now identified with different job ids. @@ -69,13 +76,13 @@ def _prepare_shadow_project(self, ignore: list): """ shadow_cache = {} # like a state point cache, but for the shadow project job_projection = {} # goes from job id to shadow id - for job in self: - shadow_sp = dict(job.cached_statepoint) + for jobid, sp in sp_cache.items(): + shadow_sp = dict(sp) for ig in ignore: shadow_sp.pop(ig, None) shadow_id = calc_id(shadow_sp) shadow_cache[shadow_id] = shadow_sp - job_projection[job.id] = shadow_id + job_projection[jobid] = shadow_id if len(set(job_projection.values())) != len(job_projection): # Make a helpful error message for map that has duplicates @@ -92,7 +99,7 @@ def _prepare_shadow_project(self, ignore: list): return shadow_map, shadow_cache # key and other_val provided separately to be used with functools.partial -def _search_cache_for_val(self, sp_dict, cache, key, other_val): +def _search_cache_for_val(sp_dict, cache, key, other_val): """Return job id of similar job if present in cache. The similar job is obtained by modifying sp_dict to @@ -126,7 +133,7 @@ def _search_cache_for_val(self, sp_dict, cache, key, other_val): else: return None -def _search_out(self, search_direction, values, current_index, boundary_index, search_fun): +def _search_out(search_direction, values, current_index, boundary_index, search_fun): """Search in values towards boundary_index from current_index using search_fun. Parameters @@ -161,13 +168,13 @@ def _search_out(self, search_direction, values, current_index, boundary_index, s return {val: jobid} return None -def job_my_neighbor(self, ignore, sorted_schema): +def job_my_neighbor(ignore, sorted_schema): """Prototype going from job to neighbor with minimal mess""" - nl = self.neighbors_of_sp() + nl = neighbors_of_sp() for key, value in nl: pass -def neighbors_of_sp(self, statepoint, dotted_sp_cache, sorted_schema): +def neighbors_of_sp(statepoint, dotted_sp_cache, sorted_schema): """Return neighbor list of given state point. dotted_sp_cache must be in dotted key format, which is accessed by calling @@ -192,9 +199,9 @@ def neighbors_of_sp(self, statepoint, dotted_sp_cache, sorted_schema): continue value_index = schema_values.index(value) # need to pass statepoint by copy - search_fun = functools.partial(self._search_cache_for_val, dict(statepoint), dotted_sp_cache, key) - prev_neighbor = self._search_out(-1, schema_values, value_index, 0, search_fun) - next_neighbor = self._search_out(1, schema_values, value_index, len(schema_values) - 1, search_fun) + search_fun = partial(_search_cache_for_val, dict(statepoint), dotted_sp_cache, key) + prev_neighbor = _search_out(-1, schema_values, value_index, 0, search_fun) + next_neighbor = _search_out(1, schema_values, value_index, len(schema_values) - 1, search_fun) this_d = {} if next_neighbor is not None: @@ -204,7 +211,7 @@ def neighbors_of_sp(self, statepoint, dotted_sp_cache, sorted_schema): nearby_entry.update({key: this_d}) return nearby_entry -def shadow_neighbor_list_to_neighbor_list(self, shadow_neighbor_list, shadow_map): +def shadow_neighbor_list_to_neighbor_list(shadow_neighbor_list, shadow_map): """Replace shadow job ids with actual job ids in the neighbor list.""" neighbor_list = dict() for jobid, neighbors in shadow_neighbor_list.items(): @@ -214,7 +221,7 @@ def shadow_neighbor_list_to_neighbor_list(self, shadow_neighbor_list, shadow_map neighbor_list[shadow_map[jobid]] = this_d return neighbor_list -def build_neighbor_list(self, dotted_sp_cache, sorted_schema): +def build_neighbor_list(dotted_sp_cache, sorted_schema): """Iterate over cached state points and get neighbors of each state point. Parameters @@ -231,5 +238,5 @@ def build_neighbor_list(self, dotted_sp_cache, sorted_schema): """ neighbor_list = {} for _id, _sp in dotted_sp_cache.items(): - neighbor_list[_id] = self.neighbors_of_sp(_sp, dotted_sp_cache, sorted_schema) + neighbor_list[_id] = neighbors_of_sp(_sp, dotted_sp_cache, sorted_schema) return neighbor_list diff --git a/signac/project.py b/signac/project.py index 6a13c3aee..208a56796 100644 --- a/signac/project.py +++ b/signac/project.py @@ -33,6 +33,7 @@ _raise_if_older_schema, _read_config_file, ) +from .neighbor import build_neighbor_list, shadow_neighbor_list_to_neighbor_list, prepare_shadow_project from ._search_indexer import _SearchIndexer, _DictPlaceholder from ._utility import _mkdir_p, _nested_dicts_to_dotted_keys, _to_hashable, _dotted_dict_to_nested_dicts from .errors import ( @@ -1665,241 +1666,11 @@ def __setstate__(self, state): state["_lock"] = RLock() self.__dict__.update(state) - def _prepare_shadow_project(self, ignore: list): - """Build cache and mapping for shadow project, which comes from ignored keys. - - Ignoring a key creates a subset of jobs, now identified with different job ids. - Call it shadow job id because we're making a projection of the project. - - We can map from the shadow job id to the actual job id in the use cases identified. - Raise ValueError if this mapping is ill defined. - - We can detect the neighbor list on the shadow project then map it back - to the real project. - - Returns shadow_map, shadow_cache - - shadow_map is a map from shadow job id to project job id. - - shadow_cache is an in-memory state point cache for the shadow project - mapping shadow job id --> shadow state point - - - Use cases: - - 1) Seed that is different for every job. - - 2) State point key that changes in sync with another key. - - Case 1: - - {"a": 1, "b": 2, "seed": 0} -> jobid1 - {"a": 1, "b": 3, "seed": 1} -> jobid2 - {"a": 1, "b": 2} -> shadowid1 - {"a": 1, "b": 3} -> shadowid2 - - shadowid1 <---> jobid1 - shadowid2 <---> jobid2 - - Breaking case 1 with repeated shadow jobs: - - {"a": 1, "b": 2, "seed": 0} -> jobid1 - {"a": 1, "b": 3, "seed": 1} -> jobid2 - {"a": 1, "b": 3, "seed": 2} -> jobid3 - - {"a": 1, "b": 2} -> shadowid1 - {"a": 1, "b": 3} -> shadowid2 - {"a": 1, "b": 3} -> shadowid2 *conflict* No longer bijection. - Now we have shadowid2 .---> jobid2 - \\--> jobid3 - - Case 2: - - {"a1": 10, "a2": 20} -> jobid1 - {"a1": 2, "a2": 4} -> jobid2 - - {"a1": 10} -> shadowid1 - {"a1": 2} -> shadowid2 - - Can still make the mapping between ids. - - Breaking case 2: - {"a1": 10, "a2": 20} -> jobid1 - {"a1": 2, "a2": 4} -> jobid2 - {"a1": 2, "a2": 5} -> jobid3 - - {"a1": 10} -> shadowid1 - {"a1": 2} -> shadowid2 - {"a1": 2} -> shadowid2 -- - Now we have shadowid2 .---> jobid2 - \\--> jobid3 - """ - shadow_cache = {} # like a state point cache, but for the shadow project - job_projection = {} # goes from job id to shadow id - for job in self: - shadow_sp = dict(job.cached_statepoint) - for ig in ignore: - shadow_sp.pop(ig, None) - shadow_id = calc_id(shadow_sp) - shadow_cache[shadow_id] = shadow_sp - job_projection[job.id] = shadow_id - - if len(set(job_projection.values())) != len(job_projection): - # Make a helpful error message for map that has duplicates - shadow_to_job = defaultdict(list) - counts = defaultdict(int) - for job_id, shadow_id in job_projection.items(): - shadow_to_job[shadow_id].append(job_id) - counts[shadow_id] += 1 - bad_jobids = [shadow_to_job[shadow_id] for shadow_id, num in counts.items() if num > 1] - err_str = "\n".join(f"Job ids: {', '.join(j)}." for j in bad_jobids) - raise ValueError(f"Ignoring {ignore} makes it impossible to distinguish some jobs:\n{err_str}") - # map from shadow job id to project job id - shadow_map = {v: k for k, v in job_projection.items()} - return shadow_map, shadow_cache - - # key and other_val provided separately to be used with functools.partial - def _search_cache_for_val(self, sp_dict, cache, key, other_val): - """Return job id of similar job if present in cache. - - The similar job is obtained by modifying sp_dict to - include {key: other_val}. - - Internally converts sp_dict from dotted keys to nested dicts format. - - Parameters - ---------- - sp_dict : dict - sp_dict must not be a reference to a state point because it will be - modified in this function - cache : dict - state point cache - key : str - The key whose value to change - other_val - The new value of key to search for - - Returns - ------- - job id of similar job - None, if not present - """ - sp_dict.update({key: other_val}) - # schema output not compatible with dotted key notation - sp_dict = _dotted_dict_to_nested_dicts(sp_dict) - other_job_id = calc_id(sp_dict) - if other_job_id in cache: - return other_job_id - else: - return None - - def _search_out(self, search_direction, values, current_index, boundary_index, search_fun): - """Search in values towards boundary_index from current_index using search_fun. - - Parameters - ---------- - search_direction : int, 1 or -1 - 1 means search in the positive direction from the index - values : iterable - values to index into when searching - current_index : int - index into values to start searching from. - The value at this index is not accessed directly. - boundary_index : int - the index at which to stop - search_fun : function - unary function returning jobid if it exists and None otherwise - - Returns - ------- - Tuple of (jobid, val) - jobid : str - job id of the nearest job in the search_direction - val : value of the key at the neighbor jobid - """ - query_index = current_index + search_direction - # search either query_index >= low_boundary or query_index <= high_boundary - while search_direction * query_index <= boundary_index * search_direction: - val = values[query_index] - jobid = search_fun(val) - if jobid is None: - query_index += search_direction - else: - return {val: jobid} - return None - - def job_my_neighbor(self, ignore, sorted_schema): - """Prototype going from job to neighbor with minimal mess""" - nl = self.neighbors_of_sp() - for key, value in nl: - pass - - def neighbors_of_sp(self, statepoint, dotted_sp_cache, sorted_schema): - """Return neighbor list of given state point. - - dotted_sp_cache must be in dotted key format, which is accessed by calling - _nested_dicts_to_dotted_keys on each state point in the cache. - - Parameters - ---------- - statepoint : dict - Place to search from. Could be the shadow state point. - dotted_sp_cache : dict - Map from job id OR shadow job id to state point OR shadow state point in dotted key format - sorted_schema : dict - Map from key (in dotted notation) to sorted values of the key to search over - """ - - nearby_entry = {} - for key, schema_values in sorted_schema.items(): # from project - # allow comparison with output of schema, which is hashable - value = _to_hashable(statepoint.get(key, _DictPlaceholder)) - if value is _DictPlaceholder: - # Possible if schema is heterogeneous - continue - value_index = schema_values.index(value) - # need to pass statepoint by copy - search_fun = functools.partial(self._search_cache_for_val, dict(statepoint), dotted_sp_cache, key) - prev_neighbor = self._search_out(-1, schema_values, value_index, 0, search_fun) - next_neighbor = self._search_out(1, schema_values, value_index, len(schema_values) - 1, search_fun) - - this_d = {} - if next_neighbor is not None: - this_d.update(next_neighbor) - if prev_neighbor is not None: - this_d.update(prev_neighbor) - nearby_entry.update({key: this_d}) - return nearby_entry - - def shadow_neighbor_list_to_neighbor_list(self, shadow_neighbor_list, shadow_map): - """Replace shadow job ids with actual job ids in the neighbor list.""" - neighbor_list = dict() - for jobid, neighbors in shadow_neighbor_list.items(): - this_d = {} - for neighbor_key, neighbor_vals in neighbors.items(): - this_d[neighbor_key] = {k: shadow_map[i] for k,i in neighbor_vals.items()} - neighbor_list[shadow_map[jobid]] = this_d - return neighbor_list - - def build_neighbor_list(self, dotted_sp_cache, sorted_schema): - """Iterate over cached state points and get neighbors of each state point. - - Parameters - ---------- - dotted_sp_cache : dict - Map from job id to state point OR shadow job id to shadow state point in dotted key format - sorted_schema : dict - Map of keys to their values to search over - - Returns - ------- - neighbor_list : dict - {jobid: {state_point_key: {prev_value: neighbor_id, next_value: neighbor_id}}} - """ - neighbor_list = {} - for _id, _sp in dotted_sp_cache.items(): - neighbor_list[_id] = self.neighbors_of_sp(_sp, dotted_sp_cache, sorted_schema) - return neighbor_list + # def job_my_neighbor(self, ignore, sorted_schema): + # """Prototype going from job to neighbor with minimal mess""" + # nl = self.neighbors_of_sp() + # for key, value in nl: + # pass def get_neighbors(self, ignore = []): if not isinstance(ignore, list): @@ -1922,17 +1693,19 @@ def get_neighbors(self, ignore = []): if any(a is _DictPlaceholder for a in need_to_ignore): warnings.warn("Ignored key not present in project.", RuntimeWarning) + self.update_cache() + _cache = dict(self._sp_cache) # copy + if len(ignore) > 0: - shadow_map, shadow_cache = self._prepare_shadow_project(ignore = ignore) - nl = self.build_neighbor_list(shadow_cache, sorted_schema) - return self.shadow_neighbor_list_to_neighbor_list(nl, shadow_map) + shadow_map, shadow_cache = prepare_shadow_project(_cache, ignore = ignore) + nl = build_neighbor_list(shadow_cache, sorted_schema) + return shadow_neighbor_list_to_neighbor_list(nl, shadow_map) else: - self.update_cache() - _cache = dict(self._sp_cache) # copy + # the state point cache is incompatible with nested key notation for _id, _sp in _cache.items(): _cache[_id] = {k : v for k, v in _nested_dicts_to_dotted_keys(_sp)} - return self.build_neighbor_list(_cache, sorted_schema) + return build_neighbor_list(_cache, sorted_schema) @contextmanager def TemporaryProject(cls=None, **kwargs): From bc371ddbc58debb2da4e0f25c39d936aacea7d79 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Thu, 31 Jul 2025 15:25:47 -0400 Subject: [PATCH 22/73] Prototype API for accessing 1 job's neighbors --- signac/job.py | 12 +++++++++--- signac/neighbor.py | 6 ------ signac/project.py | 26 +++++++++++++------------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/signac/job.py b/signac/job.py index 7668e285f..a0d8a7f20 100644 --- a/signac/job.py +++ b/signac/job.py @@ -27,6 +27,7 @@ from .h5store import H5StoreManager from .sync import sync_jobs + logger = logging.getLogger(__name__) @@ -979,6 +980,14 @@ def close(self): except IndexError: pass + def neighbors(self, ignore = []): + """Prototype going from job to neighbor with minimal mess""" + from .neighbor import neighbors_of_sp + sp_cache = self._project._sp_cache + sorted_schema = self._project.flat_schema() + neighbors = neighbors_of_sp(self.cached_statepoint, sp_cache, sorted_schema) + return neighbors + def __enter__(self): self.open() return self @@ -1013,6 +1022,3 @@ def __deepcopy__(self, memo): result._lock = RLock() return result - def neighbors(self, ignore=None): - # TODO, do we expose this to each job? - pass diff --git a/signac/neighbor.py b/signac/neighbor.py index a5ca8046d..7c02dc515 100644 --- a/signac/neighbor.py +++ b/signac/neighbor.py @@ -168,12 +168,6 @@ def _search_out(search_direction, values, current_index, boundary_index, search_ return {val: jobid} return None -def job_my_neighbor(ignore, sorted_schema): - """Prototype going from job to neighbor with minimal mess""" - nl = neighbors_of_sp() - for key, value in nl: - pass - def neighbors_of_sp(statepoint, dotted_sp_cache, sorted_schema): """Return neighbor list of given state point. diff --git a/signac/project.py b/signac/project.py index 208a56796..a80cfd381 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1666,19 +1666,12 @@ def __setstate__(self, state): state["_lock"] = RLock() self.__dict__.update(state) - # def job_my_neighbor(self, ignore, sorted_schema): - # """Prototype going from job to neighbor with minimal mess""" - # nl = self.neighbors_of_sp() - # for key, value in nl: - # pass - - def get_neighbors(self, ignore = []): - if not isinstance(ignore, list): - ignore = [ignore] - # For each state point parameter, make a flat list sorted by values it takes in the project. - # This is almost like schema, but the schema separates items by type. - # The schema also uses dotted keys. - # To sort between different types, put in order of the name of the type + def flat_schema(self): + """For each state point parameter, make a flat list sorted by values it takes in the project. + This is almost like schema, but the schema separates items by type. + The schema also uses dotted keys. + To sort between different types, put in order of the name of the type + """ schema = self.detect_schema() sorted_schema = {} for key, schema_values in schema.items(): @@ -1689,6 +1682,13 @@ def get_neighbors(self, ignore = []): for _, v in sorted(tuples_to_sort, key = lambda x: x[0]): combined_values.extend(v) sorted_schema[key] = combined_values + return sorted_schema + + def get_neighbors(self, ignore = []): + if not isinstance(ignore, list): + ignore = [ignore] + + sorted_schema = self.flat_schema() need_to_ignore = [sorted_schema.pop(ig, _DictPlaceholder) for ig in ignore] if any(a is _DictPlaceholder for a in need_to_ignore): warnings.warn("Ignored key not present in project.", RuntimeWarning) From 2786117deae228357f941585695d19db61b74135 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Mon, 4 Aug 2025 17:15:45 -0400 Subject: [PATCH 23/73] Add shell command to print neighbors of job by id --- signac/__main__.py | 20 +++++++++++++++----- signac/job.py | 2 +- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/signac/__main__.py b/signac/__main__.py index 19b29ba2c..87bcfb45e 100644 --- a/signac/__main__.py +++ b/signac/__main__.py @@ -196,14 +196,12 @@ def main_statepoint(args): else: print(json.dumps(job.statepoint(), indent=args.indent, sort_keys=args.sort)) -def main_neigbors(args): - # TODO +def main_neighbors(args): project = get_project() if args.job_id: jobs = (_open_job_by_id(project, jid) for jid in args.job_id) for job in jobs: - print(job.get_neighbors()) - pass + pprint(job.get_neighbors()) def main_document(args): @@ -216,7 +214,6 @@ def main_document(args): else: print(json.dumps(job.document(), indent=args.indent, sort_keys=args.sort)) - def main_remove(args): """Handle remove subcommand.""" project = get_project() @@ -976,6 +973,19 @@ def main(): ) parser_statepoint.set_defaults(func=main_statepoint) + parser_neighbor = subparsers.add_parser( + "neighbors", + description = "Print the neighbors of the job" + ) + parser_neighbor.add_argument( + "job_id", + nargs="*", + type=str, + help="One or more job ids. The corresponding jobs must be initialized.", + ) + parser_neighbor.set_defaults(func=main_neighbors) + + parser_diff = subparsers.add_parser( "diff", description="Find the difference among job state points." ) diff --git a/signac/job.py b/signac/job.py index a0d8a7f20..05d365cae 100644 --- a/signac/job.py +++ b/signac/job.py @@ -980,7 +980,7 @@ def close(self): except IndexError: pass - def neighbors(self, ignore = []): + def get_neighbors(self, ignore = []): """Prototype going from job to neighbor with minimal mess""" from .neighbor import neighbors_of_sp sp_cache = self._project._sp_cache From 6176290e49d047464051dd8e25d482d1fad527c6 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Tue, 5 Aug 2025 10:29:31 -0400 Subject: [PATCH 24/73] Update docstrings --- signac/neighbor.py | 36 ++++++++++++++++++++++++------------ signac/project.py | 21 ++++++++++++++++++++- 2 files changed, 44 insertions(+), 13 deletions(-) diff --git a/signac/neighbor.py b/signac/neighbor.py index 7c02dc515..7546ed1f5 100644 --- a/signac/neighbor.py +++ b/signac/neighbor.py @@ -9,7 +9,7 @@ def prepare_shadow_project(sp_cache, ignore: list): """Build cache and mapping for shadow project, which comes from ignored keys. Ignoring a key creates a subset of jobs, now identified with different job ids. - Call it shadow job id because we're making a projection of the project. + Call it "shadow" job id because we're making a projection of the project. We can map from the shadow job id to the actual job id in the use cases identified. Raise ValueError if this mapping is ill defined. @@ -100,20 +100,19 @@ def prepare_shadow_project(sp_cache, ignore: list): # key and other_val provided separately to be used with functools.partial def _search_cache_for_val(sp_dict, cache, key, other_val): - """Return job id of similar job if present in cache. + """Return job id of a job similar to sp_dict if present in cache. - The similar job is obtained by modifying sp_dict to - include {key: other_val}. + The similar job is obtained by modifying sp_dict to include {key: other_val}. Internally converts sp_dict from dotted keys to nested dicts format. Parameters ---------- sp_dict : dict - sp_dict must not be a reference to a state point because it will be - modified in this function + state point of job to modify. sp_dict must not be a reference to a state point because it + will be modified in this function cache : dict - state point cache + project state point cache to search in key : str The key whose value to change other_val @@ -152,10 +151,12 @@ def _search_out(search_direction, values, current_index, boundary_index, search_ Returns ------- - Tuple of (jobid, val) + {val: jobid} if jobid found per search_fun jobid : str job id of the nearest job in the search_direction val : value of the key at the neighbor jobid + + None otherwise """ query_index = current_index + search_direction # search either query_index >= low_boundary or query_index <= high_boundary @@ -169,7 +170,9 @@ def _search_out(search_direction, values, current_index, boundary_index, search_ return None def neighbors_of_sp(statepoint, dotted_sp_cache, sorted_schema): - """Return neighbor list of given state point. + """Return neighbors of given state point by searching along sorted_schema in dotted_sp_cache. + + State point and cache must both use either job ids or shadow job ids. dotted_sp_cache must be in dotted key format, which is accessed by calling _nested_dicts_to_dotted_keys on each state point in the cache. @@ -177,9 +180,9 @@ def neighbors_of_sp(statepoint, dotted_sp_cache, sorted_schema): Parameters ---------- statepoint : dict - Place to search from. Could be the shadow state point. + Place to search from dotted_sp_cache : dict - Map from job id OR shadow job id to state point OR shadow state point in dotted key format + Map from job id to state point in dotted key format sorted_schema : dict Map from key (in dotted notation) to sorted values of the key to search over """ @@ -206,7 +209,15 @@ def neighbors_of_sp(statepoint, dotted_sp_cache, sorted_schema): return nearby_entry def shadow_neighbor_list_to_neighbor_list(shadow_neighbor_list, shadow_map): - """Replace shadow job ids with actual job ids in the neighbor list.""" + """Replace shadow job ids with actual job ids in the neighbor list. + + Parameters + ---------- + shadow_neighbor_list : dict + neighbor list containing shadow job ids + shadow_map : dict + map from shadow job id to project job id + """ neighbor_list = dict() for jobid, neighbors in shadow_neighbor_list.items(): this_d = {} @@ -234,3 +245,4 @@ def build_neighbor_list(dotted_sp_cache, sorted_schema): for _id, _sp in dotted_sp_cache.items(): neighbor_list[_id] = neighbors_of_sp(_sp, dotted_sp_cache, sorted_schema) return neighbor_list + diff --git a/signac/project.py b/signac/project.py index a80cfd381..0392a030f 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1668,6 +1668,7 @@ def __setstate__(self, state): def flat_schema(self): """For each state point parameter, make a flat list sorted by values it takes in the project. + This is almost like schema, but the schema separates items by type. The schema also uses dotted keys. To sort between different types, put in order of the name of the type @@ -1685,6 +1686,25 @@ def flat_schema(self): return sorted_schema def get_neighbors(self, ignore = []): + """Builds a neighbor list of jobs in the project. + + A neighbor of a job differs in the value of one state point. If a change of certain state + point parameters should not define. For example, this is useful of two state point + parameters always change together. + + The neighbor list is a dictionary of dictionaries of dictionaries in the following format: + {jobid: {state_point_key: {prev_value: neighbor_id, next_value: neighbor_id}, ...}, ...} + + Parameters + ---------- + ignore : list of str + List of keys to ignore when building neighbor list. + + Returns + ------- + neighbor_list + + """ if not isinstance(ignore, list): ignore = [ignore] @@ -1701,7 +1721,6 @@ def get_neighbors(self, ignore = []): nl = build_neighbor_list(shadow_cache, sorted_schema) return shadow_neighbor_list_to_neighbor_list(nl, shadow_map) else: - # the state point cache is incompatible with nested key notation for _id, _sp in _cache.items(): _cache[_id] = {k : v for k, v in _nested_dicts_to_dotted_keys(_sp)} From 53990bf5e5d079fdf13f43a44ce45bae31407a47 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Tue, 5 Aug 2025 10:35:43 -0400 Subject: [PATCH 25/73] Move code paths handling ignore to neighbor module --- signac/neighbor.py | 12 +++++++++++- signac/project.py | 14 ++++---------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/signac/neighbor.py b/signac/neighbor.py index 7546ed1f5..7baf89409 100644 --- a/signac/neighbor.py +++ b/signac/neighbor.py @@ -2,7 +2,7 @@ from collections import defaultdict from .job import calc_id -from ._utility import _to_hashable, _dotted_dict_to_nested_dicts +from ._utility import _to_hashable, _dotted_dict_to_nested_dicts, _nested_dicts_to_dotted_keys from ._search_indexer import _DictPlaceholder def prepare_shadow_project(sp_cache, ignore: list): @@ -246,3 +246,13 @@ def build_neighbor_list(dotted_sp_cache, sorted_schema): neighbor_list[_id] = neighbors_of_sp(_sp, dotted_sp_cache, sorted_schema) return neighbor_list +def get_neighbor_list(sp_cache, sorted_schema, ignore): + if len(ignore) > 0: + shadow_map, shadow_cache = prepare_shadow_project(sp_cache, ignore = ignore) + nl = build_neighbor_list(shadow_cache, sorted_schema) + return shadow_neighbor_list_to_neighbor_list(nl, shadow_map) + else: + # the state point cache is incompatible with nested key notation + for _id, _sp in sp_cache.items(): + sp_cache[_id] = {k : v for k, v in _nested_dicts_to_dotted_keys(_sp)} + return build_neighbor_list(sp_cache, sorted_schema) diff --git a/signac/project.py b/signac/project.py index 0392a030f..7e8f0485e 100644 --- a/signac/project.py +++ b/signac/project.py @@ -33,7 +33,7 @@ _raise_if_older_schema, _read_config_file, ) -from .neighbor import build_neighbor_list, shadow_neighbor_list_to_neighbor_list, prepare_shadow_project +from .neighbor import get_neighbor_list from ._search_indexer import _SearchIndexer, _DictPlaceholder from ._utility import _mkdir_p, _nested_dicts_to_dotted_keys, _to_hashable, _dotted_dict_to_nested_dicts from .errors import ( @@ -1711,20 +1711,14 @@ def get_neighbors(self, ignore = []): sorted_schema = self.flat_schema() need_to_ignore = [sorted_schema.pop(ig, _DictPlaceholder) for ig in ignore] if any(a is _DictPlaceholder for a in need_to_ignore): + ignore = [] warnings.warn("Ignored key not present in project.", RuntimeWarning) self.update_cache() _cache = dict(self._sp_cache) # copy - if len(ignore) > 0: - shadow_map, shadow_cache = prepare_shadow_project(_cache, ignore = ignore) - nl = build_neighbor_list(shadow_cache, sorted_schema) - return shadow_neighbor_list_to_neighbor_list(nl, shadow_map) - else: - # the state point cache is incompatible with nested key notation - for _id, _sp in _cache.items(): - _cache[_id] = {k : v for k, v in _nested_dicts_to_dotted_keys(_sp)} - return build_neighbor_list(_cache, sorted_schema) + return get_neighbor_list(_cache, sorted_schema, ignore) + @contextmanager def TemporaryProject(cls=None, **kwargs): From 5b5cc84e74f8475d72150a5cb795a4869e5a4fb7 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Tue, 5 Aug 2025 13:02:06 -0400 Subject: [PATCH 26/73] Improve code clarity --- signac/neighbor.py | 64 +++++++++++++++++++++++++++++++--------------- signac/project.py | 31 +++++++++++++++------- 2 files changed, 65 insertions(+), 30 deletions(-) diff --git a/signac/neighbor.py b/signac/neighbor.py index 7baf89409..9bae41e45 100644 --- a/signac/neighbor.py +++ b/signac/neighbor.py @@ -8,8 +8,9 @@ def prepare_shadow_project(sp_cache, ignore: list): """Build cache and mapping for shadow project, which comes from ignored keys. - Ignoring a key creates a subset of jobs, now identified with different job ids. - Call it "shadow" job id because we're making a projection of the project. + We use cache lookups for speedy searching. Ignoring a key creates a subset of jobs, now + identified with different job ids. Call it "shadow" job id because we're making a projection of + the project. We can map from the shadow job id to the actual job id in the use cases identified. Raise ValueError if this mapping is ill defined. @@ -17,8 +18,14 @@ def prepare_shadow_project(sp_cache, ignore: list): We can detect the neighbor list on the shadow project then map it back to the real project. - Returns shadow_map, shadow_cache + Parameters + ---------- + sp_cache, state point cache + ignore: list of str + state point keys + Returns + ------- shadow_map is a map from shadow job id to project job id. shadow_cache is an in-memory state point cache for the shadow project @@ -73,6 +80,7 @@ def prepare_shadow_project(sp_cache, ignore: list): {"a1": 2} -> shadowid2 -- Now we have shadowid2 .---> jobid2 \\--> jobid3 + """ shadow_cache = {} # like a state point cache, but for the shadow project job_projection = {} # goes from job id to shadow id @@ -99,18 +107,18 @@ def prepare_shadow_project(sp_cache, ignore: list): return shadow_map, shadow_cache # key and other_val provided separately to be used with functools.partial -def _search_cache_for_val(sp_dict, cache, key, other_val): - """Return job id of a job similar to sp_dict if present in cache. +def _search_cache_for_val(statepoint, cache, key, other_val): + """Return job id of a job similar to statepoint if present in cache. - The similar job is obtained by modifying sp_dict to include {key: other_val}. + The similar job is obtained by modifying statepoint to include {key: other_val}. - Internally converts sp_dict from dotted keys to nested dicts format. + Internally converts statepoint from dotted keys to nested dicts format. Parameters ---------- - sp_dict : dict - state point of job to modify. sp_dict must not be a reference to a state point because it - will be modified in this function + statepoint : dict + state point of job to modify. statepoint must not be a reference because it will be + modified in this function cache : dict project state point cache to search in key : str @@ -123,10 +131,10 @@ def _search_cache_for_val(sp_dict, cache, key, other_val): job id of similar job None, if not present """ - sp_dict.update({key: other_val}) + statepoint.update({key: other_val}) # schema output not compatible with dotted key notation - sp_dict = _dotted_dict_to_nested_dicts(sp_dict) - other_job_id = calc_id(sp_dict) + statepoint = _dotted_dict_to_nested_dicts(statepoint) + other_job_id = calc_id(statepoint) if other_job_id in cache: return other_job_id else: @@ -151,12 +159,12 @@ def _search_out(search_direction, values, current_index, boundary_index, search_ Returns ------- + None if jobid not found + {val: jobid} if jobid found per search_fun jobid : str job id of the nearest job in the search_direction val : value of the key at the neighbor jobid - - None otherwise """ query_index = current_index + search_direction # search either query_index >= low_boundary or query_index <= high_boundary @@ -187,7 +195,7 @@ def neighbors_of_sp(statepoint, dotted_sp_cache, sorted_schema): Map from key (in dotted notation) to sorted values of the key to search over """ - nearby_entry = {} + neighbors = {} for key, schema_values in sorted_schema.items(): # from project # allow comparison with output of schema, which is hashable value = _to_hashable(statepoint.get(key, _DictPlaceholder)) @@ -205,8 +213,8 @@ def neighbors_of_sp(statepoint, dotted_sp_cache, sorted_schema): this_d.update(next_neighbor) if prev_neighbor is not None: this_d.update(prev_neighbor) - nearby_entry.update({key: this_d}) - return nearby_entry + neighbors.update({key: this_d}) + return neighbors def shadow_neighbor_list_to_neighbor_list(shadow_neighbor_list, shadow_map): """Replace shadow job ids with actual job ids in the neighbor list. @@ -226,7 +234,7 @@ def shadow_neighbor_list_to_neighbor_list(shadow_neighbor_list, shadow_map): neighbor_list[shadow_map[jobid]] = this_d return neighbor_list -def build_neighbor_list(dotted_sp_cache, sorted_schema): +def _build_neighbor_list(dotted_sp_cache, sorted_schema): """Iterate over cached state points and get neighbors of each state point. Parameters @@ -247,12 +255,26 @@ def build_neighbor_list(dotted_sp_cache, sorted_schema): return neighbor_list def get_neighbor_list(sp_cache, sorted_schema, ignore): + """Build neighbor list while handling ignored keys. + + Parameters + ---------- + sp_cache : dict + Project state point cache + sorted_schema : dict + Map of keys to their values to search over + + Returns + ------- + neighbor_list : dict + {jobid: {state_point_key: {prev_value: neighbor_id, next_value: neighbor_id}}} + """ if len(ignore) > 0: shadow_map, shadow_cache = prepare_shadow_project(sp_cache, ignore = ignore) - nl = build_neighbor_list(shadow_cache, sorted_schema) + nl = _build_neighbor_list(shadow_cache, sorted_schema) return shadow_neighbor_list_to_neighbor_list(nl, shadow_map) else: # the state point cache is incompatible with nested key notation for _id, _sp in sp_cache.items(): sp_cache[_id] = {k : v for k, v in _nested_dicts_to_dotted_keys(_sp)} - return build_neighbor_list(sp_cache, sorted_schema) + return _build_neighbor_list(sp_cache, sorted_schema) diff --git a/signac/project.py b/signac/project.py index 7e8f0485e..23075bc17 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1686,12 +1686,14 @@ def flat_schema(self): return sorted_schema def get_neighbors(self, ignore = []): - """Builds a neighbor list of jobs in the project. + """Return the neighbors of each job in the project. + + The neighbors of a job are jobs that differ along one state point parameter. + + If neighbors are not being detected correctly, it is likely that there are several state + point parameters changing together. In this case, pass a list of state point parameters to + ignore to the `ignore` argument. - A neighbor of a job differs in the value of one state point. If a change of certain state - point parameters should not define. For example, this is useful of two state point - parameters always change together. - The neighbor list is a dictionary of dictionaries of dictionaries in the following format: {jobid: {state_point_key: {prev_value: neighbor_id, next_value: neighbor_id}, ...}, ...} @@ -1702,8 +1704,20 @@ def get_neighbors(self, ignore = []): Returns ------- - neighbor_list + neighbor_list : dict + {jobid: {state_point_key: {prev_value: neighbor_id, next_value: neighbor_id}}} + + Example + ------- + .. code-block:: python + neighbor_list = project.get_neighbors() + for job in project: + neighbors = neighbor_list[job.id] + print(f"Job {job.id}") + for key,v in job.sp.items(): + print(f"has {key}={v} with neighbor jobs {key}-->{f" and {key}-->".join( + f"{new_val} at job id {jid}" for new_val,jid in neighbors[key].items())}") """ if not isinstance(ignore, list): ignore = [ignore] @@ -1715,9 +1729,8 @@ def get_neighbors(self, ignore = []): warnings.warn("Ignored key not present in project.", RuntimeWarning) self.update_cache() - _cache = dict(self._sp_cache) # copy - - return get_neighbor_list(_cache, sorted_schema, ignore) + # pass a copy of cache + return get_neighbor_list(dict(self._sp_cache), sorted_schema, ignore) @contextmanager From 778cb5128175be8a68ea825f407e50416ed589dc Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Tue, 5 Aug 2025 13:12:25 -0400 Subject: [PATCH 27/73] Add test for job neighbors --- tests/test_project.py | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/tests/test_project.py b/tests/test_project.py index de1ae713e..292ec570d 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -843,26 +843,30 @@ def test_neighbors(self): for a,b in product(a_vals, b_vals): self.project.open_job({"a": a, "b": b}).init() - neighbors = self.project.get_neighbors() + neighbor_list = self.project.get_neighbors() for a,b in product(a_vals, b_vals): job = self.project.open_job({"a": a, "b": b}) - this_neighbors = neighbors[job.id] - - # a neighbors - if a == 1: - assert this_neighbors["a"][2] == self.project.open_job({"a": 2, "b": b}).id - elif a == 2: - assert this_neighbors["a"][1] == self.project.open_job({"a": 1, "b": b}).id - # b neighbors - if b == 3: - assert this_neighbors["b"][4] == self.project.open_job({"a": a, "b": 4}).id - elif b == 4: - assert this_neighbors["b"][3] == self.project.open_job({"a": a, "b": 3}).id - assert this_neighbors["b"][5] == self.project.open_job({"a": a, "b": 5}).id - elif b == 5: - assert this_neighbors["b"][4] == self.project.open_job({"a": a, "b": 4}).id + neighbors_project = neighbor_list[job.id] + neighbors_job = job.get_neighbors() + assert neighbors_project == neighbors_job + + for this_neighbors in [neighbors_project, neighbors_job]: + # a neighbors + if a == 1: + assert this_neighbors["a"][2] == self.project.open_job({"a": 2, "b": b}).id + elif a == 2: + assert this_neighbors["a"][1] == self.project.open_job({"a": 1, "b": b}).id + + # b neighbors + if b == 3: + assert this_neighbors["b"][4] == self.project.open_job({"a": a, "b": 4}).id + elif b == 4: + assert this_neighbors["b"][3] == self.project.open_job({"a": a, "b": 3}).id + assert this_neighbors["b"][5] == self.project.open_job({"a": a, "b": 5}).id + elif b == 5: + assert this_neighbors["b"][4] == self.project.open_job({"a": a, "b": 4}).id def test_neighbors_ignore(self): b_vals = [3, 4, 5] From 5394f47458d6504a27329da5b6bd6f8d8ea4b0a8 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Tue, 5 Aug 2025 13:18:44 -0400 Subject: [PATCH 28/73] Move neighborlist tests to separate file --- tests/test_neighborlist.py | 120 +++++++++++++++++++++++++++++++++++++ tests/test_project.py | 109 --------------------------------- 2 files changed, 120 insertions(+), 109 deletions(-) create mode 100644 tests/test_neighborlist.py diff --git a/tests/test_neighborlist.py b/tests/test_neighborlist.py new file mode 100644 index 000000000..337ad7391 --- /dev/null +++ b/tests/test_neighborlist.py @@ -0,0 +1,120 @@ +import pytest + +from test_project import TestProject +from itertools import product + +class TestNeighborList(TestProject): + def test_neighbors(self): + a_vals = [1, 2] + b_vals = [3, 4, 5] + for a,b in product(a_vals, b_vals): + self.project.open_job({"a": a, "b": b}).init() + + neighbor_list = self.project.get_neighbors() + + for a,b in product(a_vals, b_vals): + job = self.project.open_job({"a": a, "b": b}) + neighbors_job = job.get_neighbors() + + neighbors_project = neighbor_list[job.id] + assert neighbors_project == neighbors_job + + this_neighbors = neighbors_project + + # for this_neighbors in [neighbors_project, neighbors_job]: + # a neighbors + if a == 1: + assert this_neighbors["a"][2] == self.project.open_job({"a": 2, "b": b}).id + elif a == 2: + assert this_neighbors["a"][1] == self.project.open_job({"a": 1, "b": b}).id + + # b neighbors + if b == 3: + assert this_neighbors["b"][4] == self.project.open_job({"a": a, "b": 4}).id + elif b == 4: + assert this_neighbors["b"][3] == self.project.open_job({"a": a, "b": 3}).id + assert this_neighbors["b"][5] == self.project.open_job({"a": a, "b": 5}).id + elif b == 5: + assert this_neighbors["b"][4] == self.project.open_job({"a": a, "b": 4}).id + + def test_neighbors_ignore(self): + b_vals = [3, 4, 5] + for b in b_vals: + self.project.open_job({"b": b, "2b": 2 * b}).init() + + neighbor_list = self.project.get_neighbors(ignore = "2b") + + for b in b_vals: + job = self.project.open_job({"b": b, "2b": 2 * b}) + this_neighbors = neighbor_list[job.id] + + if b == 3: + assert this_neighbors["b"][4] == self.project.open_job({"b": 4, "2b": 8}).id + elif b == 4: + assert this_neighbors["b"][3] == self.project.open_job({"b": 3, "2b": 6}).id + assert this_neighbors["b"][5] == self.project.open_job({"b": 5, "2b": 10}).id + elif b == 5: + assert this_neighbors["b"][4] == self.project.open_job({"b": 4, "2b": 8}).id + + def test_neighbors_nested(self): + a_vals = [{"c": 2}, {"c": 3}, {"c": 4}] + for a in a_vals: + self.project.open_job({"a": a}).init() + + neighbor_list = self.project.get_neighbors() + + for a in a_vals: + job = self.project.open_job({"a": a}) + this_neighbors = neighbor_list[job.id] + # note how the inconsistency in neighborlist access syntax comes from schema + if a == 2: + assert this_neighbors["a.c"][3] == self.project.open_job({"a": {"c": 3}}).id + elif a == 3: + assert this_neighbors["a.c"][2] == self.project.open_job({"a": {"c": 2}}).id + assert this_neighbors["a.c"][4] == self.project.open_job({"a": {"c": 4}}).id + elif a == 4: + assert this_neighbors["a.c"][3] == self.project.open_job({"a": {"c": 3}}).id + + def test_neighbors_varied_types(self): + # in sort order + # NoneType is first because it's capitalized + a_vals = [None, False, True, 1.2, 1.3, 2, "1", "2", "x", "y", (3,4), (5,6)] + + job_ids = [] + for a in a_vals: + job = self.project.open_job({"a": a}).init() + job_ids.append(job.id) + + neighbor_list = self.project.get_neighbors() + + for i,a in enumerate(a_vals): + jobid = job_ids[i] + if i > 0: + prev_val = a_vals[i-1] + assert neighbor_list[jobid]["a"][prev_val] == job_ids[i-1] + if i < len(a_vals) - 1: + next_val = a_vals[i+1] + assert neighbor_list[jobid]["a"][next_val] == job_ids[i+1] + + def test_neighbors_no(self): + self.project.open_job({"a": 1}).init() + self.project.open_job({"b": 1}).init() + neighbor_list = self.project.get_neighbors() + + for job in self.project: + for v in neighbor_list[job.id].values(): + assert len(v) == 0 + + def test_neighbors_ignore_dups(self): + a_vals = [1,2] + b_vals = [3,4,5] + for a,b in product(a_vals, b_vals): + self.project.open_job({"a": a, "b": b}).init() + with pytest.raises(ValueError): + self.project.get_neighbors(ignore = "a") + with pytest.raises(ValueError): + self.project.get_neighbors(ignore = "b") + + + + diff --git a/tests/test_project.py b/tests/test_project.py index 292ec570d..ce3bad5e4 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -837,115 +837,6 @@ def test_schema_format(self): assert s_format2 == S_FORMAT2 - def test_neighbors(self): - a_vals = [1, 2] - b_vals = [3, 4, 5] - for a,b in product(a_vals, b_vals): - self.project.open_job({"a": a, "b": b}).init() - - neighbor_list = self.project.get_neighbors() - - for a,b in product(a_vals, b_vals): - job = self.project.open_job({"a": a, "b": b}) - - neighbors_project = neighbor_list[job.id] - neighbors_job = job.get_neighbors() - assert neighbors_project == neighbors_job - - for this_neighbors in [neighbors_project, neighbors_job]: - # a neighbors - if a == 1: - assert this_neighbors["a"][2] == self.project.open_job({"a": 2, "b": b}).id - elif a == 2: - assert this_neighbors["a"][1] == self.project.open_job({"a": 1, "b": b}).id - - # b neighbors - if b == 3: - assert this_neighbors["b"][4] == self.project.open_job({"a": a, "b": 4}).id - elif b == 4: - assert this_neighbors["b"][3] == self.project.open_job({"a": a, "b": 3}).id - assert this_neighbors["b"][5] == self.project.open_job({"a": a, "b": 5}).id - elif b == 5: - assert this_neighbors["b"][4] == self.project.open_job({"a": a, "b": 4}).id - - def test_neighbors_ignore(self): - b_vals = [3, 4, 5] - for b in b_vals: - self.project.open_job({"b": b, "2b": 2 * b}).init() - - neighbors = self.project.get_neighbors(ignore = "2b") - - for b in b_vals: - job = self.project.open_job({"b": b, "2b": 2 * b}) - this_neighbors = neighbors[job.id] - - if b == 3: - assert this_neighbors["b"][4] == self.project.open_job({"b": 4, "2b": 8}).id - elif b == 4: - assert this_neighbors["b"][3] == self.project.open_job({"b": 3, "2b": 6}).id - assert this_neighbors["b"][5] == self.project.open_job({"b": 5, "2b": 10}).id - elif b == 5: - assert this_neighbors["b"][4] == self.project.open_job({"b": 4, "2b": 8}).id - - def test_neighbors_nested(self): - a_vals = [{"c": 2}, {"c": 3}, {"c": 4}] - for a in a_vals: - self.project.open_job({"a": a}).init() - - neighbors = self.project.get_neighbors() - - for a in a_vals: - job = self.project.open_job({"a": a}) - this_neighbors = neighbors[job.id] - # note how the inconsistency in neighborlist access syntax comes from schema - if a == 2: - assert this_neighbors["a.c"][3] == self.project.open_job({"a": {"c": 3}}).id - elif a == 3: - assert this_neighbors["a.c"][2] == self.project.open_job({"a": {"c": 2}}).id - assert this_neighbors["a.c"][4] == self.project.open_job({"a": {"c": 4}}).id - elif a == 4: - assert this_neighbors["a.c"][3] == self.project.open_job({"a": {"c": 3}}).id - - def test_neighbors_varied_types(self): - # in sort order - # NoneType is first because it's capitalized - a_vals = [None, False, True, 1.2, 1.3, 2, "1", "2", "x", "y", (3,4), (5,6)] - - job_ids = [] - for a in a_vals: - job = self.project.open_job({"a": a}).init() - job_ids.append(job.id) - - neighbors = self.project.get_neighbors() - - for i,a in enumerate(a_vals): - jobid = job_ids[i] - if i > 0: - prev_val = a_vals[i-1] - assert neighbors[jobid]["a"][prev_val] == job_ids[i-1] - if i < len(a_vals) - 1: - next_val = a_vals[i+1] - assert neighbors[jobid]["a"][next_val] == job_ids[i+1] - - def test_neighbors_no(self): - self.project.open_job({"a": 1}).init() - self.project.open_job({"b": 1}).init() - neighbors = self.project.get_neighbors() - - for job in self.project: - for v in neighbors[job.id].values(): - assert len(v) == 0 - - def test_neighbors_ignore_dups(self): - a_vals = [1,2] - b_vals = [3,4,5] - for a,b in product(a_vals, b_vals): - self.project.open_job({"a": a, "b": b}).init() - with pytest.raises(ValueError): - self.project.get_neighbors(ignore = "a") - with pytest.raises(ValueError): - self.project.get_neighbors(ignore = "b") - def test_jobs_groupby(self): def get_sp(i): return {"a": i, "b": i % 2, "c": i % 3} From 4a2fab04d0ade81dc77602dc58aa9bcb2f6c0410 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Tue, 5 Aug 2025 13:50:02 -0400 Subject: [PATCH 29/73] Move flat_schema to internal method --- signac/job.py | 2 +- signac/project.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/signac/job.py b/signac/job.py index 05d365cae..456380147 100644 --- a/signac/job.py +++ b/signac/job.py @@ -984,8 +984,8 @@ def get_neighbors(self, ignore = []): """Prototype going from job to neighbor with minimal mess""" from .neighbor import neighbors_of_sp sp_cache = self._project._sp_cache - sorted_schema = self._project.flat_schema() neighbors = neighbors_of_sp(self.cached_statepoint, sp_cache, sorted_schema) + sorted_schema = self._project._flat_schema() return neighbors def __enter__(self): diff --git a/signac/project.py b/signac/project.py index 23075bc17..2d884af7e 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1666,7 +1666,7 @@ def __setstate__(self, state): state["_lock"] = RLock() self.__dict__.update(state) - def flat_schema(self): + def _flat_schema(self): """For each state point parameter, make a flat list sorted by values it takes in the project. This is almost like schema, but the schema separates items by type. @@ -1722,7 +1722,7 @@ def get_neighbors(self, ignore = []): if not isinstance(ignore, list): ignore = [ignore] - sorted_schema = self.flat_schema() + sorted_schema = self._flat_schema() need_to_ignore = [sorted_schema.pop(ig, _DictPlaceholder) for ig in ignore] if any(a is _DictPlaceholder for a in need_to_ignore): ignore = [] From ba8f70e9aa3e0139fbecdf71e3d588fa711b854d Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Tue, 5 Aug 2025 13:50:44 -0400 Subject: [PATCH 30/73] Add tests for neighbors detected from job --- signac/job.py | 25 ++++++++++++++++++++++--- tests/test_neighborlist.py | 22 ++++++++++++++++++++-- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/signac/job.py b/signac/job.py index 456380147..0505dcf87 100644 --- a/signac/job.py +++ b/signac/job.py @@ -981,11 +981,30 @@ def close(self): pass def get_neighbors(self, ignore = []): - """Prototype going from job to neighbor with minimal mess""" - from .neighbor import neighbors_of_sp + """Return the neighbors of this job. + + Parameters + ---------- + ignore : list + List of state point parameters to ignore when building neighbor list + """ + from .neighbor import neighbors_of_sp, prepare_shadow_project, shadow_neighbor_list_to_neighbor_list sp_cache = self._project._sp_cache - neighbors = neighbors_of_sp(self.cached_statepoint, sp_cache, sorted_schema) sorted_schema = self._project._flat_schema() + if len(ignore) > 0: + # TODO reduce code duplication here. Existing code focuses on doing all at once and converting to shadow space early + shadow_map, shadow_cache = prepare_shadow_project(sp_cache, ignore = ignore) + # sp = shadow_cache[shadow_... ? + sp = dict(self.cached_statepoint) + ig = [sp.pop(i, None) for i in ignore] + neighbors = neighbors_of_sp(sp, shadow_cache, sorted_schema) + # convert back from shadow id + this_d = {} + for neighbor_key, neighbor_vals in neighbors.items(): + this_d[neighbor_key] = {k: shadow_map[i] for k,i in neighbor_vals.items()} + neighbors = this_d + else: + neighbors = neighbors_of_sp(self.cached_statepoint, sp_cache, sorted_schema) return neighbors def __enter__(self): diff --git a/tests/test_neighborlist.py b/tests/test_neighborlist.py index 337ad7391..e8f011969 100644 --- a/tests/test_neighborlist.py +++ b/tests/test_neighborlist.py @@ -46,7 +46,10 @@ def test_neighbors_ignore(self): for b in b_vals: job = self.project.open_job({"b": b, "2b": 2 * b}) + neighbors_job = job.get_neighbors(ignore = ["2b"]) + this_neighbors = neighbor_list[job.id] + assert this_neighbors == neighbors_job if b == 3: assert this_neighbors["b"][4] == self.project.open_job({"b": 4, "2b": 8}).id @@ -65,7 +68,10 @@ def test_neighbors_nested(self): for a in a_vals: job = self.project.open_job({"a": a}) + neighbors_job = job.get_neighbors() + this_neighbors = neighbor_list[job.id] + assert this_neighbors == neighbors_job # note how the inconsistency in neighborlist access syntax comes from schema if a == 2: assert this_neighbors["a.c"][3] == self.project.open_job({"a": {"c": 3}}).id @@ -89,12 +95,17 @@ def test_neighbors_varied_types(self): for i,a in enumerate(a_vals): jobid = job_ids[i] + job = self.project.open_job(id = jobid) + + neighbors_job = job.get_neighbors() + this_neighbors = neighbor_list[jobid] + assert this_neighbors == neighbors_job if i > 0: prev_val = a_vals[i-1] - assert neighbor_list[jobid]["a"][prev_val] == job_ids[i-1] + assert this_neighbors["a"][prev_val] == job_ids[i-1] if i < len(a_vals) - 1: next_val = a_vals[i+1] - assert neighbor_list[jobid]["a"][next_val] == job_ids[i+1] + assert this_neighbors["a"][next_val] == job_ids[i+1] def test_neighbors_no(self): self.project.open_job({"a": 1}).init() @@ -104,6 +115,8 @@ def test_neighbors_no(self): for job in self.project: for v in neighbor_list[job.id].values(): assert len(v) == 0 + for v in job.get_neighbors().values(): + assert len(v) == 0 def test_neighbors_ignore_dups(self): a_vals = [1,2] @@ -114,6 +127,11 @@ def test_neighbors_ignore_dups(self): self.project.get_neighbors(ignore = "a") with pytest.raises(ValueError): self.project.get_neighbors(ignore = "b") + for job in self.project: + with pytest.raises(ValueError): + job.get_neighbors(ignore = "a") + with pytest.raises(ValueError): + job.get_neighbors(ignore = "b") From 08fae39ed0377e95dca1070674b59f775b6f846a Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Tue, 5 Aug 2025 14:00:06 -0400 Subject: [PATCH 31/73] Split shadow unmapping into two functions --- signac/job.py | 8 ++------ signac/neighbor.py | 26 +++++++++++++++++++------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/signac/job.py b/signac/job.py index 0505dcf87..21374709a 100644 --- a/signac/job.py +++ b/signac/job.py @@ -988,7 +988,7 @@ def get_neighbors(self, ignore = []): ignore : list List of state point parameters to ignore when building neighbor list """ - from .neighbor import neighbors_of_sp, prepare_shadow_project, shadow_neighbor_list_to_neighbor_list + from .neighbor import neighbors_of_sp, prepare_shadow_project, shadow_neighbors_to_neighbors sp_cache = self._project._sp_cache sorted_schema = self._project._flat_schema() if len(ignore) > 0: @@ -998,11 +998,7 @@ def get_neighbors(self, ignore = []): sp = dict(self.cached_statepoint) ig = [sp.pop(i, None) for i in ignore] neighbors = neighbors_of_sp(sp, shadow_cache, sorted_schema) - # convert back from shadow id - this_d = {} - for neighbor_key, neighbor_vals in neighbors.items(): - this_d[neighbor_key] = {k: shadow_map[i] for k,i in neighbor_vals.items()} - neighbors = this_d + neighbors = shadow_neighbors_to_neighbors(neighbors, shadow_map) else: neighbors = neighbors_of_sp(self.cached_statepoint, sp_cache, sorted_schema) return neighbors diff --git a/signac/neighbor.py b/signac/neighbor.py index 9bae41e45..38daa6fcd 100644 --- a/signac/neighbor.py +++ b/signac/neighbor.py @@ -216,22 +216,34 @@ def neighbors_of_sp(statepoint, dotted_sp_cache, sorted_schema): neighbors.update({key: this_d}) return neighbors +def shadow_neighbors_to_neighbors(shadow_neighbors, shadow_map): + """Replace shadow job ids with actual job ids in the neighbors of one job. + + Parameters + ---------- + shadow_neighbors : dict of state point parameters to neighbor values to shadow job id + neighbors containing shadow job ids + shadow_map : dict + map from shadow job id to project job id + """ + neighbors = dict() + for neighbor_key, neighbor_vals in shadow_neighbors.items(): + neighbors[neighbor_key] = {k: shadow_map[i] for k,i in neighbor_vals.items()} + return neighbors + def shadow_neighbor_list_to_neighbor_list(shadow_neighbor_list, shadow_map): """Replace shadow job ids with actual job ids in the neighbor list. Parameters ---------- - shadow_neighbor_list : dict - neighbor list containing shadow job ids + shadow_neighbor_list : dict of shadow job ids to state point parameters to neighbor values to shadow job id + neighbors containing shadow job ids shadow_map : dict map from shadow job id to project job id """ neighbor_list = dict() - for jobid, neighbors in shadow_neighbor_list.items(): - this_d = {} - for neighbor_key, neighbor_vals in neighbors.items(): - this_d[neighbor_key] = {k: shadow_map[i] for k,i in neighbor_vals.items()} - neighbor_list[shadow_map[jobid]] = this_d + for jobid, shadow_neighbors in shadow_neighbor_list.items(): + neighbor_list[shadow_map[jobid]] = shadow_neighbors_to_neighbors(shadow_neighbors, shadow_map) return neighbor_list def _build_neighbor_list(dotted_sp_cache, sorted_schema): From 7111a3907401a929ec95a58c7e12a4f8728bb9a8 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Tue, 5 Aug 2025 15:25:32 -0400 Subject: [PATCH 32/73] Add tests for nested dict with str value --- tests/test_neighborlist.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_neighborlist.py b/tests/test_neighborlist.py index e8f011969..a1d03924c 100644 --- a/tests/test_neighborlist.py +++ b/tests/test_neighborlist.py @@ -60,7 +60,7 @@ def test_neighbors_ignore(self): assert this_neighbors["b"][4] == self.project.open_job({"b": 4, "2b": 8}).id def test_neighbors_nested(self): - a_vals = [{"c": 2}, {"c": 3}, {"c": 4}] + a_vals = [{"c": 2}, {"c": 3}, {"c": 4}, {"c": "5"}, {"c": "hello"}] for a in a_vals: self.project.open_job({"a": a}).init() @@ -71,7 +71,7 @@ def test_neighbors_nested(self): neighbors_job = job.get_neighbors() this_neighbors = neighbor_list[job.id] - assert this_neighbors == neighbors_job + # assert this_neighbors == neighbors_job # note how the inconsistency in neighborlist access syntax comes from schema if a == 2: assert this_neighbors["a.c"][3] == self.project.open_job({"a": {"c": 3}}).id @@ -80,6 +80,10 @@ def test_neighbors_nested(self): assert this_neighbors["a.c"][4] == self.project.open_job({"a": {"c": 4}}).id elif a == 4: assert this_neighbors["a.c"][3] == self.project.open_job({"a": {"c": 3}}).id + assert this_neighbors["a.c"]["5"] == self.project.open_job({"a": {"c": "5"}}).id + elif a == "5": + assert this_neighbors["a.c"][4] == self.project.open_job({"a": {"c": 4}}).id + assert this_neighbors["a.c"]["hello"] == self.project.open_job({"a": {"c": "hello"}}).id def test_neighbors_varied_types(self): # in sort order From b582da0fdcc496562cdf5113236bdbdfa132a954 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Tue, 5 Aug 2025 15:25:45 -0400 Subject: [PATCH 33/73] Fix dotted_key format when detecting neighborlist for single job --- signac/job.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/signac/job.py b/signac/job.py index 21374709a..bc9cbc89c 100644 --- a/signac/job.py +++ b/signac/job.py @@ -989,6 +989,7 @@ def get_neighbors(self, ignore = []): List of state point parameters to ignore when building neighbor list """ from .neighbor import neighbors_of_sp, prepare_shadow_project, shadow_neighbors_to_neighbors + from ._utility import _nested_dicts_to_dotted_keys sp_cache = self._project._sp_cache sorted_schema = self._project._flat_schema() if len(ignore) > 0: @@ -997,6 +998,7 @@ def get_neighbors(self, ignore = []): # sp = shadow_cache[shadow_... ? sp = dict(self.cached_statepoint) ig = [sp.pop(i, None) for i in ignore] + sp = dict(_nested_dicts_to_dotted_keys(sp)) neighbors = neighbors_of_sp(sp, shadow_cache, sorted_schema) neighbors = shadow_neighbors_to_neighbors(neighbors, shadow_map) else: From 9cac4eed6539d89557107a5d5973ca32364300dd Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Tue, 5 Aug 2025 16:24:49 -0400 Subject: [PATCH 34/73] Add test to catch and fix error not converting to dotted sp cache --- signac/neighbor.py | 2 ++ tests/test_neighborlist.py | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/signac/neighbor.py b/signac/neighbor.py index 38daa6fcd..3bd70296a 100644 --- a/signac/neighbor.py +++ b/signac/neighbor.py @@ -283,6 +283,8 @@ def get_neighbor_list(sp_cache, sorted_schema, ignore): """ if len(ignore) > 0: shadow_map, shadow_cache = prepare_shadow_project(sp_cache, ignore = ignore) + for _id, _sp in shadow_cache.items(): + shadow_cache[_id] = {k : v for k, v in _nested_dicts_to_dotted_keys(_sp)} nl = _build_neighbor_list(shadow_cache, sorted_schema) return shadow_neighbor_list_to_neighbor_list(nl, shadow_map) else: diff --git a/tests/test_neighborlist.py b/tests/test_neighborlist.py index a1d03924c..8d954cf81 100644 --- a/tests/test_neighborlist.py +++ b/tests/test_neighborlist.py @@ -85,6 +85,18 @@ def test_neighbors_nested(self): assert this_neighbors["a.c"][4] == self.project.open_job({"a": {"c": 4}}).id assert this_neighbors["a.c"]["hello"] == self.project.open_job({"a": {"c": "hello"}}).id + def test_neighbors_disjoint_ignore(self): + for a,b in product([1,2,3], [5,6,7]): + self.project.open_job({"a": a, "b": b, "2b": 2*b}).init() + for x in [{"n": "nested"}, {"n": "values"}]: + self.project.open_job({"x": x}).init() + + neighbor_list = self.project.get_neighbors(ignore = ["2b"]) + + flat_schema = self.project._flat_schema() + job = self.project.open_job({"x": {"n": "nested"}}) + assert len(neighbor_list[job.id]) > 0 + def test_neighbors_varied_types(self): # in sort order # NoneType is first because it's capitalized From 7d6be834cc6f8835ce7ec4781538121f104c9b00 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Tue, 5 Aug 2025 16:25:24 -0400 Subject: [PATCH 35/73] Fix error in tests --- tests/test_neighborlist.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/test_neighborlist.py b/tests/test_neighborlist.py index 8d954cf81..ca35f17c1 100644 --- a/tests/test_neighborlist.py +++ b/tests/test_neighborlist.py @@ -70,18 +70,20 @@ def test_neighbors_nested(self): job = self.project.open_job({"a": a}) neighbors_job = job.get_neighbors() + c = a["c"] + this_neighbors = neighbor_list[job.id] # assert this_neighbors == neighbors_job # note how the inconsistency in neighborlist access syntax comes from schema - if a == 2: + if c == 2: assert this_neighbors["a.c"][3] == self.project.open_job({"a": {"c": 3}}).id - elif a == 3: + elif c == 3: assert this_neighbors["a.c"][2] == self.project.open_job({"a": {"c": 2}}).id assert this_neighbors["a.c"][4] == self.project.open_job({"a": {"c": 4}}).id - elif a == 4: + elif c == 4: assert this_neighbors["a.c"][3] == self.project.open_job({"a": {"c": 3}}).id assert this_neighbors["a.c"]["5"] == self.project.open_job({"a": {"c": "5"}}).id - elif a == "5": + elif c == "5": assert this_neighbors["a.c"][4] == self.project.open_job({"a": {"c": 4}}).id assert this_neighbors["a.c"]["hello"] == self.project.open_job({"a": {"c": "hello"}}).id From 14750c853a28b3290f667999b457b6fba196476a Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Tue, 5 Aug 2025 16:25:48 -0400 Subject: [PATCH 36/73] Accept single key to ignore for job neighbor entry point --- signac/job.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/signac/job.py b/signac/job.py index bc9cbc89c..dc2614494 100644 --- a/signac/job.py +++ b/signac/job.py @@ -990,6 +990,10 @@ def get_neighbors(self, ignore = []): """ from .neighbor import neighbors_of_sp, prepare_shadow_project, shadow_neighbors_to_neighbors from ._utility import _nested_dicts_to_dotted_keys + + if not isinstance(ignore, list): + ignore = [ignore] + sp_cache = self._project._sp_cache sorted_schema = self._project._flat_schema() if len(ignore) > 0: From c852971b3d034b66f12bdb8168d89ea525ca638f Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 6 Aug 2025 11:25:24 -0400 Subject: [PATCH 37/73] Fix bug in nested neighbor detection for single job --- signac/job.py | 9 +++++---- signac/neighbor.py | 6 +++--- tests/test_neighborlist.py | 10 ++++++---- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/signac/job.py b/signac/job.py index dc2614494..5554e11c1 100644 --- a/signac/job.py +++ b/signac/job.py @@ -996,17 +996,18 @@ def get_neighbors(self, ignore = []): sp_cache = self._project._sp_cache sorted_schema = self._project._flat_schema() + sp = dict(self.cached_statepoint) + sp = dict(_nested_dicts_to_dotted_keys(sp)) if len(ignore) > 0: # TODO reduce code duplication here. Existing code focuses on doing all at once and converting to shadow space early shadow_map, shadow_cache = prepare_shadow_project(sp_cache, ignore = ignore) - # sp = shadow_cache[shadow_... ? - sp = dict(self.cached_statepoint) + for _id, _sp in shadow_cache.items(): + shadow_cache[_id] = {k : v for k, v in _nested_dicts_to_dotted_keys(_sp)} ig = [sp.pop(i, None) for i in ignore] - sp = dict(_nested_dicts_to_dotted_keys(sp)) neighbors = neighbors_of_sp(sp, shadow_cache, sorted_schema) neighbors = shadow_neighbors_to_neighbors(neighbors, shadow_map) else: - neighbors = neighbors_of_sp(self.cached_statepoint, sp_cache, sorted_schema) + neighbors = neighbors_of_sp(sp, sp_cache, sorted_schema) return neighbors def __enter__(self): diff --git a/signac/neighbor.py b/signac/neighbor.py index 3bd70296a..1ac7445ba 100644 --- a/signac/neighbor.py +++ b/signac/neighbor.py @@ -77,7 +77,7 @@ def prepare_shadow_project(sp_cache, ignore: list): {"a1": 10} -> shadowid1 {"a1": 2} -> shadowid2 - {"a1": 2} -> shadowid2 -- + {"a1": 2} -> shadowid2 Now we have shadowid2 .---> jobid2 \\--> jobid3 @@ -182,13 +182,13 @@ def neighbors_of_sp(statepoint, dotted_sp_cache, sorted_schema): State point and cache must both use either job ids or shadow job ids. - dotted_sp_cache must be in dotted key format, which is accessed by calling + statepoint and dotted_sp_cache must be in dotted key format, which is accessed by calling _nested_dicts_to_dotted_keys on each state point in the cache. Parameters ---------- statepoint : dict - Place to search from + State point to start search from, in dotted key format dotted_sp_cache : dict Map from job id to state point in dotted key format sorted_schema : dict diff --git a/tests/test_neighborlist.py b/tests/test_neighborlist.py index ca35f17c1..665db9050 100644 --- a/tests/test_neighborlist.py +++ b/tests/test_neighborlist.py @@ -21,7 +21,6 @@ def test_neighbors(self): this_neighbors = neighbors_project - # for this_neighbors in [neighbors_project, neighbors_job]: # a neighbors if a == 1: assert this_neighbors["a"][2] == self.project.open_job({"a": 2, "b": b}).id @@ -73,7 +72,7 @@ def test_neighbors_nested(self): c = a["c"] this_neighbors = neighbor_list[job.id] - # assert this_neighbors == neighbors_job + assert this_neighbors == neighbors_job # note how the inconsistency in neighborlist access syntax comes from schema if c == 2: assert this_neighbors["a.c"][3] == self.project.open_job({"a": {"c": 3}}).id @@ -95,9 +94,12 @@ def test_neighbors_disjoint_ignore(self): neighbor_list = self.project.get_neighbors(ignore = ["2b"]) - flat_schema = self.project._flat_schema() job = self.project.open_job({"x": {"n": "nested"}}) - assert len(neighbor_list[job.id]) > 0 + neighbors_job = job.get_neighbors(ignore = ["2b"]) + neighbors_project = neighbor_list[job.id] + + assert neighbors_project == neighbors_job + assert neighbors_project["x.n"]["values"] == self.project.open_job({"x": {"n": "values"}}).id def test_neighbors_varied_types(self): # in sort order From 303849d50ec7119ec83b5b61f5a3a94360b0e19f Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 6 Aug 2025 11:25:49 -0400 Subject: [PATCH 38/73] Add check for RuntimeWarning --- tests/test_neighborlist.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_neighborlist.py b/tests/test_neighborlist.py index 665db9050..c87204488 100644 --- a/tests/test_neighborlist.py +++ b/tests/test_neighborlist.py @@ -35,6 +35,8 @@ def test_neighbors(self): assert this_neighbors["b"][5] == self.project.open_job({"a": a, "b": 5}).id elif b == 5: assert this_neighbors["b"][4] == self.project.open_job({"a": a, "b": 4}).id + with pytest.raises(RuntimeWarning): + self.project.get_neighbors(ignore = ["not_present"]) def test_neighbors_ignore(self): b_vals = [3, 4, 5] From a88d60268f80ea4c17c56c743a18f7e20cb4985b Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 6 Aug 2025 11:33:35 -0400 Subject: [PATCH 39/73] Convert shadow_cache to dotted key format early --- signac/job.py | 2 -- signac/neighbor.py | 12 ++++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/signac/job.py b/signac/job.py index 5554e11c1..c87b71f8d 100644 --- a/signac/job.py +++ b/signac/job.py @@ -1001,8 +1001,6 @@ def get_neighbors(self, ignore = []): if len(ignore) > 0: # TODO reduce code duplication here. Existing code focuses on doing all at once and converting to shadow space early shadow_map, shadow_cache = prepare_shadow_project(sp_cache, ignore = ignore) - for _id, _sp in shadow_cache.items(): - shadow_cache[_id] = {k : v for k, v in _nested_dicts_to_dotted_keys(_sp)} ig = [sp.pop(i, None) for i in ignore] neighbors = neighbors_of_sp(sp, shadow_cache, sorted_schema) neighbors = shadow_neighbors_to_neighbors(neighbors, shadow_map) diff --git a/signac/neighbor.py b/signac/neighbor.py index 1ac7445ba..0e621de63 100644 --- a/signac/neighbor.py +++ b/signac/neighbor.py @@ -26,10 +26,11 @@ def prepare_shadow_project(sp_cache, ignore: list): Returns ------- - shadow_map is a map from shadow job id to project job id. + shadow_map + a map from shadow job id to project job id. - shadow_cache is an in-memory state point cache for the shadow project - mapping shadow job id --> shadow state point + shadow_cache + an in-memory state point cache for the shadow project mapping shadow job id --> shadow state point, in dotted key format Use cases: @@ -89,7 +90,8 @@ def prepare_shadow_project(sp_cache, ignore: list): for ig in ignore: shadow_sp.pop(ig, None) shadow_id = calc_id(shadow_sp) - shadow_cache[shadow_id] = shadow_sp + # The cache needs to be in dotted key format, so just convert it here + shadow_cache[shadow_id] = dict(_nested_dicts_to_dotted_keys(shadow_sp)) job_projection[jobid] = shadow_id if len(set(job_projection.values())) != len(job_projection): @@ -283,8 +285,6 @@ def get_neighbor_list(sp_cache, sorted_schema, ignore): """ if len(ignore) > 0: shadow_map, shadow_cache = prepare_shadow_project(sp_cache, ignore = ignore) - for _id, _sp in shadow_cache.items(): - shadow_cache[_id] = {k : v for k, v in _nested_dicts_to_dotted_keys(_sp)} nl = _build_neighbor_list(shadow_cache, sorted_schema) return shadow_neighbor_list_to_neighbor_list(nl, shadow_map) else: From c1801dbfbbffd02025b4febc7b19667974353259 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 6 Aug 2025 13:00:38 -0400 Subject: [PATCH 40/73] Clarify comments about dotted keys --- signac/neighbor.py | 10 +++++----- signac/project.py | 1 - 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/signac/neighbor.py b/signac/neighbor.py index 0e621de63..3b2e13f58 100644 --- a/signac/neighbor.py +++ b/signac/neighbor.py @@ -200,6 +200,7 @@ def neighbors_of_sp(statepoint, dotted_sp_cache, sorted_schema): neighbors = {} for key, schema_values in sorted_schema.items(): # from project # allow comparison with output of schema, which is hashable + # and which is in dotted key format value = _to_hashable(statepoint.get(key, _DictPlaceholder)) if value is _DictPlaceholder: # Possible if schema is heterogeneous @@ -256,7 +257,7 @@ def _build_neighbor_list(dotted_sp_cache, sorted_schema): dotted_sp_cache : dict Map from job id to state point OR shadow job id to shadow state point in dotted key format sorted_schema : dict - Map of keys to their values to search over + Map of dotted keys to their values to search over Returns ------- @@ -276,7 +277,7 @@ def get_neighbor_list(sp_cache, sorted_schema, ignore): sp_cache : dict Project state point cache sorted_schema : dict - Map of keys to their values to search over + Map of dotted keys to their values to search over Returns ------- @@ -288,7 +289,6 @@ def get_neighbor_list(sp_cache, sorted_schema, ignore): nl = _build_neighbor_list(shadow_cache, sorted_schema) return shadow_neighbor_list_to_neighbor_list(nl, shadow_map) else: - # the state point cache is incompatible with nested key notation - for _id, _sp in sp_cache.items(): - sp_cache[_id] = {k : v for k, v in _nested_dicts_to_dotted_keys(_sp)} + # the state point cache needs to be in dotted keys to enable searching over schema values + sp_cache = {_id: dict(_nested_dicts_to_dotted_keys(_sp)) for _id, _sp in sp_cache.items()} return _build_neighbor_list(sp_cache, sorted_schema) diff --git a/signac/project.py b/signac/project.py index 2d884af7e..37feb91c2 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1670,7 +1670,6 @@ def _flat_schema(self): """For each state point parameter, make a flat list sorted by values it takes in the project. This is almost like schema, but the schema separates items by type. - The schema also uses dotted keys. To sort between different types, put in order of the name of the type """ schema = self.detect_schema() From 7a0e73508a7ab4ed8f71236a82445861b22e2552 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 6 Aug 2025 13:18:35 -0400 Subject: [PATCH 41/73] Update job entry point --- signac/job.py | 19 +++++++++++++------ tests/test_neighborlist.py | 3 +++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/signac/job.py b/signac/job.py index c87b71f8d..0bc5743d6 100644 --- a/signac/job.py +++ b/signac/job.py @@ -9,6 +9,8 @@ import logging import os import shutil +import warnings + from copy import deepcopy from threading import RLock from types import MappingProxyType @@ -990,24 +992,30 @@ def get_neighbors(self, ignore = []): """ from .neighbor import neighbors_of_sp, prepare_shadow_project, shadow_neighbors_to_neighbors from ._utility import _nested_dicts_to_dotted_keys + from ._search_indexer import _DictPlaceholder if not isinstance(ignore, list): ignore = [ignore] sp_cache = self._project._sp_cache + sorted_schema = self._project._flat_schema() - sp = dict(self.cached_statepoint) - sp = dict(_nested_dicts_to_dotted_keys(sp)) + sp = dict(_nested_dicts_to_dotted_keys(self.cached_statepoint)) + need_to_ignore = [sorted_schema.pop(i, _DictPlaceholder) for i in ignore] + if any(a is _DictPlaceholder for a in need_to_ignore): + ignore = [] + warnings.warn("Ignored key not present in project.", RuntimeWarning) + if len(ignore) > 0: - # TODO reduce code duplication here. Existing code focuses on doing all at once and converting to shadow space early - shadow_map, shadow_cache = prepare_shadow_project(sp_cache, ignore = ignore) ig = [sp.pop(i, None) for i in ignore] + shadow_map, shadow_cache = prepare_shadow_project(sp_cache, ignore = ignore) neighbors = neighbors_of_sp(sp, shadow_cache, sorted_schema) neighbors = shadow_neighbors_to_neighbors(neighbors, shadow_map) else: + sp_cache = {_id: dict(_nested_dicts_to_dotted_keys(_sp)) for _id, _sp in sp_cache.items()} neighbors = neighbors_of_sp(sp, sp_cache, sorted_schema) return neighbors - + def __enter__(self): self.open() return self @@ -1041,4 +1049,3 @@ def __deepcopy__(self, memo): setattr(result, key, deepcopy(value, memo)) result._lock = RLock() return result - diff --git a/tests/test_neighborlist.py b/tests/test_neighborlist.py index c87204488..bd97ef9b6 100644 --- a/tests/test_neighborlist.py +++ b/tests/test_neighborlist.py @@ -15,6 +15,8 @@ def test_neighbors(self): for a,b in product(a_vals, b_vals): job = self.project.open_job({"a": a, "b": b}) neighbors_job = job.get_neighbors() + with pytest.raises(RuntimeWarning): + job.get_neighbors(ignore = ["not_present"]) neighbors_project = neighbor_list[job.id] assert neighbors_project == neighbors_job @@ -38,6 +40,7 @@ def test_neighbors(self): with pytest.raises(RuntimeWarning): self.project.get_neighbors(ignore = ["not_present"]) + def test_neighbors_ignore(self): b_vals = [3, 4, 5] for b in b_vals: From 531aa1c53f4cb75641b2f1b40d37a101f7b4cfa4 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 6 Aug 2025 13:30:22 -0400 Subject: [PATCH 42/73] Run precommit --- signac/__main__.py | 8 +- signac/_search_indexer.py | 5 +- signac/job.py | 19 +++-- signac/neighbor.py | 55 ++++++++++---- signac/project.py | 34 +++++---- tests/test_neighborlist.py | 146 ++++++++++++++++++++++++------------- tests/test_project.py | 1 + 7 files changed, 174 insertions(+), 94 deletions(-) diff --git a/signac/__main__.py b/signac/__main__.py index 87bcfb45e..bb6e08121 100644 --- a/signac/__main__.py +++ b/signac/__main__.py @@ -196,12 +196,13 @@ def main_statepoint(args): else: print(json.dumps(job.statepoint(), indent=args.indent, sort_keys=args.sort)) + def main_neighbors(args): project = get_project() if args.job_id: jobs = (_open_job_by_id(project, jid) for jid in args.job_id) for job in jobs: - pprint(job.get_neighbors()) + pprint(job.get_neighbors()) def main_document(args): @@ -214,6 +215,7 @@ def main_document(args): else: print(json.dumps(job.document(), indent=args.indent, sort_keys=args.sort)) + def main_remove(args): """Handle remove subcommand.""" project = get_project() @@ -974,8 +976,7 @@ def main(): parser_statepoint.set_defaults(func=main_statepoint) parser_neighbor = subparsers.add_parser( - "neighbors", - description = "Print the neighbors of the job" + "neighbors", description="Print the neighbors of the job" ) parser_neighbor.add_argument( "job_id", @@ -985,7 +986,6 @@ def main(): ) parser_neighbor.set_defaults(func=main_neighbors) - parser_diff = subparsers.add_parser( "diff", description="Find the difference among job state points." ) diff --git a/signac/_search_indexer.py b/signac/_search_indexer.py index c2f0894ef..158b7f466 100644 --- a/signac/_search_indexer.py +++ b/signac/_search_indexer.py @@ -254,6 +254,7 @@ class _SearchIndexer(dict): ``_SearchIndexer(iterable, **kwargs)``. """ + def build_all_index(self): # figure out keys from all jobs @@ -266,10 +267,10 @@ def build_all_index(self): index[_DictPlaceholder].add(_id) else: index[v].add(_id) - + def get_index(self, key): pass - + def build_index(self, key): """Build index for a given key. diff --git a/signac/job.py b/signac/job.py index 0bc5743d6..c97d0ea84 100644 --- a/signac/job.py +++ b/signac/job.py @@ -10,7 +10,6 @@ import os import shutil import warnings - from copy import deepcopy from threading import RLock from types import MappingProxyType @@ -29,7 +28,6 @@ from .h5store import H5StoreManager from .sync import sync_jobs - logger = logging.getLogger(__name__) @@ -982,7 +980,7 @@ def close(self): except IndexError: pass - def get_neighbors(self, ignore = []): + def get_neighbors(self, ignore=[]): """Return the neighbors of this job. Parameters @@ -990,9 +988,13 @@ def get_neighbors(self, ignore = []): ignore : list List of state point parameters to ignore when building neighbor list """ - from .neighbor import neighbors_of_sp, prepare_shadow_project, shadow_neighbors_to_neighbors - from ._utility import _nested_dicts_to_dotted_keys from ._search_indexer import _DictPlaceholder + from ._utility import _nested_dicts_to_dotted_keys + from .neighbor import ( + neighbors_of_sp, + prepare_shadow_project, + shadow_neighbors_to_neighbors, + ) if not isinstance(ignore, list): ignore = [ignore] @@ -1008,11 +1010,14 @@ def get_neighbors(self, ignore = []): if len(ignore) > 0: ig = [sp.pop(i, None) for i in ignore] - shadow_map, shadow_cache = prepare_shadow_project(sp_cache, ignore = ignore) + shadow_map, shadow_cache = prepare_shadow_project(sp_cache, ignore=ignore) neighbors = neighbors_of_sp(sp, shadow_cache, sorted_schema) neighbors = shadow_neighbors_to_neighbors(neighbors, shadow_map) else: - sp_cache = {_id: dict(_nested_dicts_to_dotted_keys(_sp)) for _id, _sp in sp_cache.items()} + sp_cache = { + _id: dict(_nested_dicts_to_dotted_keys(_sp)) + for _id, _sp in sp_cache.items() + } neighbors = neighbors_of_sp(sp, sp_cache, sorted_schema) return neighbors diff --git a/signac/neighbor.py b/signac/neighbor.py index 3b2e13f58..e19cc75f7 100644 --- a/signac/neighbor.py +++ b/signac/neighbor.py @@ -1,9 +1,14 @@ -from functools import partial from collections import defaultdict +from functools import partial -from .job import calc_id -from ._utility import _to_hashable, _dotted_dict_to_nested_dicts, _nested_dicts_to_dotted_keys from ._search_indexer import _DictPlaceholder +from ._utility import ( + _dotted_dict_to_nested_dicts, + _nested_dicts_to_dotted_keys, + _to_hashable, +) +from .job import calc_id + def prepare_shadow_project(sp_cache, ignore: list): """Build cache and mapping for shadow project, which comes from ignored keys. @@ -83,8 +88,8 @@ def prepare_shadow_project(sp_cache, ignore: list): \\--> jobid3 """ - shadow_cache = {} # like a state point cache, but for the shadow project - job_projection = {} # goes from job id to shadow id + shadow_cache = {} # like a state point cache, but for the shadow project + job_projection = {} # goes from job id to shadow id for jobid, sp in sp_cache.items(): shadow_sp = dict(sp) for ig in ignore: @@ -101,13 +106,18 @@ def prepare_shadow_project(sp_cache, ignore: list): for job_id, shadow_id in job_projection.items(): shadow_to_job[shadow_id].append(job_id) counts[shadow_id] += 1 - bad_jobids = [shadow_to_job[shadow_id] for shadow_id, num in counts.items() if num > 1] + bad_jobids = [ + shadow_to_job[shadow_id] for shadow_id, num in counts.items() if num > 1 + ] err_str = "\n".join(f"Job ids: {', '.join(j)}." for j in bad_jobids) - raise ValueError(f"Ignoring {ignore} makes it impossible to distinguish some jobs:\n{err_str}") + raise ValueError( + f"Ignoring {ignore} makes it impossible to distinguish some jobs:\n{err_str}" + ) # map from shadow job id to project job id shadow_map = {v: k for k, v in job_projection.items()} return shadow_map, shadow_cache + # key and other_val provided separately to be used with functools.partial def _search_cache_for_val(statepoint, cache, key, other_val): """Return job id of a job similar to statepoint if present in cache. @@ -142,6 +152,7 @@ def _search_cache_for_val(statepoint, cache, key, other_val): else: return None + def _search_out(search_direction, values, current_index, boundary_index, search_fun): """Search in values towards boundary_index from current_index using search_fun. @@ -162,7 +173,7 @@ def _search_out(search_direction, values, current_index, boundary_index, search_ Returns ------- None if jobid not found - + {val: jobid} if jobid found per search_fun jobid : str job id of the nearest job in the search_direction @@ -179,6 +190,7 @@ def _search_out(search_direction, values, current_index, boundary_index, search_ return {val: jobid} return None + def neighbors_of_sp(statepoint, dotted_sp_cache, sorted_schema): """Return neighbors of given state point by searching along sorted_schema in dotted_sp_cache. @@ -198,7 +210,7 @@ def neighbors_of_sp(statepoint, dotted_sp_cache, sorted_schema): """ neighbors = {} - for key, schema_values in sorted_schema.items(): # from project + for key, schema_values in sorted_schema.items(): # from project # allow comparison with output of schema, which is hashable # and which is in dotted key format value = _to_hashable(statepoint.get(key, _DictPlaceholder)) @@ -207,9 +219,13 @@ def neighbors_of_sp(statepoint, dotted_sp_cache, sorted_schema): continue value_index = schema_values.index(value) # need to pass statepoint by copy - search_fun = partial(_search_cache_for_val, dict(statepoint), dotted_sp_cache, key) + search_fun = partial( + _search_cache_for_val, dict(statepoint), dotted_sp_cache, key + ) prev_neighbor = _search_out(-1, schema_values, value_index, 0, search_fun) - next_neighbor = _search_out(1, schema_values, value_index, len(schema_values) - 1, search_fun) + next_neighbor = _search_out( + 1, schema_values, value_index, len(schema_values) - 1, search_fun + ) this_d = {} if next_neighbor is not None: @@ -219,6 +235,7 @@ def neighbors_of_sp(statepoint, dotted_sp_cache, sorted_schema): neighbors.update({key: this_d}) return neighbors + def shadow_neighbors_to_neighbors(shadow_neighbors, shadow_map): """Replace shadow job ids with actual job ids in the neighbors of one job. @@ -231,9 +248,10 @@ def shadow_neighbors_to_neighbors(shadow_neighbors, shadow_map): """ neighbors = dict() for neighbor_key, neighbor_vals in shadow_neighbors.items(): - neighbors[neighbor_key] = {k: shadow_map[i] for k,i in neighbor_vals.items()} + neighbors[neighbor_key] = {k: shadow_map[i] for k, i in neighbor_vals.items()} return neighbors + def shadow_neighbor_list_to_neighbor_list(shadow_neighbor_list, shadow_map): """Replace shadow job ids with actual job ids in the neighbor list. @@ -246,9 +264,12 @@ def shadow_neighbor_list_to_neighbor_list(shadow_neighbor_list, shadow_map): """ neighbor_list = dict() for jobid, shadow_neighbors in shadow_neighbor_list.items(): - neighbor_list[shadow_map[jobid]] = shadow_neighbors_to_neighbors(shadow_neighbors, shadow_map) + neighbor_list[shadow_map[jobid]] = shadow_neighbors_to_neighbors( + shadow_neighbors, shadow_map + ) return neighbor_list + def _build_neighbor_list(dotted_sp_cache, sorted_schema): """Iterate over cached state points and get neighbors of each state point. @@ -269,6 +290,7 @@ def _build_neighbor_list(dotted_sp_cache, sorted_schema): neighbor_list[_id] = neighbors_of_sp(_sp, dotted_sp_cache, sorted_schema) return neighbor_list + def get_neighbor_list(sp_cache, sorted_schema, ignore): """Build neighbor list while handling ignored keys. @@ -285,10 +307,13 @@ def get_neighbor_list(sp_cache, sorted_schema, ignore): {jobid: {state_point_key: {prev_value: neighbor_id, next_value: neighbor_id}}} """ if len(ignore) > 0: - shadow_map, shadow_cache = prepare_shadow_project(sp_cache, ignore = ignore) + shadow_map, shadow_cache = prepare_shadow_project(sp_cache, ignore=ignore) nl = _build_neighbor_list(shadow_cache, sorted_schema) return shadow_neighbor_list_to_neighbor_list(nl, shadow_map) else: # the state point cache needs to be in dotted keys to enable searching over schema values - sp_cache = {_id: dict(_nested_dicts_to_dotted_keys(_sp)) for _id, _sp in sp_cache.items()} + sp_cache = { + _id: dict(_nested_dicts_to_dotted_keys(_sp)) + for _id, _sp in sp_cache.items() + } return _build_neighbor_list(sp_cache, sorted_schema) diff --git a/signac/project.py b/signac/project.py index 37feb91c2..92c1d3a21 100644 --- a/signac/project.py +++ b/signac/project.py @@ -4,6 +4,7 @@ """The signac Project and JobsCursor classes.""" import errno +import functools import gzip import json import logging @@ -12,8 +13,7 @@ import shutil import time import warnings -import functools -from collections import defaultdict, Counter +from collections import Counter, defaultdict from collections.abc import Iterable from contextlib import contextmanager from copy import deepcopy @@ -33,9 +33,13 @@ _raise_if_older_schema, _read_config_file, ) -from .neighbor import get_neighbor_list -from ._search_indexer import _SearchIndexer, _DictPlaceholder -from ._utility import _mkdir_p, _nested_dicts_to_dotted_keys, _to_hashable, _dotted_dict_to_nested_dicts +from ._search_indexer import _DictPlaceholder, _SearchIndexer +from ._utility import ( + _dotted_dict_to_nested_dicts, + _mkdir_p, + _nested_dicts_to_dotted_keys, + _to_hashable, +) from .errors import ( DestinationExistsError, IncompatibleSchemaVersion, @@ -45,6 +49,7 @@ from .filterparse import _add_prefix, _root_keys, parse_filter from .h5store import H5StoreManager from .job import Job, calc_id +from .neighbor import get_neighbor_list from .schema import ProjectSchema from .sync import sync_projects from .version import SCHEMA_VERSION, __version__ @@ -654,6 +659,7 @@ def detect_schema(self, exclude_const=False, subset=None): """ statepoint_index = self.detect_schema_index(exclude_const, subset) + def _collect_by_type(values): """Construct a mapping of types to a set of elements drawn from the input values.""" values_by_type = defaultdict(set) @@ -665,11 +671,8 @@ def _collect_by_type(values): {key: _collect_by_type(value) for key, value in statepoint_index} ) - def detect_schema_index(self, exclude_const=False, subset=None): - """Return just the state point index not collected by type. - - """ + """Return just the state point index not collected by type.""" from .schema import _build_job_statepoint_index @@ -683,8 +686,6 @@ def detect_schema_index(self, exclude_const=False, subset=None): return statepoint_index - - def _find_job_ids(self, filter=None): """Find the job ids of all jobs matching the filter. @@ -1677,14 +1678,16 @@ def _flat_schema(self): for key, schema_values in schema.items(): tuples_to_sort = [] for type_name in schema_values: - tuples_to_sort.append((type_name.__name__, sorted(schema_values[type_name]))) + tuples_to_sort.append( + (type_name.__name__, sorted(schema_values[type_name])) + ) combined_values = [] - for _, v in sorted(tuples_to_sort, key = lambda x: x[0]): + for _, v in sorted(tuples_to_sort, key=lambda x: x[0]): combined_values.extend(v) sorted_schema[key] = combined_values return sorted_schema - def get_neighbors(self, ignore = []): + def get_neighbors(self, ignore=[]): """Return the neighbors of each job in the project. The neighbors of a job are jobs that differ along one state point parameter. @@ -1730,7 +1733,7 @@ def get_neighbors(self, ignore = []): self.update_cache() # pass a copy of cache return get_neighbor_list(dict(self._sp_cache), sorted_schema, ignore) - + @contextmanager def TemporaryProject(cls=None, **kwargs): @@ -2223,6 +2226,7 @@ def _repr_html_(self): """ return repr(self) + self._repr_html_jobs() + def init_project(path=None): """Initialize a project. diff --git a/tests/test_neighborlist.py b/tests/test_neighborlist.py index bd97ef9b6..7e49dc0c6 100644 --- a/tests/test_neighborlist.py +++ b/tests/test_neighborlist.py @@ -1,67 +1,91 @@ -import pytest +from itertools import product +import pytest from test_project import TestProject -from itertools import product + class TestNeighborList(TestProject): def test_neighbors(self): a_vals = [1, 2] b_vals = [3, 4, 5] - for a,b in product(a_vals, b_vals): + for a, b in product(a_vals, b_vals): self.project.open_job({"a": a, "b": b}).init() neighbor_list = self.project.get_neighbors() - for a,b in product(a_vals, b_vals): + for a, b in product(a_vals, b_vals): job = self.project.open_job({"a": a, "b": b}) neighbors_job = job.get_neighbors() with pytest.raises(RuntimeWarning): - job.get_neighbors(ignore = ["not_present"]) - + job.get_neighbors(ignore=["not_present"]) + neighbors_project = neighbor_list[job.id] assert neighbors_project == neighbors_job this_neighbors = neighbors_project - + # a neighbors if a == 1: - assert this_neighbors["a"][2] == self.project.open_job({"a": 2, "b": b}).id + assert ( + this_neighbors["a"][2] == self.project.open_job({"a": 2, "b": b}).id + ) elif a == 2: - assert this_neighbors["a"][1] == self.project.open_job({"a": 1, "b": b}).id + assert ( + this_neighbors["a"][1] == self.project.open_job({"a": 1, "b": b}).id + ) # b neighbors if b == 3: - assert this_neighbors["b"][4] == self.project.open_job({"a": a, "b": 4}).id + assert ( + this_neighbors["b"][4] == self.project.open_job({"a": a, "b": 4}).id + ) elif b == 4: - assert this_neighbors["b"][3] == self.project.open_job({"a": a, "b": 3}).id - assert this_neighbors["b"][5] == self.project.open_job({"a": a, "b": 5}).id + assert ( + this_neighbors["b"][3] == self.project.open_job({"a": a, "b": 3}).id + ) + assert ( + this_neighbors["b"][5] == self.project.open_job({"a": a, "b": 5}).id + ) elif b == 5: - assert this_neighbors["b"][4] == self.project.open_job({"a": a, "b": 4}).id + assert ( + this_neighbors["b"][4] == self.project.open_job({"a": a, "b": 4}).id + ) with pytest.raises(RuntimeWarning): - self.project.get_neighbors(ignore = ["not_present"]) - + self.project.get_neighbors(ignore=["not_present"]) def test_neighbors_ignore(self): b_vals = [3, 4, 5] for b in b_vals: self.project.open_job({"b": b, "2b": 2 * b}).init() - neighbor_list = self.project.get_neighbors(ignore = "2b") + neighbor_list = self.project.get_neighbors(ignore="2b") for b in b_vals: job = self.project.open_job({"b": b, "2b": 2 * b}) - neighbors_job = job.get_neighbors(ignore = ["2b"]) + neighbors_job = job.get_neighbors(ignore=["2b"]) this_neighbors = neighbor_list[job.id] assert this_neighbors == neighbors_job if b == 3: - assert this_neighbors["b"][4] == self.project.open_job({"b": 4, "2b": 8}).id + assert ( + this_neighbors["b"][4] + == self.project.open_job({"b": 4, "2b": 8}).id + ) elif b == 4: - assert this_neighbors["b"][3] == self.project.open_job({"b": 3, "2b": 6}).id - assert this_neighbors["b"][5] == self.project.open_job({"b": 5, "2b": 10}).id + assert ( + this_neighbors["b"][3] + == self.project.open_job({"b": 3, "2b": 6}).id + ) + assert ( + this_neighbors["b"][5] + == self.project.open_job({"b": 5, "2b": 10}).id + ) elif b == 5: - assert this_neighbors["b"][4] == self.project.open_job({"b": 4, "2b": 8}).id + assert ( + this_neighbors["b"][4] + == self.project.open_job({"b": 4, "2b": 8}).id + ) def test_neighbors_nested(self): a_vals = [{"c": 2}, {"c": 3}, {"c": 4}, {"c": "5"}, {"c": "hello"}] @@ -80,36 +104,60 @@ def test_neighbors_nested(self): assert this_neighbors == neighbors_job # note how the inconsistency in neighborlist access syntax comes from schema if c == 2: - assert this_neighbors["a.c"][3] == self.project.open_job({"a": {"c": 3}}).id + assert ( + this_neighbors["a.c"][3] + == self.project.open_job({"a": {"c": 3}}).id + ) elif c == 3: - assert this_neighbors["a.c"][2] == self.project.open_job({"a": {"c": 2}}).id - assert this_neighbors["a.c"][4] == self.project.open_job({"a": {"c": 4}}).id + assert ( + this_neighbors["a.c"][2] + == self.project.open_job({"a": {"c": 2}}).id + ) + assert ( + this_neighbors["a.c"][4] + == self.project.open_job({"a": {"c": 4}}).id + ) elif c == 4: - assert this_neighbors["a.c"][3] == self.project.open_job({"a": {"c": 3}}).id - assert this_neighbors["a.c"]["5"] == self.project.open_job({"a": {"c": "5"}}).id + assert ( + this_neighbors["a.c"][3] + == self.project.open_job({"a": {"c": 3}}).id + ) + assert ( + this_neighbors["a.c"]["5"] + == self.project.open_job({"a": {"c": "5"}}).id + ) elif c == "5": - assert this_neighbors["a.c"][4] == self.project.open_job({"a": {"c": 4}}).id - assert this_neighbors["a.c"]["hello"] == self.project.open_job({"a": {"c": "hello"}}).id + assert ( + this_neighbors["a.c"][4] + == self.project.open_job({"a": {"c": 4}}).id + ) + assert ( + this_neighbors["a.c"]["hello"] + == self.project.open_job({"a": {"c": "hello"}}).id + ) def test_neighbors_disjoint_ignore(self): - for a,b in product([1,2,3], [5,6,7]): - self.project.open_job({"a": a, "b": b, "2b": 2*b}).init() + for a, b in product([1, 2, 3], [5, 6, 7]): + self.project.open_job({"a": a, "b": b, "2b": 2 * b}).init() for x in [{"n": "nested"}, {"n": "values"}]: self.project.open_job({"x": x}).init() - neighbor_list = self.project.get_neighbors(ignore = ["2b"]) + neighbor_list = self.project.get_neighbors(ignore=["2b"]) job = self.project.open_job({"x": {"n": "nested"}}) - neighbors_job = job.get_neighbors(ignore = ["2b"]) + neighbors_job = job.get_neighbors(ignore=["2b"]) neighbors_project = neighbor_list[job.id] assert neighbors_project == neighbors_job - assert neighbors_project["x.n"]["values"] == self.project.open_job({"x": {"n": "values"}}).id + assert ( + neighbors_project["x.n"]["values"] + == self.project.open_job({"x": {"n": "values"}}).id + ) def test_neighbors_varied_types(self): # in sort order # NoneType is first because it's capitalized - a_vals = [None, False, True, 1.2, 1.3, 2, "1", "2", "x", "y", (3,4), (5,6)] + a_vals = [None, False, True, 1.2, 1.3, 2, "1", "2", "x", "y", (3, 4), (5, 6)] job_ids = [] for a in a_vals: @@ -118,19 +166,19 @@ def test_neighbors_varied_types(self): neighbor_list = self.project.get_neighbors() - for i,a in enumerate(a_vals): + for i, a in enumerate(a_vals): jobid = job_ids[i] - job = self.project.open_job(id = jobid) + job = self.project.open_job(id=jobid) neighbors_job = job.get_neighbors() this_neighbors = neighbor_list[jobid] assert this_neighbors == neighbors_job if i > 0: - prev_val = a_vals[i-1] - assert this_neighbors["a"][prev_val] == job_ids[i-1] + prev_val = a_vals[i - 1] + assert this_neighbors["a"][prev_val] == job_ids[i - 1] if i < len(a_vals) - 1: - next_val = a_vals[i+1] - assert this_neighbors["a"][next_val] == job_ids[i+1] + next_val = a_vals[i + 1] + assert this_neighbors["a"][next_val] == job_ids[i + 1] def test_neighbors_no(self): self.project.open_job({"a": 1}).init() @@ -144,20 +192,16 @@ def test_neighbors_no(self): assert len(v) == 0 def test_neighbors_ignore_dups(self): - a_vals = [1,2] - b_vals = [3,4,5] - for a,b in product(a_vals, b_vals): + a_vals = [1, 2] + b_vals = [3, 4, 5] + for a, b in product(a_vals, b_vals): self.project.open_job({"a": a, "b": b}).init() with pytest.raises(ValueError): - self.project.get_neighbors(ignore = "a") + self.project.get_neighbors(ignore="a") with pytest.raises(ValueError): - self.project.get_neighbors(ignore = "b") + self.project.get_neighbors(ignore="b") for job in self.project: with pytest.raises(ValueError): - job.get_neighbors(ignore = "a") + job.get_neighbors(ignore="a") with pytest.raises(ValueError): - job.get_neighbors(ignore = "b") - - - - + job.get_neighbors(ignore="b") diff --git a/tests/test_project.py b/tests/test_project.py index ce3bad5e4..c31790bc0 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -2377,6 +2377,7 @@ def test_no_migration(self): class TestProjectNeighbors(TestProjectBase): pass + def _initialize_v1_project(dirname, with_workspace=True, with_other_files=True): # Create v1 config file. cfg_fn = os.path.join(dirname, "signac.rc") From 41c608bd69cd6a9b5fe49c1a13aafa566bf23cf6 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 6 Aug 2025 13:35:26 -0400 Subject: [PATCH 43/73] Remove prototype code --- signac/_search_indexer.py | 18 +----------------- signac/project.py | 25 +++++++++---------------- 2 files changed, 10 insertions(+), 33 deletions(-) diff --git a/signac/_search_indexer.py b/signac/_search_indexer.py index 158b7f466..0f561a615 100644 --- a/signac/_search_indexer.py +++ b/signac/_search_indexer.py @@ -255,22 +255,6 @@ class _SearchIndexer(dict): """ - def build_all_index(self): - # figure out keys from all jobs - - # go through jobs, getting keys of each job - for _id, spdoc in self.items(): - v = spdoc["sp"] - if type(v) is list: - index[_to_hashable(v)].add(_id) - elif type(v) is dict: - index[_DictPlaceholder].add(_id) - else: - index[v].add(_id) - - def get_index(self, key): - pass - def build_index(self, key): """Build index for a given key. @@ -295,7 +279,7 @@ def build_index(self, key): logger.debug(f"Building index for key '{key}'...") nodes = key.split(".") index = _TypedSetDefaultDict() - # breakpoint() + for _id, doc in self.items(): try: v = doc diff --git a/signac/project.py b/signac/project.py index 92c1d3a21..74961c522 100644 --- a/signac/project.py +++ b/signac/project.py @@ -658,7 +658,15 @@ def detect_schema(self, exclude_const=False, subset=None): The detected project schema. """ - statepoint_index = self.detect_schema_index(exclude_const, subset) + from .schema import _build_job_statepoint_index + + index = _SearchIndexer(self._build_index(include_job_document=False)) + if subset is not None: + subset = {str(s) for s in subset}.intersection(index.keys()) + index = _SearchIndexer((id_, index[id_]) for id_ in subset) + statepoint_index = _build_job_statepoint_index( + exclude_const=exclude_const, index=index + ) def _collect_by_type(values): """Construct a mapping of types to a set of elements drawn from the input values.""" @@ -671,21 +679,6 @@ def _collect_by_type(values): {key: _collect_by_type(value) for key, value in statepoint_index} ) - def detect_schema_index(self, exclude_const=False, subset=None): - """Return just the state point index not collected by type.""" - - from .schema import _build_job_statepoint_index - - index = _SearchIndexer(self._build_index(include_job_document=False)) - if subset is not None: - subset = {str(s) for s in subset}.intersection(index.keys()) - index = _SearchIndexer((id_, index[id_]) for id_ in subset) - statepoint_index = _build_job_statepoint_index( - exclude_const=exclude_const, index=index - ) - - return statepoint_index - def _find_job_ids(self, filter=None): """Find the job ids of all jobs matching the filter. From 2e1c26e8f143568ddb9e2af7f8eb4d7bf85e78b3 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 6 Aug 2025 13:39:35 -0400 Subject: [PATCH 44/73] Address some precommit errors --- signac/neighbor.py | 10 +++++----- signac/project.py | 7 ++----- tests/test_project.py | 1 - 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/signac/neighbor.py b/signac/neighbor.py index e19cc75f7..6b46ab846 100644 --- a/signac/neighbor.py +++ b/signac/neighbor.py @@ -11,7 +11,7 @@ def prepare_shadow_project(sp_cache, ignore: list): - """Build cache and mapping for shadow project, which comes from ignored keys. + r"""Build cache and mapping for shadow project, which comes from ignored keys. We use cache lookups for speedy searching. Ignoring a key creates a subset of jobs, now identified with different job ids. Call it "shadow" job id because we're making a projection of @@ -35,7 +35,8 @@ def prepare_shadow_project(sp_cache, ignore: list): a map from shadow job id to project job id. shadow_cache - an in-memory state point cache for the shadow project mapping shadow job id --> shadow state point, in dotted key format + an in-memory state point cache for the shadow project mapping + shadow job id --> shadow state point, in dotted key format Use cases: @@ -208,7 +209,6 @@ def neighbors_of_sp(statepoint, dotted_sp_cache, sorted_schema): sorted_schema : dict Map from key (in dotted notation) to sorted values of the key to search over """ - neighbors = {} for key, schema_values in sorted_schema.items(): # from project # allow comparison with output of schema, which is hashable @@ -246,7 +246,7 @@ def shadow_neighbors_to_neighbors(shadow_neighbors, shadow_map): shadow_map : dict map from shadow job id to project job id """ - neighbors = dict() + neighbors = {} for neighbor_key, neighbor_vals in shadow_neighbors.items(): neighbors[neighbor_key] = {k: shadow_map[i] for k, i in neighbor_vals.items()} return neighbors @@ -262,7 +262,7 @@ def shadow_neighbor_list_to_neighbor_list(shadow_neighbor_list, shadow_map): shadow_map : dict map from shadow job id to project job id """ - neighbor_list = dict() + neighbor_list = {} for jobid, shadow_neighbors in shadow_neighbor_list.items(): neighbor_list[shadow_map[jobid]] = shadow_neighbors_to_neighbors( shadow_neighbors, shadow_map diff --git a/signac/project.py b/signac/project.py index 74961c522..adbe14acb 100644 --- a/signac/project.py +++ b/signac/project.py @@ -4,7 +4,6 @@ """The signac Project and JobsCursor classes.""" import errno -import functools import gzip import json import logging @@ -13,7 +12,7 @@ import shutil import time import warnings -from collections import Counter, defaultdict +from collections import defaultdict from collections.abc import Iterable from contextlib import contextmanager from copy import deepcopy @@ -35,10 +34,8 @@ ) from ._search_indexer import _DictPlaceholder, _SearchIndexer from ._utility import ( - _dotted_dict_to_nested_dicts, _mkdir_p, _nested_dicts_to_dotted_keys, - _to_hashable, ) from .errors import ( DestinationExistsError, @@ -1661,7 +1658,7 @@ def __setstate__(self, state): self.__dict__.update(state) def _flat_schema(self): - """For each state point parameter, make a flat list sorted by values it takes in the project. + """For each state point parameter, make a flat list sorted by its values in the project. This is almost like schema, but the schema separates items by type. To sort between different types, put in order of the name of the type diff --git a/tests/test_project.py b/tests/test_project.py index c31790bc0..e2679948e 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -12,7 +12,6 @@ import sys import textwrap from contextlib import contextmanager, redirect_stderr -from itertools import product from tarfile import TarFile from tempfile import TemporaryDirectory from time import time From c46f42ca061c5dbc6b7fdd4f40e0efc499df74f9 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 6 Aug 2025 13:41:09 -0400 Subject: [PATCH 45/73] Move neighbor module to _neighbor --- signac/{neighbor.py => _neighbor.py} | 0 signac/job.py | 2 +- signac/project.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename signac/{neighbor.py => _neighbor.py} (100%) diff --git a/signac/neighbor.py b/signac/_neighbor.py similarity index 100% rename from signac/neighbor.py rename to signac/_neighbor.py diff --git a/signac/job.py b/signac/job.py index c97d0ea84..4bc8053a6 100644 --- a/signac/job.py +++ b/signac/job.py @@ -990,7 +990,7 @@ def get_neighbors(self, ignore=[]): """ from ._search_indexer import _DictPlaceholder from ._utility import _nested_dicts_to_dotted_keys - from .neighbor import ( + from ._neighbor import ( neighbors_of_sp, prepare_shadow_project, shadow_neighbors_to_neighbors, diff --git a/signac/project.py b/signac/project.py index adbe14acb..bf5244555 100644 --- a/signac/project.py +++ b/signac/project.py @@ -46,7 +46,7 @@ from .filterparse import _add_prefix, _root_keys, parse_filter from .h5store import H5StoreManager from .job import Job, calc_id -from .neighbor import get_neighbor_list +from ._neighbor import get_neighbor_list from .schema import ProjectSchema from .sync import sync_projects from .version import SCHEMA_VERSION, __version__ From 4908d6abcfe6c4eba2047bacb4bd5dd8b7bf333b Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 6 Aug 2025 13:42:56 -0400 Subject: [PATCH 46/73] Address some precommit warnings --- signac/__main__.py | 1 + signac/_neighbor.py | 5 +++-- signac/job.py | 7 ++++--- signac/project.py | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/signac/__main__.py b/signac/__main__.py index bb6e08121..0df552132 100644 --- a/signac/__main__.py +++ b/signac/__main__.py @@ -198,6 +198,7 @@ def main_statepoint(args): def main_neighbors(args): + """Handle the neighbors subcommand.""" project = get_project() if args.job_id: jobs = (_open_job_by_id(project, jid) for jid in args.job_id) diff --git a/signac/_neighbor.py b/signac/_neighbor.py index 6b46ab846..aa902e31c 100644 --- a/signac/_neighbor.py +++ b/signac/_neighbor.py @@ -257,8 +257,9 @@ def shadow_neighbor_list_to_neighbor_list(shadow_neighbor_list, shadow_map): Parameters ---------- - shadow_neighbor_list : dict of shadow job ids to state point parameters to neighbor values to shadow job id - neighbors containing shadow job ids + shadow_neighbor_list : dict + neighbor_list containing shadow job ids + dict shadow job ids to state point parameters to neighbor values to shadow job id shadow_map : dict map from shadow job id to project job id """ diff --git a/signac/job.py b/signac/job.py index 4bc8053a6..cbf690270 100644 --- a/signac/job.py +++ b/signac/job.py @@ -988,13 +988,13 @@ def get_neighbors(self, ignore=[]): ignore : list List of state point parameters to ignore when building neighbor list """ - from ._search_indexer import _DictPlaceholder - from ._utility import _nested_dicts_to_dotted_keys from ._neighbor import ( neighbors_of_sp, prepare_shadow_project, shadow_neighbors_to_neighbors, ) + from ._search_indexer import _DictPlaceholder + from ._utility import _nested_dicts_to_dotted_keys if not isinstance(ignore, list): ignore = [ignore] @@ -1009,7 +1009,8 @@ def get_neighbors(self, ignore=[]): warnings.warn("Ignored key not present in project.", RuntimeWarning) if len(ignore) > 0: - ig = [sp.pop(i, None) for i in ignore] + for i in ignore: + sp.pop(i, None) shadow_map, shadow_cache = prepare_shadow_project(sp_cache, ignore=ignore) neighbors = neighbors_of_sp(sp, shadow_cache, sorted_schema) neighbors = shadow_neighbors_to_neighbors(neighbors, shadow_map) diff --git a/signac/project.py b/signac/project.py index bf5244555..9e4780a17 100644 --- a/signac/project.py +++ b/signac/project.py @@ -32,6 +32,7 @@ _raise_if_older_schema, _read_config_file, ) +from ._neighbor import get_neighbor_list from ._search_indexer import _DictPlaceholder, _SearchIndexer from ._utility import ( _mkdir_p, @@ -46,7 +47,6 @@ from .filterparse import _add_prefix, _root_keys, parse_filter from .h5store import H5StoreManager from .job import Job, calc_id -from ._neighbor import get_neighbor_list from .schema import ProjectSchema from .sync import sync_projects from .version import SCHEMA_VERSION, __version__ From 33b2525773da382a7f3bc68639927848d544623e Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 6 Aug 2025 13:51:03 -0400 Subject: [PATCH 47/73] Type the counting defaultdict --- signac/_neighbor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/signac/_neighbor.py b/signac/_neighbor.py index aa902e31c..bde747dda 100644 --- a/signac/_neighbor.py +++ b/signac/_neighbor.py @@ -1,5 +1,6 @@ from collections import defaultdict from functools import partial +from typing import DefaultDict from ._search_indexer import _DictPlaceholder from ._utility import ( @@ -103,7 +104,7 @@ def prepare_shadow_project(sp_cache, ignore: list): if len(set(job_projection.values())) != len(job_projection): # Make a helpful error message for map that has duplicates shadow_to_job = defaultdict(list) - counts = defaultdict(int) + counts: DefaultDict[str, int] = defaultdict(int) for job_id, shadow_id in job_projection.items(): shadow_to_job[shadow_id].append(job_id) counts[shadow_id] += 1 From 527a8997bfb9ceb8b81d961af42afe8f247b707b Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 3 Sep 2025 21:01:45 -0400 Subject: [PATCH 48/73] Allow signac neighbors --ignore parsing --- signac/__main__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/signac/__main__.py b/signac/__main__.py index 0df552132..d29b8e29e 100644 --- a/signac/__main__.py +++ b/signac/__main__.py @@ -203,7 +203,7 @@ def main_neighbors(args): if args.job_id: jobs = (_open_job_by_id(project, jid) for jid in args.job_id) for job in jobs: - pprint(job.get_neighbors()) + pprint(job.get_neighbors(ignore=args.ignore)) def main_document(args): @@ -985,6 +985,11 @@ def main(): type=str, help="One or more job ids. The corresponding jobs must be initialized.", ) + parser_neighbor.add_argument( + "--ignore", + nargs="+", + type=str, + help="State point keys to ignore when finding neighbors. Useful for state point parameters that change together.") parser_neighbor.set_defaults(func=main_neighbors) parser_diff = subparsers.add_parser( @@ -994,6 +999,7 @@ def main(): "job_id", nargs="*", type=str, + default=[], help="One or more job ids. The corresponding jobs must be initialized.", ) parser_diff.add_argument( From e6607f9a38321ae0e2520ec7a7863e88d4a1f152 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 3 Sep 2025 21:26:38 -0400 Subject: [PATCH 49/73] Don't print keys with no neighbors --- signac/__main__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/signac/__main__.py b/signac/__main__.py index d29b8e29e..095eb9bbe 100644 --- a/signac/__main__.py +++ b/signac/__main__.py @@ -203,7 +203,8 @@ def main_neighbors(args): if args.job_id: jobs = (_open_job_by_id(project, jid) for jid in args.job_id) for job in jobs: - pprint(job.get_neighbors(ignore=args.ignore)) + nl = job.get_neighbors(ignore=args.ignore) + pprint({k: v for k,v in nl.items() if len(v)>0}) def main_document(args): From 4f60c23a552a15a112c86d2d7daa7f45e8c9cf4d Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Thu, 4 Sep 2025 11:44:36 -0400 Subject: [PATCH 50/73] Warn which ignored keys are not found, and only remove those --- signac/job.py | 12 ++++++++---- signac/project.py | 10 ++++++---- tests/test_neighborlist.py | 4 ++-- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/signac/job.py b/signac/job.py index cbf690270..2545c6fa3 100644 --- a/signac/job.py +++ b/signac/job.py @@ -1003,10 +1003,14 @@ def get_neighbors(self, ignore=[]): sorted_schema = self._project._flat_schema() sp = dict(_nested_dicts_to_dotted_keys(self.cached_statepoint)) - need_to_ignore = [sorted_schema.pop(i, _DictPlaceholder) for i in ignore] - if any(a is _DictPlaceholder for a in need_to_ignore): - ignore = [] - warnings.warn("Ignored key not present in project.", RuntimeWarning) + need_to_ignore = [sorted_schema.pop(ig, _DictPlaceholder) for ig in ignore] + if any(is_bad_key := list(a is _DictPlaceholder for a in need_to_ignore)): + # any uses up the iterator + from itertools import compress + bad_keys = list(compress(ignore, is_bad_key)) + warnings.warn(f"Ignored state point parameter{"s" if len(bad_keys)>1 else ""} {bad_keys} not present in project.", RuntimeWarning) + for b in bad_keys: + ignore.remove(b) if len(ignore) > 0: for i in ignore: diff --git a/signac/project.py b/signac/project.py index 9e4780a17..6fdf5955d 100644 --- a/signac/project.py +++ b/signac/project.py @@ -17,7 +17,7 @@ from contextlib import contextmanager from copy import deepcopy from datetime import timedelta -from itertools import groupby +from itertools import compress, groupby from multiprocessing.pool import ThreadPool from tempfile import TemporaryDirectory from threading import RLock @@ -1716,9 +1716,11 @@ def get_neighbors(self, ignore=[]): sorted_schema = self._flat_schema() need_to_ignore = [sorted_schema.pop(ig, _DictPlaceholder) for ig in ignore] - if any(a is _DictPlaceholder for a in need_to_ignore): - ignore = [] - warnings.warn("Ignored key not present in project.", RuntimeWarning) + if any(is_bad_key := list(a is _DictPlaceholder for a in need_to_ignore)): + bad_keys = list(compress(ignore, is_bad_key)) + warnings.warn(f"Ignored state point parameter{"s" if len(bad_keys)>1 else ""} {bad_keys} not present in project.", RuntimeWarning) + for b in bad_keys: + ignore.remove(b) self.update_cache() # pass a copy of cache diff --git a/tests/test_neighborlist.py b/tests/test_neighborlist.py index 7e49dc0c6..a503b6703 100644 --- a/tests/test_neighborlist.py +++ b/tests/test_neighborlist.py @@ -16,7 +16,7 @@ def test_neighbors(self): for a, b in product(a_vals, b_vals): job = self.project.open_job({"a": a, "b": b}) neighbors_job = job.get_neighbors() - with pytest.raises(RuntimeWarning): + with pytest.warns(RuntimeWarning, match="not_present"): job.get_neighbors(ignore=["not_present"]) neighbors_project = neighbor_list[job.id] @@ -50,7 +50,7 @@ def test_neighbors(self): assert ( this_neighbors["b"][4] == self.project.open_job({"a": a, "b": 4}).id ) - with pytest.raises(RuntimeWarning): + with pytest.warns(RuntimeWarning, match="not_present"): self.project.get_neighbors(ignore=["not_present"]) def test_neighbors_ignore(self): From 18e226e42582472dcc71e4a27c5aa9b9f1837832 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Thu, 4 Sep 2025 11:49:25 -0400 Subject: [PATCH 51/73] Ensure bad key is listed in error message --- tests/test_neighborlist.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_neighborlist.py b/tests/test_neighborlist.py index a503b6703..088b074d3 100644 --- a/tests/test_neighborlist.py +++ b/tests/test_neighborlist.py @@ -196,12 +196,13 @@ def test_neighbors_ignore_dups(self): b_vals = [3, 4, 5] for a, b in product(a_vals, b_vals): self.project.open_job({"a": a, "b": b}).init() - with pytest.raises(ValueError): + # match with single quote to avoid matching on the a in "makes" + with pytest.raises(ValueError, match="'a'"): self.project.get_neighbors(ignore="a") - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="'b'"): self.project.get_neighbors(ignore="b") for job in self.project: - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="'a'"): job.get_neighbors(ignore="a") - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="'b'"): job.get_neighbors(ignore="b") From 9b126875316431da1c8401b6c2fc4fb825d53682 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 5 Sep 2025 20:25:54 +0000 Subject: [PATCH 52/73] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- signac/__main__.py | 5 +++-- signac/job.py | 6 +++++- signac/project.py | 5 ++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/signac/__main__.py b/signac/__main__.py index 095eb9bbe..4045207b4 100644 --- a/signac/__main__.py +++ b/signac/__main__.py @@ -204,7 +204,7 @@ def main_neighbors(args): jobs = (_open_job_by_id(project, jid) for jid in args.job_id) for job in jobs: nl = job.get_neighbors(ignore=args.ignore) - pprint({k: v for k,v in nl.items() if len(v)>0}) + pprint({k: v for k, v in nl.items() if len(v) > 0}) def main_document(args): @@ -990,7 +990,8 @@ def main(): "--ignore", nargs="+", type=str, - help="State point keys to ignore when finding neighbors. Useful for state point parameters that change together.") + help="State point keys to ignore when finding neighbors. Useful for state point parameters that change together.", + ) parser_neighbor.set_defaults(func=main_neighbors) parser_diff = subparsers.add_parser( diff --git a/signac/job.py b/signac/job.py index 2545c6fa3..f6aae68d0 100644 --- a/signac/job.py +++ b/signac/job.py @@ -1007,8 +1007,12 @@ def get_neighbors(self, ignore=[]): if any(is_bad_key := list(a is _DictPlaceholder for a in need_to_ignore)): # any uses up the iterator from itertools import compress + bad_keys = list(compress(ignore, is_bad_key)) - warnings.warn(f"Ignored state point parameter{"s" if len(bad_keys)>1 else ""} {bad_keys} not present in project.", RuntimeWarning) + warnings.warn( + f"Ignored state point parameter{"s" if len(bad_keys)>1 else ""} {bad_keys} not present in project.", + RuntimeWarning, + ) for b in bad_keys: ignore.remove(b) diff --git a/signac/project.py b/signac/project.py index 6fdf5955d..51346fb67 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1718,7 +1718,10 @@ def get_neighbors(self, ignore=[]): need_to_ignore = [sorted_schema.pop(ig, _DictPlaceholder) for ig in ignore] if any(is_bad_key := list(a is _DictPlaceholder for a in need_to_ignore)): bad_keys = list(compress(ignore, is_bad_key)) - warnings.warn(f"Ignored state point parameter{"s" if len(bad_keys)>1 else ""} {bad_keys} not present in project.", RuntimeWarning) + warnings.warn( + f"Ignored state point parameter{"s" if len(bad_keys)>1 else ""} {bad_keys} not present in project.", + RuntimeWarning, + ) for b in bad_keys: ignore.remove(b) From 5f02aabfa984ef4f442f8a14c81f0a5227031798 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Sat, 6 Sep 2025 09:01:25 -0400 Subject: [PATCH 53/73] Support older f-string syntax --- signac/job.py | 2 +- signac/project.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/signac/job.py b/signac/job.py index f6aae68d0..1f965f92b 100644 --- a/signac/job.py +++ b/signac/job.py @@ -1010,7 +1010,7 @@ def get_neighbors(self, ignore=[]): bad_keys = list(compress(ignore, is_bad_key)) warnings.warn( - f"Ignored state point parameter{"s" if len(bad_keys)>1 else ""} {bad_keys} not present in project.", + f"Ignored state point parameter{'s' if len(bad_keys)>1 else ''} {bad_keys} not present in project.", RuntimeWarning, ) for b in bad_keys: diff --git a/signac/project.py b/signac/project.py index 51346fb67..887d4fd2c 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1719,7 +1719,7 @@ def get_neighbors(self, ignore=[]): if any(is_bad_key := list(a is _DictPlaceholder for a in need_to_ignore)): bad_keys = list(compress(ignore, is_bad_key)) warnings.warn( - f"Ignored state point parameter{"s" if len(bad_keys)>1 else ""} {bad_keys} not present in project.", + f"Ignored state point parameter{'s' if len(bad_keys)>1 else ''} {bad_keys} not present in project.", RuntimeWarning, ) for b in bad_keys: From 647922877859ae45636878e898f884fa4a18dff7 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 10 Sep 2025 13:29:45 -0400 Subject: [PATCH 54/73] Add neighbors in sort order --- signac/_neighbor.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/signac/_neighbor.py b/signac/_neighbor.py index bde747dda..17805d29f 100644 --- a/signac/_neighbor.py +++ b/signac/_neighbor.py @@ -229,14 +229,13 @@ def neighbors_of_sp(statepoint, dotted_sp_cache, sorted_schema): ) this_d = {} - if next_neighbor is not None: - this_d.update(next_neighbor) if prev_neighbor is not None: this_d.update(prev_neighbor) + if next_neighbor is not None: + this_d.update(next_neighbor) neighbors.update({key: this_d}) return neighbors - def shadow_neighbors_to_neighbors(shadow_neighbors, shadow_map): """Replace shadow job ids with actual job ids in the neighbors of one job. From 521e3d6faf48f35676eee7ffd5964b320caa78ba Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 10 Sep 2025 17:31:28 +0000 Subject: [PATCH 55/73] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- signac/_neighbor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/signac/_neighbor.py b/signac/_neighbor.py index 17805d29f..b06aae1ff 100644 --- a/signac/_neighbor.py +++ b/signac/_neighbor.py @@ -236,6 +236,7 @@ def neighbors_of_sp(statepoint, dotted_sp_cache, sorted_schema): neighbors.update({key: this_d}) return neighbors + def shadow_neighbors_to_neighbors(shadow_neighbors, shadow_map): """Replace shadow job ids with actual job ids in the neighbors of one job. From 2068eb6ce958652e42e9f2b88ec75a1e6638980a Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Thu, 18 Sep 2025 08:49:58 -0400 Subject: [PATCH 56/73] Optimization: Use detect_schema with exclude_const By definition, these keys will not distinguish any jobs so will not show up in the neighborlist. --- signac/project.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/signac/project.py b/signac/project.py index 887d4fd2c..93f453d64 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1657,13 +1657,13 @@ def __setstate__(self, state): state["_lock"] = RLock() self.__dict__.update(state) - def _flat_schema(self): + def _flat_schema(self, exclude_const=False): """For each state point parameter, make a flat list sorted by its values in the project. This is almost like schema, but the schema separates items by type. To sort between different types, put in order of the name of the type """ - schema = self.detect_schema() + schema = self.detect_schema(exclude_const=exclude_const) sorted_schema = {} for key, schema_values in schema.items(): tuples_to_sort = [] @@ -1714,7 +1714,7 @@ def get_neighbors(self, ignore=[]): if not isinstance(ignore, list): ignore = [ignore] - sorted_schema = self._flat_schema() + sorted_schema = self._flat_schema(exclude_const = True) need_to_ignore = [sorted_schema.pop(ig, _DictPlaceholder) for ig in ignore] if any(is_bad_key := list(a is _DictPlaceholder for a in need_to_ignore)): bad_keys = list(compress(ignore, is_bad_key)) From bdff3d1c36fb89609fb507a1261fb1fb209abe01 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Thu, 18 Sep 2025 17:07:28 -0400 Subject: [PATCH 57/73] Add failing test for specifying ignored keys as dotted keys --- tests/test_neighborlist.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/test_neighborlist.py b/tests/test_neighborlist.py index 088b074d3..15d73600d 100644 --- a/tests/test_neighborlist.py +++ b/tests/test_neighborlist.py @@ -87,6 +87,34 @@ def test_neighbors_ignore(self): == self.project.open_job({"b": 4, "2b": 8}).id ) + def test_neighbors_ignore_nested(self): + + a_vals = [{"b": 2, "c": 2}, {"b": 3, "c": 3}] + for a in a_vals: + self.project.open_job({"a": a}).init() + + neighbor_list = self.project.get_neighbors(ignore = "a.b") + + for a in a_vals: + job = self.project.open_job({"a": a}) + neighbors_job = job.get_neighbors(ignore = "a.b") + + c = a["c"] + + this_neighbors = neighbor_list[job.id] + assert this_neighbors == neighbors_job + + if c == 2: + assert ( + this_neighbors["a.c"][3] + == self.project.open_job({"a": {"b": 3, "c": 3}}).id + ) + elif c == 3: + assert ( + this_neighbors["a.c"][2] + == self.project.open_job({"a": {"b": 2, "c": 2}}).id + ) + def test_neighbors_nested(self): a_vals = [{"c": 2}, {"c": 3}, {"c": 4}, {"c": "5"}, {"c": "hello"}] for a in a_vals: From a08e2819efb5b90d5fd1ff9e0ac2c3dacc9432cf Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Thu, 18 Sep 2025 17:16:29 -0400 Subject: [PATCH 58/73] Support ignoring nested keys, specified in dotted key format --- signac/_neighbor.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/signac/_neighbor.py b/signac/_neighbor.py index b06aae1ff..f5390fbeb 100644 --- a/signac/_neighbor.py +++ b/signac/_neighbor.py @@ -28,7 +28,8 @@ def prepare_shadow_project(sp_cache, ignore: list): ---------- sp_cache, state point cache ignore: list of str - state point keys + state point keys to ignore, with nested keys specified in dotted key + format Returns ------- @@ -36,8 +37,10 @@ def prepare_shadow_project(sp_cache, ignore: list): a map from shadow job id to project job id. shadow_cache - an in-memory state point cache for the shadow project mapping + an in-memory state point cache for the shadow project that maps shadow job id --> shadow state point, in dotted key format + The shadow job id is computed from the nested key format with + the ignored keys removed. Use cases: @@ -93,12 +96,14 @@ def prepare_shadow_project(sp_cache, ignore: list): shadow_cache = {} # like a state point cache, but for the shadow project job_projection = {} # goes from job id to shadow id for jobid, sp in sp_cache.items(): - shadow_sp = dict(sp) + # Remove ignored keys while in dotted key format + shadow_sp_dotted = dict(_nested_dicts_to_dotted_keys(sp)) for ig in ignore: - shadow_sp.pop(ig, None) - shadow_id = calc_id(shadow_sp) + shadow_sp_dotted.pop(ig, None) + # id calculated from nested keys + shadow_id = calc_id(_dotted_dict_to_nested_dicts(shadow_sp_dotted)) # The cache needs to be in dotted key format, so just convert it here - shadow_cache[shadow_id] = dict(_nested_dicts_to_dotted_keys(shadow_sp)) + shadow_cache[shadow_id] = shadow_sp_dotted job_projection[jobid] = shadow_id if len(set(job_projection.values())) != len(job_projection): @@ -115,7 +120,7 @@ def prepare_shadow_project(sp_cache, ignore: list): raise ValueError( f"Ignoring {ignore} makes it impossible to distinguish some jobs:\n{err_str}" ) - # map from shadow job id to project job id + # invert the map to go from shadow job id to project job id shadow_map = {v: k for k, v in job_projection.items()} return shadow_map, shadow_cache From 59da1ca2446f0d85a84c87f89e7efdbb6b9e0b65 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Thu, 18 Sep 2025 18:13:53 -0400 Subject: [PATCH 59/73] Run ruff and attempt to address long lines --- signac/__main__.py | 3 ++- signac/job.py | 2 +- signac/project.py | 15 +++++++++------ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/signac/__main__.py b/signac/__main__.py index 4045207b4..1666f32b7 100644 --- a/signac/__main__.py +++ b/signac/__main__.py @@ -990,7 +990,8 @@ def main(): "--ignore", nargs="+", type=str, - help="State point keys to ignore when finding neighbors. Useful for state point parameters that change together.", + help="State point keys to ignore when finding neighbors. " + "Useful for state point parameters that change together.", ) parser_neighbor.set_defaults(func=main_neighbors) diff --git a/signac/job.py b/signac/job.py index 1f965f92b..b7fcb3d1e 100644 --- a/signac/job.py +++ b/signac/job.py @@ -1010,7 +1010,7 @@ def get_neighbors(self, ignore=[]): bad_keys = list(compress(ignore, is_bad_key)) warnings.warn( - f"Ignored state point parameter{'s' if len(bad_keys)>1 else ''} {bad_keys} not present in project.", + f"Ignored state point parameter{'s' if len(bad_keys) > 1 else ''} {bad_keys} not present in project.", RuntimeWarning, ) for b in bad_keys: diff --git a/signac/project.py b/signac/project.py index 93f453d64..8ba2c1baa 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1326,8 +1326,9 @@ def repair(self, job_ids=None): os.replace(invalid_wd, correct_wd) except OSError as error: logger.critical( - "Unable to fix location of job with " - " id '{}': '{}'.".format(job_id, error) + "Unable to fix location of job with id '{}': '{}'.".format( + job_id, error + ) ) corrupted.append(job_id) continue @@ -1346,8 +1347,9 @@ def repair(self, job_ids=None): job.init() except Exception as error: logger.error( - "Error during initialization of job with " - "id '{}': '{}'.".format(job_id, error) + "Error during initialization of job with id '{}': '{}'.".format( + job_id, error + ) ) try: # Attempt to fix the job state point file. job.init(force=True) @@ -1714,12 +1716,13 @@ def get_neighbors(self, ignore=[]): if not isinstance(ignore, list): ignore = [ignore] - sorted_schema = self._flat_schema(exclude_const = True) + sorted_schema = self._flat_schema(exclude_const=True) need_to_ignore = [sorted_schema.pop(ig, _DictPlaceholder) for ig in ignore] if any(is_bad_key := list(a is _DictPlaceholder for a in need_to_ignore)): bad_keys = list(compress(ignore, is_bad_key)) warnings.warn( - f"Ignored state point parameter{'s' if len(bad_keys)>1 else ''} {bad_keys} not present in project.", + f"Ignored state point parameter{'s' if len(bad_keys) > 1 else ''} {bad_keys + } not present in project.", RuntimeWarning, ) for b in bad_keys: From d0b5831913dc300b36496203fd89a4e4b0008603 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Thu, 18 Sep 2025 18:55:31 -0400 Subject: [PATCH 60/73] Apply suggestions from review --- signac/__main__.py | 4 ++-- signac/_neighbor.py | 41 ++++++++++++++++++++--------------------- signac/job.py | 6 +++--- signac/project.py | 9 +++++---- 4 files changed, 30 insertions(+), 30 deletions(-) diff --git a/signac/__main__.py b/signac/__main__.py index 1666f32b7..b94f635cd 100644 --- a/signac/__main__.py +++ b/signac/__main__.py @@ -187,7 +187,7 @@ def main_statepoint(args): """Handle statepoint subcommand.""" project = get_project() if args.job_id: - jobs = (_open_job_by_id(project, jid) for jid in args.job_id) + jobs = (_open_job_by_id(project, job_id) for job_id in args.job_id) else: jobs = project for job in jobs: @@ -201,7 +201,7 @@ def main_neighbors(args): """Handle the neighbors subcommand.""" project = get_project() if args.job_id: - jobs = (_open_job_by_id(project, jid) for jid in args.job_id) + jobs = (_open_job_by_id(project, job_id) for job_id in args.job_id) for job in jobs: nl = job.get_neighbors(ignore=args.ignore) pprint({k: v for k, v in nl.items() if len(v) > 0}) diff --git a/signac/_neighbor.py b/signac/_neighbor.py index f5390fbeb..de282523b 100644 --- a/signac/_neighbor.py +++ b/signac/_neighbor.py @@ -28,17 +28,16 @@ def prepare_shadow_project(sp_cache, ignore: list): ---------- sp_cache, state point cache ignore: list of str - state point keys to ignore, with nested keys specified in dotted key - format + State point keys to ignore, with nested keys specified in dotted key format Returns ------- shadow_map - a map from shadow job id to project job id. + A map from shadow job id to project job id shadow_cache - an in-memory state point cache for the shadow project that maps - shadow job id --> shadow state point, in dotted key format + An in-memory state point cache for the shadow project that maps + shadow job id --> shadow state point, in dotted key format. The shadow job id is computed from the nested key format with the ignored keys removed. @@ -136,10 +135,10 @@ def _search_cache_for_val(statepoint, cache, key, other_val): Parameters ---------- statepoint : dict - state point of job to modify. statepoint must not be a reference because it will be - modified in this function + State point of job to modify. Statepoint must not be a reference because it will be + modified in this function. cache : dict - project state point cache to search in + Project state point cache to search in key : str The key whose value to change other_val @@ -147,7 +146,7 @@ def _search_cache_for_val(statepoint, cache, key, other_val): Returns ------- - job id of similar job + Job id of similar job None, if not present """ statepoint.update({key: other_val}) @@ -168,14 +167,14 @@ def _search_out(search_direction, values, current_index, boundary_index, search_ search_direction : int, 1 or -1 1 means search in the positive direction from the index values : iterable - values to index into when searching + Values to index into when searching current_index : int - index into values to start searching from. + Index into values to start searching from. The value at this index is not accessed directly. boundary_index : int - the index at which to stop + The index at which to stop search_fun : function - unary function returning jobid if it exists and None otherwise + Unary function returning jobid if it exists and None otherwise Returns ------- @@ -183,8 +182,8 @@ def _search_out(search_direction, values, current_index, boundary_index, search_ {val: jobid} if jobid found per search_fun jobid : str - job id of the nearest job in the search_direction - val : value of the key at the neighbor jobid + Job id of the nearest job in the search_direction + val : Value of the key at the neighbor jobid """ query_index = current_index + search_direction # search either query_index >= low_boundary or query_index <= high_boundary @@ -203,7 +202,7 @@ def neighbors_of_sp(statepoint, dotted_sp_cache, sorted_schema): State point and cache must both use either job ids or shadow job ids. - statepoint and dotted_sp_cache must be in dotted key format, which is accessed by calling + Statepoint and dotted_sp_cache must be in dotted key format, which is accessed by calling _nested_dicts_to_dotted_keys on each state point in the cache. Parameters @@ -248,9 +247,9 @@ def shadow_neighbors_to_neighbors(shadow_neighbors, shadow_map): Parameters ---------- shadow_neighbors : dict of state point parameters to neighbor values to shadow job id - neighbors containing shadow job ids + Neighbors containing shadow job ids shadow_map : dict - map from shadow job id to project job id + Map from shadow job id to project job id """ neighbors = {} for neighbor_key, neighbor_vals in shadow_neighbors.items(): @@ -264,10 +263,10 @@ def shadow_neighbor_list_to_neighbor_list(shadow_neighbor_list, shadow_map): Parameters ---------- shadow_neighbor_list : dict - neighbor_list containing shadow job ids - dict shadow job ids to state point parameters to neighbor values to shadow job id + `neighbor_list` containing shadow job ids. + dict of shadow job ids to state point parameters to neighbor values to shadow job id shadow_map : dict - map from shadow job id to project job id + Map from shadow job id to project job id """ neighbor_list = {} for jobid, shadow_neighbors in shadow_neighbor_list.items(): diff --git a/signac/job.py b/signac/job.py index b7fcb3d1e..553dfcc27 100644 --- a/signac/job.py +++ b/signac/job.py @@ -1004,7 +1004,7 @@ def get_neighbors(self, ignore=[]): sorted_schema = self._project._flat_schema() sp = dict(_nested_dicts_to_dotted_keys(self.cached_statepoint)) need_to_ignore = [sorted_schema.pop(ig, _DictPlaceholder) for ig in ignore] - if any(is_bad_key := list(a is _DictPlaceholder for a in need_to_ignore)): + if any(is_bad_key := [a is _DictPlaceholder for a in need_to_ignore]): # any uses up the iterator from itertools import compress @@ -1013,8 +1013,8 @@ def get_neighbors(self, ignore=[]): f"Ignored state point parameter{'s' if len(bad_keys) > 1 else ''} {bad_keys} not present in project.", RuntimeWarning, ) - for b in bad_keys: - ignore.remove(b) + for bad_key in bad_keys: + ignore.remove(bad_key) if len(ignore) > 0: for i in ignore: diff --git a/signac/project.py b/signac/project.py index 8ba2c1baa..48f86a795 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1699,7 +1699,8 @@ def get_neighbors(self, ignore=[]): Returns ------- neighbor_list : dict - {jobid: {state_point_key: {prev_value: neighbor_id, next_value: neighbor_id}}} + A mapping of jobid to state point keys to previous and next job ids along each key + (see above). Example ------- @@ -1718,15 +1719,15 @@ def get_neighbors(self, ignore=[]): sorted_schema = self._flat_schema(exclude_const=True) need_to_ignore = [sorted_schema.pop(ig, _DictPlaceholder) for ig in ignore] - if any(is_bad_key := list(a is _DictPlaceholder for a in need_to_ignore)): + if any(is_bad_key := [a is _DictPlaceholder for a in need_to_ignore]): bad_keys = list(compress(ignore, is_bad_key)) warnings.warn( f"Ignored state point parameter{'s' if len(bad_keys) > 1 else ''} {bad_keys } not present in project.", RuntimeWarning, ) - for b in bad_keys: - ignore.remove(b) + for bad_key in bad_keys: + ignore.remove(bad_key) self.update_cache() # pass a copy of cache From a7177222d2ae1f44d1971bc757df5390b92e6aaf Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Thu, 18 Sep 2025 19:36:17 -0400 Subject: [PATCH 61/73] Update docstring for get_neighbors --- signac/job.py | 27 ++++++++++++++++++++++++++- signac/project.py | 27 ++++++++++++++++++--------- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/signac/job.py b/signac/job.py index 553dfcc27..e4e0dc5aa 100644 --- a/signac/job.py +++ b/signac/job.py @@ -981,12 +981,37 @@ def close(self): pass def get_neighbors(self, ignore=[]): - """Return the neighbors of this job. + """Return the neighbors of this job, mainly for command line use. + + Use `Project.get_neighbors()` to get the neighbors of all jobs in the project. + + The neighbors of a job are jobs that differ along one state point parameter. + + Job neighbors are provided in a dictionary containing + {state_point_key: {prev_value: neighbor_id, next_value: neighbor_id}, ...}, + where `state_point_key` is each of the non-constant state point parameters in the project + (equivalent to the output of `project.detect_schema(exclude_const = True)`). For nested + state point keys, the state point key is in "dotted key" notation, like the output of + `detect_schema`. + + Along each state_point_key, a job can have 0, 1 or 2 neighbors. For 0 neighbors, the job + neighbors dictionary is empty. For 2 neighbors, the neighbors are in sort order. State point + values of different types are ordered by their type name. + + If neighbors are not being detected correctly, it is likely that there are several state + point parameters changing together. In this case, pass a list of state point parameters to + ignore to the `ignore` argument. If a state point value is a dictionary (a "nested key"), + then the ignore list must be specified in "dotted key" notation. Parameters ---------- ignore : list List of state point parameters to ignore when building neighbor list + + Returns + ------- + neighbors : dict + A map of state point key to 0-2 neighbor values (or none) to job ids (see above) """ from ._neighbor import ( neighbors_of_sp, diff --git a/signac/project.py b/signac/project.py index 48f86a795..36ebab8e0 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1680,27 +1680,35 @@ def _flat_schema(self, exclude_const=False): return sorted_schema def get_neighbors(self, ignore=[]): - """Return the neighbors of each job in the project. + """Return a map of job ids to job neighbors. The neighbors of a job are jobs that differ along one state point parameter. + Job neighbors are provided in a dictionary containing + {state_point_key: {prev_value: neighbor_id, next_value: neighbor_id}, ...}, + where `state_point_key` is each of the non-constant state point parameters in the project + (equivalent to the output of `project.detect_schema(exclude_const = True)`). For nested + state point keys, the state point key is in "dotted key" notation, like the output of + `detect_schema`. + + Along each state_point_key, a job can have 0, 1 or 2 neighbors. For 0 neighbors, the job + neighbors dictionary is empty. For 2 neighbors, the neighbors are in sort order. State point + values of different types are ordered by their type name. + If neighbors are not being detected correctly, it is likely that there are several state point parameters changing together. In this case, pass a list of state point parameters to - ignore to the `ignore` argument. - - The neighbor list is a dictionary of dictionaries of dictionaries in the following format: - {jobid: {state_point_key: {prev_value: neighbor_id, next_value: neighbor_id}, ...}, ...} - + ignore to the `ignore` argument. If a state point value is a dictionary (a "nested key"), + then the ignore list must be specified in "dotted key" notation. + Parameters ---------- ignore : list of str - List of keys to ignore when building neighbor list. + List of keys to ignore when building neighbor list Returns ------- neighbor_list : dict - A mapping of jobid to state point keys to previous and next job ids along each key - (see above). + A map of job id to job neighbors (see above). Example ------- @@ -1713,6 +1721,7 @@ def get_neighbors(self, ignore=[]): for key,v in job.sp.items(): print(f"has {key}={v} with neighbor jobs {key}-->{f" and {key}-->".join( f"{new_val} at job id {jid}" for new_val,jid in neighbors[key].items())}") + """ if not isinstance(ignore, list): ignore = [ignore] From c9d7fba00b7ebc6e82d74eb194d321f7ae891669 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Thu, 18 Sep 2025 19:36:27 -0400 Subject: [PATCH 62/73] Add note to consider not exposing job.get_neighbors() --- signac/job.py | 1 + 1 file changed, 1 insertion(+) diff --git a/signac/job.py b/signac/job.py index e4e0dc5aa..2ceb151c5 100644 --- a/signac/job.py +++ b/signac/job.py @@ -980,6 +980,7 @@ def close(self): except IndexError: pass + # TODO considering not exposing this entry point since it's mainly for the CLI def get_neighbors(self, ignore=[]): """Return the neighbors of this job, mainly for command line use. From b3bf7466a54a30dd90fcbd9b62d2de23da5ee611 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Fri, 19 Sep 2025 10:50:32 -0400 Subject: [PATCH 63/73] Format test file --- tests/test_neighborlist.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_neighborlist.py b/tests/test_neighborlist.py index 15d73600d..a5db7856e 100644 --- a/tests/test_neighborlist.py +++ b/tests/test_neighborlist.py @@ -88,16 +88,15 @@ def test_neighbors_ignore(self): ) def test_neighbors_ignore_nested(self): - a_vals = [{"b": 2, "c": 2}, {"b": 3, "c": 3}] for a in a_vals: self.project.open_job({"a": a}).init() - neighbor_list = self.project.get_neighbors(ignore = "a.b") + neighbor_list = self.project.get_neighbors(ignore="a.b") for a in a_vals: job = self.project.open_job({"a": a}) - neighbors_job = job.get_neighbors(ignore = "a.b") + neighbors_job = job.get_neighbors(ignore="a.b") c = a["c"] From 7f613287444d207acfc7cf8823491f7caa216515 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Fri, 19 Sep 2025 10:53:12 -0400 Subject: [PATCH 64/73] Wrap lines --- signac/job.py | 3 ++- signac/project.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/signac/job.py b/signac/job.py index 2ceb151c5..adf5fa8b3 100644 --- a/signac/job.py +++ b/signac/job.py @@ -1036,7 +1036,8 @@ def get_neighbors(self, ignore=[]): bad_keys = list(compress(ignore, is_bad_key)) warnings.warn( - f"Ignored state point parameter{'s' if len(bad_keys) > 1 else ''} {bad_keys} not present in project.", + f"Ignored state point parameter{'s' if len( + bad_keys) > 1 else ''} {bad_keys} not present in project.", RuntimeWarning, ) for bad_key in bad_keys: diff --git a/signac/project.py b/signac/project.py index 36ebab8e0..e4d8c652c 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1731,8 +1731,8 @@ def get_neighbors(self, ignore=[]): if any(is_bad_key := [a is _DictPlaceholder for a in need_to_ignore]): bad_keys = list(compress(ignore, is_bad_key)) warnings.warn( - f"Ignored state point parameter{'s' if len(bad_keys) > 1 else ''} {bad_keys - } not present in project.", + f"Ignored state point parameter{'s' if len( + bad_keys) > 1 else ''} {bad_keys} not present in project.", RuntimeWarning, ) for bad_key in bad_keys: From d61227c4d9cdc62ea33062cc9bef943953b20c97 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Mon, 22 Sep 2025 17:08:31 -0400 Subject: [PATCH 65/73] Wrap a different way for older python versions --- signac/job.py | 4 ++-- signac/project.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/signac/job.py b/signac/job.py index adf5fa8b3..3fb94123e 100644 --- a/signac/job.py +++ b/signac/job.py @@ -1036,8 +1036,8 @@ def get_neighbors(self, ignore=[]): bad_keys = list(compress(ignore, is_bad_key)) warnings.warn( - f"Ignored state point parameter{'s' if len( - bad_keys) > 1 else ''} {bad_keys} not present in project.", + f"Ignored state point parameter{'s' if len(bad_keys) > 1 else ''} {bad_keys}"\ + " not present in project.", RuntimeWarning, ) for bad_key in bad_keys: diff --git a/signac/project.py b/signac/project.py index e4d8c652c..be7f3f2c3 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1731,8 +1731,8 @@ def get_neighbors(self, ignore=[]): if any(is_bad_key := [a is _DictPlaceholder for a in need_to_ignore]): bad_keys = list(compress(ignore, is_bad_key)) warnings.warn( - f"Ignored state point parameter{'s' if len( - bad_keys) > 1 else ''} {bad_keys} not present in project.", + f"Ignored state point parameter{'s' if len(bad_keys) > 1 else ''} {bad_keys}"\ + " not present in project.", RuntimeWarning, ) for bad_key in bad_keys: From 25168590de9237a3c99a18f2657efe31d2e5977f Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Mon, 22 Sep 2025 17:12:13 -0400 Subject: [PATCH 66/73] Wrap without backslash --- signac/job.py | 2 +- signac/project.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/signac/job.py b/signac/job.py index 3fb94123e..499244d6f 100644 --- a/signac/job.py +++ b/signac/job.py @@ -1036,7 +1036,7 @@ def get_neighbors(self, ignore=[]): bad_keys = list(compress(ignore, is_bad_key)) warnings.warn( - f"Ignored state point parameter{'s' if len(bad_keys) > 1 else ''} {bad_keys}"\ + f"Ignored state point parameter{'s' if len(bad_keys) > 1 else ''} {bad_keys}" " not present in project.", RuntimeWarning, ) diff --git a/signac/project.py b/signac/project.py index be7f3f2c3..b8bebe55e 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1731,7 +1731,7 @@ def get_neighbors(self, ignore=[]): if any(is_bad_key := [a is _DictPlaceholder for a in need_to_ignore]): bad_keys = list(compress(ignore, is_bad_key)) warnings.warn( - f"Ignored state point parameter{'s' if len(bad_keys) > 1 else ''} {bad_keys}"\ + f"Ignored state point parameter{'s' if len(bad_keys) > 1 else ''} {bad_keys}" " not present in project.", RuntimeWarning, ) From 565839f42e8ed7eda498d4899cffa740ca2e34a7 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Mon, 22 Sep 2025 17:31:39 -0400 Subject: [PATCH 67/73] Clean up whitespace --- signac/job.py | 2 +- signac/project.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/signac/job.py b/signac/job.py index 499244d6f..5b9164e78 100644 --- a/signac/job.py +++ b/signac/job.py @@ -998,7 +998,7 @@ def get_neighbors(self, ignore=[]): Along each state_point_key, a job can have 0, 1 or 2 neighbors. For 0 neighbors, the job neighbors dictionary is empty. For 2 neighbors, the neighbors are in sort order. State point values of different types are ordered by their type name. - + If neighbors are not being detected correctly, it is likely that there are several state point parameters changing together. In this case, pass a list of state point parameters to ignore to the `ignore` argument. If a state point value is a dictionary (a "nested key"), diff --git a/signac/project.py b/signac/project.py index b8bebe55e..fd2167feb 100644 --- a/signac/project.py +++ b/signac/project.py @@ -1694,12 +1694,12 @@ def get_neighbors(self, ignore=[]): Along each state_point_key, a job can have 0, 1 or 2 neighbors. For 0 neighbors, the job neighbors dictionary is empty. For 2 neighbors, the neighbors are in sort order. State point values of different types are ordered by their type name. - + If neighbors are not being detected correctly, it is likely that there are several state point parameters changing together. In this case, pass a list of state point parameters to ignore to the `ignore` argument. If a state point value is a dictionary (a "nested key"), then the ignore list must be specified in "dotted key" notation. - + Parameters ---------- ignore : list of str From fc4f90b3f86ca2cfd482e61bec226d99fe98ec2b Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Fri, 3 Oct 2025 11:11:21 -0400 Subject: [PATCH 68/73] Default to empty list for ignore argument on command line --- signac/__main__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/signac/__main__.py b/signac/__main__.py index b94f635cd..1efe67d55 100644 --- a/signac/__main__.py +++ b/signac/__main__.py @@ -990,6 +990,7 @@ def main(): "--ignore", nargs="+", type=str, + default=[], help="State point keys to ignore when finding neighbors. " "Useful for state point parameters that change together.", ) From dc8be8f075a66b81aad95b4e3a9eae3d3ae5e03e Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Fri, 3 Oct 2025 11:11:53 -0400 Subject: [PATCH 69/73] Make job.get_neighbors private to ensure users use the efficient one --- signac/__main__.py | 2 +- signac/job.py | 3 +-- tests/test_neighborlist.py | 20 ++++++++++---------- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/signac/__main__.py b/signac/__main__.py index 1efe67d55..f41946dc5 100644 --- a/signac/__main__.py +++ b/signac/__main__.py @@ -203,7 +203,7 @@ def main_neighbors(args): if args.job_id: jobs = (_open_job_by_id(project, job_id) for job_id in args.job_id) for job in jobs: - nl = job.get_neighbors(ignore=args.ignore) + nl = job._get_neighbors(ignore=args.ignore) pprint({k: v for k, v in nl.items() if len(v) > 0}) diff --git a/signac/job.py b/signac/job.py index 5b9164e78..89696303b 100644 --- a/signac/job.py +++ b/signac/job.py @@ -980,8 +980,7 @@ def close(self): except IndexError: pass - # TODO considering not exposing this entry point since it's mainly for the CLI - def get_neighbors(self, ignore=[]): + def _get_neighbors(self, ignore=[]): """Return the neighbors of this job, mainly for command line use. Use `Project.get_neighbors()` to get the neighbors of all jobs in the project. diff --git a/tests/test_neighborlist.py b/tests/test_neighborlist.py index a5db7856e..de0a00c92 100644 --- a/tests/test_neighborlist.py +++ b/tests/test_neighborlist.py @@ -15,9 +15,9 @@ def test_neighbors(self): for a, b in product(a_vals, b_vals): job = self.project.open_job({"a": a, "b": b}) - neighbors_job = job.get_neighbors() + neighbors_job = job._get_neighbors() with pytest.warns(RuntimeWarning, match="not_present"): - job.get_neighbors(ignore=["not_present"]) + job._get_neighbors(ignore=["not_present"]) neighbors_project = neighbor_list[job.id] assert neighbors_project == neighbors_job @@ -62,7 +62,7 @@ def test_neighbors_ignore(self): for b in b_vals: job = self.project.open_job({"b": b, "2b": 2 * b}) - neighbors_job = job.get_neighbors(ignore=["2b"]) + neighbors_job = job._get_neighbors(ignore=["2b"]) this_neighbors = neighbor_list[job.id] assert this_neighbors == neighbors_job @@ -96,7 +96,7 @@ def test_neighbors_ignore_nested(self): for a in a_vals: job = self.project.open_job({"a": a}) - neighbors_job = job.get_neighbors(ignore="a.b") + neighbors_job = job._get_neighbors(ignore="a.b") c = a["c"] @@ -123,7 +123,7 @@ def test_neighbors_nested(self): for a in a_vals: job = self.project.open_job({"a": a}) - neighbors_job = job.get_neighbors() + neighbors_job = job._get_neighbors() c = a["c"] @@ -172,7 +172,7 @@ def test_neighbors_disjoint_ignore(self): neighbor_list = self.project.get_neighbors(ignore=["2b"]) job = self.project.open_job({"x": {"n": "nested"}}) - neighbors_job = job.get_neighbors(ignore=["2b"]) + neighbors_job = job._get_neighbors(ignore=["2b"]) neighbors_project = neighbor_list[job.id] assert neighbors_project == neighbors_job @@ -197,7 +197,7 @@ def test_neighbors_varied_types(self): jobid = job_ids[i] job = self.project.open_job(id=jobid) - neighbors_job = job.get_neighbors() + neighbors_job = job._get_neighbors() this_neighbors = neighbor_list[jobid] assert this_neighbors == neighbors_job if i > 0: @@ -215,7 +215,7 @@ def test_neighbors_no(self): for job in self.project: for v in neighbor_list[job.id].values(): assert len(v) == 0 - for v in job.get_neighbors().values(): + for v in job._get_neighbors().values(): assert len(v) == 0 def test_neighbors_ignore_dups(self): @@ -230,6 +230,6 @@ def test_neighbors_ignore_dups(self): self.project.get_neighbors(ignore="b") for job in self.project: with pytest.raises(ValueError, match="'a'"): - job.get_neighbors(ignore="a") + job._get_neighbors(ignore="a") with pytest.raises(ValueError, match="'b'"): - job.get_neighbors(ignore="b") + job._get_neighbors(ignore="b") From 048fccd1b4df46565cd28d850b60505e123af5d5 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 8 Oct 2025 12:35:55 -0400 Subject: [PATCH 70/73] Add shell tests for neighbor --- signac/__main__.py | 6 +++--- tests/test_shell.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/signac/__main__.py b/signac/__main__.py index f41946dc5..3fabf1d4f 100644 --- a/signac/__main__.py +++ b/signac/__main__.py @@ -202,9 +202,9 @@ def main_neighbors(args): project = get_project() if args.job_id: jobs = (_open_job_by_id(project, job_id) for job_id in args.job_id) - for job in jobs: - nl = job._get_neighbors(ignore=args.ignore) - pprint({k: v for k, v in nl.items() if len(v) > 0}) + for job in jobs: + nl = job._get_neighbors(ignore=args.ignore) + pprint({k: v for k, v in nl.items() if len(v) > 0}) def main_document(args): diff --git a/tests/test_shell.py b/tests/test_shell.py index 172fb1e45..470d3ef90 100644 --- a/tests/test_shell.py +++ b/tests/test_shell.py @@ -1,6 +1,7 @@ # Copyright (c) 2017 The Regents of the University of Michigan # All rights reserved. # This software is licensed under the BSD 3-Clause License. +from itertools import product import json import os import shutil @@ -425,6 +426,36 @@ def test_schema(self): out = self.call("python -m signac schema".split()) assert s.format() == out.strip().replace(os.linesep, "\n") + def test_neighbors_ignore_nested(self): + self.call("python -m signac init".split()) + project = signac.Project() + a_vals = [{"b": 2, "c": 2}, {"b": 3, "c": 3}] + for a in a_vals: + project.open_job({"a": a}).init() + neighbor_list = project.get_neighbors(ignore="a.b") + for job in project: + out = self.call(f"python -m signac neighbors {job.id} --ignore a.b".split()) + assert str(neighbor_list[job.id]) in out + + def test_neighbors_ignore_not_present(self): + self.call("python -m signac init".split()) + project = signac.Project() + job = project.open_job({"a":1}).init() + out = self.call(f"python -m signac neighbors {job.id} --ignore not_in_project".split(), + error=True) + assert "not_in_project" in out + assert "not present" in out + + def test_neighbors_ignore(self): + self.call("python -m signac init".split()) + project = signac.Project() + for a,b in product([1,2], [2,3]): + job = project.open_job({"a":a, "b":b}).init() + out = self.call(f"python -m signac neighbors {job.id} --ignore b".split(), error=True, + aise_error=False) + assert "impossible to distinguish" in out + assert "'b'" in out + def test_sync(self): project_b = signac.init_project(path=os.path.join(self.tmpdir.name, "b")) self.call("python -m signac init".split()) From a5cdd231c02316265c68f3e8af8bd98ce357d13b Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 8 Oct 2025 12:36:09 -0400 Subject: [PATCH 71/73] Add copyright header --- signac/_neighbor.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/signac/_neighbor.py b/signac/_neighbor.py index de282523b..b855eaa7e 100644 --- a/signac/_neighbor.py +++ b/signac/_neighbor.py @@ -1,3 +1,6 @@ +# Copyright (c) 2025 The Regents of the University of Michigan. +# All rights reserved. +# This software is licensed under the BSD 3-Clause License. from collections import defaultdict from functools import partial from typing import DefaultDict From d5ca36bb32d23835df0d04c23a2ab59d95b4d8f2 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Wed, 8 Oct 2025 13:25:40 -0400 Subject: [PATCH 72/73] Fix typo --- tests/test_shell.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_shell.py b/tests/test_shell.py index 470d3ef90..bd9cb32eb 100644 --- a/tests/test_shell.py +++ b/tests/test_shell.py @@ -452,7 +452,7 @@ def test_neighbors_ignore(self): for a,b in product([1,2], [2,3]): job = project.open_job({"a":a, "b":b}).init() out = self.call(f"python -m signac neighbors {job.id} --ignore b".split(), error=True, - aise_error=False) + raise_error=False) assert "impossible to distinguish" in out assert "'b'" in out From 12a608ec8bdc0d585219279be0f6e4ece29a3555 Mon Sep 17 00:00:00 2001 From: Corwin Kerr Date: Thu, 9 Oct 2025 12:27:27 -0400 Subject: [PATCH 73/73] Run prek --- tests/test_shell.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tests/test_shell.py b/tests/test_shell.py index bd9cb32eb..e906090bd 100644 --- a/tests/test_shell.py +++ b/tests/test_shell.py @@ -1,12 +1,12 @@ # Copyright (c) 2017 The Regents of the University of Michigan # All rights reserved. # This software is licensed under the BSD 3-Clause License. -from itertools import product import json import os import shutil import subprocess import sys +from itertools import product from tempfile import TemporaryDirectory import pytest @@ -440,19 +440,24 @@ def test_neighbors_ignore_nested(self): def test_neighbors_ignore_not_present(self): self.call("python -m signac init".split()) project = signac.Project() - job = project.open_job({"a":1}).init() - out = self.call(f"python -m signac neighbors {job.id} --ignore not_in_project".split(), - error=True) + job = project.open_job({"a": 1}).init() + out = self.call( + f"python -m signac neighbors {job.id} --ignore not_in_project".split(), + error=True, + ) assert "not_in_project" in out assert "not present" in out def test_neighbors_ignore(self): self.call("python -m signac init".split()) project = signac.Project() - for a,b in product([1,2], [2,3]): - job = project.open_job({"a":a, "b":b}).init() - out = self.call(f"python -m signac neighbors {job.id} --ignore b".split(), error=True, - raise_error=False) + for a, b in product([1, 2], [2, 3]): + job = project.open_job({"a": a, "b": b}).init() + out = self.call( + f"python -m signac neighbors {job.id} --ignore b".split(), + error=True, + raise_error=False, + ) assert "impossible to distinguish" in out assert "'b'" in out