Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
76 commits
Select commit Hold shift + click to select a range
539cedc
Working prototype of neighbor list implemented within Project class
cbkerr Apr 29, 2025
65a93d4
Support nested keys & keys with different types, improve readability
cbkerr May 22, 2025
b40db7b
Update docstring
cbkerr Jun 6, 2025
290c904
Add idea for type of return value
cbkerr Jun 6, 2025
b91eb34
Pass statepoint, not job id and dotted_sp_cache to neighbors_of_job
cbkerr Jun 6, 2025
371ddea
Add API entry points
cbkerr Jun 6, 2025
a891c8d
Prototype code
cbkerr Jun 6, 2025
e542221
Merge branch 'main' into feat/neighbor-list
cbkerr Jun 6, 2025
a3e7d5b
Add prepare_shadow_project from dashboard navigator and fix bug
cbkerr Jul 3, 2025
500934d
Make ignore an empty list by default
cbkerr Jul 28, 2025
ad8395b
Update comments
cbkerr Jul 28, 2025
9275818
Import Counter
cbkerr Jul 28, 2025
16520d2
Add tests
cbkerr Jul 28, 2025
2715d46
Avoid 0 and 1 in neighborlist test because conflated with bools
cbkerr Jul 29, 2025
6b2a66e
Clean up code that gives duplicates error message
cbkerr Jul 30, 2025
53c4fab
Code cleanup
cbkerr Jul 30, 2025
43a7872
Only convert from shadow job ids if needed (if ignoring keys)
cbkerr Jul 30, 2025
9bb4087
Streamline output from search function
cbkerr Jul 31, 2025
df9a0bb
Improve function names
cbkerr Jul 31, 2025
ae89ad8
Add old neighbor code
cbkerr Jul 31, 2025
30a0478
Move neighbor code to separate module
cbkerr Jul 31, 2025
2e04fe4
Remove internal neighbor code from Project class
cbkerr Jul 31, 2025
bc371dd
Prototype API for accessing 1 job's neighbors
cbkerr Jul 31, 2025
2786117
Add shell command to print neighbors of job by id
cbkerr Aug 4, 2025
6176290
Update docstrings
cbkerr Aug 5, 2025
53990bf
Move code paths handling ignore to neighbor module
cbkerr Aug 5, 2025
5b5cc84
Improve code clarity
cbkerr Aug 5, 2025
778cb51
Add test for job neighbors
cbkerr Aug 5, 2025
5394f47
Move neighborlist tests to separate file
cbkerr Aug 5, 2025
4a2fab0
Move flat_schema to internal method
cbkerr Aug 5, 2025
ba8f70e
Add tests for neighbors detected from job
cbkerr Aug 5, 2025
08fae39
Split shadow unmapping into two functions
cbkerr Aug 5, 2025
7111a39
Add tests for nested dict with str value
cbkerr Aug 5, 2025
b582da0
Fix dotted_key format when detecting neighborlist for single job
cbkerr Aug 5, 2025
9cac4ee
Add test to catch and fix error not converting to dotted sp cache
cbkerr Aug 5, 2025
7d6be83
Fix error in tests
cbkerr Aug 5, 2025
14750c8
Accept single key to ignore for job neighbor entry point
cbkerr Aug 5, 2025
c852971
Fix bug in nested neighbor detection for single job
cbkerr Aug 6, 2025
303849d
Add check for RuntimeWarning
cbkerr Aug 6, 2025
a88d602
Convert shadow_cache to dotted key format early
cbkerr Aug 6, 2025
c1801db
Clarify comments about dotted keys
cbkerr Aug 6, 2025
7a0e735
Update job entry point
cbkerr Aug 6, 2025
531aa1c
Run precommit
cbkerr Aug 6, 2025
41c608b
Remove prototype code
cbkerr Aug 6, 2025
2e1c26e
Address some precommit errors
cbkerr Aug 6, 2025
c46f42c
Move neighbor module to _neighbor
cbkerr Aug 6, 2025
4908d6a
Address some precommit warnings
cbkerr Aug 6, 2025
fd619e2
Merge branch 'main' into feat/neighbor-list
cbkerr Aug 6, 2025
33b2525
Type the counting defaultdict
cbkerr Aug 6, 2025
e4ecc59
Merge branch 'main' into feat/neighbor-list
cbkerr Sep 3, 2025
527a899
Allow signac neighbors --ignore parsing
cbkerr Sep 4, 2025
e6607f9
Don't print keys with no neighbors
cbkerr Sep 4, 2025
4f60c23
Warn which ignored keys are not found, and only remove those
cbkerr Sep 4, 2025
18e226e
Ensure bad key is listed in error message
cbkerr Sep 4, 2025
9b12687
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 5, 2025
5f02aab
Support older f-string syntax
cbkerr Sep 6, 2025
6479228
Add neighbors in sort order
cbkerr Sep 10, 2025
521e3d6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 10, 2025
2068eb6
Optimization: Use detect_schema with exclude_const
cbkerr Sep 18, 2025
bdff3d1
Add failing test for specifying ignored keys as dotted keys
cbkerr Sep 18, 2025
a08e281
Support ignoring nested keys, specified in dotted key format
cbkerr Sep 18, 2025
59da1ca
Run ruff and attempt to address long lines
cbkerr Sep 18, 2025
d0b5831
Apply suggestions from review
cbkerr Sep 18, 2025
a717722
Update docstring for get_neighbors
cbkerr Sep 18, 2025
c9d7fba
Add note to consider not exposing job.get_neighbors()
cbkerr Sep 18, 2025
b3bf746
Format test file
cbkerr Sep 19, 2025
7f61328
Wrap lines
cbkerr Sep 19, 2025
d61227c
Wrap a different way for older python versions
cbkerr Sep 22, 2025
2516859
Wrap without backslash
cbkerr Sep 22, 2025
565839f
Clean up whitespace
cbkerr Sep 22, 2025
fc4f90b
Default to empty list for ignore argument on command line
cbkerr Oct 3, 2025
dc8be8f
Make job.get_neighbors private to ensure users use the efficient one
cbkerr Oct 3, 2025
048fccd
Add shell tests for neighbor
cbkerr Oct 8, 2025
a5cdd23
Add copyright header
cbkerr Oct 8, 2025
d5ca36b
Fix typo
cbkerr Oct 8, 2025
12a608e
Run prek
cbkerr Oct 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 31 additions & 1 deletion signac/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def main_statepoint(args):
"""Handle statepoint subcommand."""
project = get_project()
if args.job_id:
jobs = (_open_job_by_id(project, jid) for jid in args.job_id)
jobs = (_open_job_by_id(project, job_id) for job_id in args.job_id)
else:
jobs = project
for job in jobs:
Expand All @@ -197,6 +197,16 @@ def main_statepoint(args):
print(json.dumps(job.statepoint(), indent=args.indent, sort_keys=args.sort))


def main_neighbors(args):
    """Handle the neighbors subcommand.

    For every job id given on the command line, open the job, look up its
    neighbors and pretty-print them. State point keys along which the job has
    no neighbor are left out of the printed mapping.

    Parameters
    ----------
    args
        Parsed command-line arguments. ``args.job_id`` is the (possibly empty)
        list of job ids and ``args.ignore`` is the list of state point keys to
        ignore when finding neighbors.
    """
    project = get_project()
    # ``job_id`` is declared with nargs="*", so it may be empty. Without ids
    # there is no job to look up; return explicitly rather than relying on a
    # name that is only bound when ids were given.
    if not args.job_id:
        return
    for job_id in args.job_id:
        job = _open_job_by_id(project, job_id)
        nl = job._get_neighbors(ignore=args.ignore)
        # Only show keys that actually have at least one neighbor
        pprint({k: v for k, v in nl.items() if len(v) > 0})


def main_document(args):
"""Handle document subcommand."""
project = get_project()
Expand Down Expand Up @@ -967,13 +977,33 @@ def main():
)
parser_statepoint.set_defaults(func=main_statepoint)

parser_neighbor = subparsers.add_parser(
"neighbors", description="Print the neighbors of the job"
)
parser_neighbor.add_argument(
"job_id",
nargs="*",
type=str,
help="One or more job ids. The corresponding jobs must be initialized.",
)
parser_neighbor.add_argument(
"--ignore",
nargs="+",
type=str,
default=[],
help="State point keys to ignore when finding neighbors. "
"Useful for state point parameters that change together.",
)
parser_neighbor.set_defaults(func=main_neighbors)

parser_diff = subparsers.add_parser(
"diff", description="Find the difference among job state points."
)
parser_diff.add_argument(
"job_id",
nargs="*",
type=str,
default=[],
help="One or more job ids. The corresponding jobs must be initialized.",
)
parser_diff.add_argument(
Expand Down
328 changes: 328 additions & 0 deletions signac/_neighbor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,328 @@
# Copyright (c) 2025 The Regents of the University of Michigan.
# All rights reserved.
# This software is licensed under the BSD 3-Clause License.
from collections import defaultdict
from functools import partial
from typing import DefaultDict

from ._search_indexer import _DictPlaceholder
from ._utility import (
_dotted_dict_to_nested_dicts,
_nested_dicts_to_dotted_keys,
_to_hashable,
)
from .job import calc_id


def prepare_shadow_project(sp_cache, ignore: list):
    """Build the cache and id mapping of the "shadow" project defined by ignored keys.

    Removing the ignored keys from every state point projects the project onto
    a "shadow" project whose jobs are identified by different ("shadow") job
    ids. Neighbors can be detected on the shadow project and mapped back to
    the real project, but only if every shadow job corresponds to exactly one
    real job.

    Parameters
    ----------
    sp_cache : dict
        State point cache: map from job id to state point.
    ignore : list of str
        State point keys to ignore, with nested keys specified in dotted key
        format. Keys that are absent from a state point are skipped silently.

    Returns
    -------
    shadow_map : dict
        Map from shadow job id to project job id.
    shadow_cache : dict
        In-memory state point cache for the shadow project, mapping
        shadow job id --> shadow state point in dotted key format.
        The shadow job id is computed from the nested key format with
        the ignored keys removed.

    Raises
    ------
    ValueError
        If ignoring the keys makes two or more jobs indistinguishable, i.e.
        the mapping between shadow job ids and job ids is not one-to-one.

    Notes
    -----
    Intended use cases:

    1) A seed that is different for every job::

        {"a": 1, "b": 2, "seed": 0} -> jobid1      {"a": 1, "b": 2} -> shadowid1
        {"a": 1, "b": 3, "seed": 1} -> jobid2      {"a": 1, "b": 3} -> shadowid2

       shadowid1 <-> jobid1 and shadowid2 <-> jobid2, so the mapping is valid.
       Adding {"a": 1, "b": 3, "seed": 2} -> jobid3 breaks it: shadowid2 would
       map to both jobid2 and jobid3.

    2) A state point key that changes in sync with another key::

        {"a1": 10, "a2": 20} -> jobid1             {"a1": 10} -> shadowid1
        {"a1": 2, "a2": 4}   -> jobid2             {"a1": 2}  -> shadowid2

       Ignoring "a2" keeps the mapping valid. Adding {"a1": 2, "a2": 5} -> jobid3
       breaks it: shadowid2 would map to both jobid2 and jobid3.
    """
    shadow_cache = {}  # like a state point cache, but for the shadow project
    jobs_by_shadow_id = defaultdict(list)  # shadow id -> every job id projected onto it
    for job_id, statepoint in sp_cache.items():
        # Drop the ignored keys while the state point is in dotted key format
        dotted = dict(_nested_dicts_to_dotted_keys(statepoint))
        for ignored_key in ignore:
            dotted.pop(ignored_key, None)
        # The id is calculated from the nested format; the cache keeps the dotted format
        shadow_id = calc_id(_dotted_dict_to_nested_dicts(dotted))
        shadow_cache[shadow_id] = dotted
        jobs_by_shadow_id[shadow_id].append(job_id)

    # A shadow id shared by several jobs means the projection cannot be inverted
    collisions = [ids for ids in jobs_by_shadow_id.values() if len(ids) > 1]
    if collisions:
        err_str = "\n".join(f"Job ids: {', '.join(ids)}." for ids in collisions)
        raise ValueError(
            f"Ignoring {ignore} makes it impossible to distinguish some jobs:\n{err_str}"
        )

    # Every shadow id now has exactly one job, so the inverse map is well defined
    shadow_map = {shadow_id: ids[0] for shadow_id, ids in jobs_by_shadow_id.items()}
    return shadow_map, shadow_cache


# key and other_val come last so the leading arguments can be bound with functools.partial
def _search_cache_for_val(statepoint, cache, key, other_val):
    """Return the job id of a job similar to statepoint if it is present in cache.

    The similar job has the same state point as ``statepoint`` except that
    ``key`` is set to ``other_val``. The passed-in ``statepoint`` is left
    untouched; the modification is applied to a copy.

    Parameters
    ----------
    statepoint : dict
        State point to start from, in dotted key format.
    cache : dict
        Project state point cache to search in, keyed by job id.
    key : str
        The key whose value to change.
    other_val
        The new value of key to search for.

    Returns
    -------
    str or None
        Job id of the similar job, or None if no such job is in the cache.
    """
    # Work on a modified copy so the caller's state point is never mutated
    candidate = {**statepoint, key: other_val}
    # The job id is computed from the nested-dict form, not from dotted keys
    other_job_id = calc_id(_dotted_dict_to_nested_dicts(candidate))
    return other_job_id if other_job_id in cache else None


def _search_out(search_direction, values, current_index, boundary_index, search_fun):
"""Search in values towards boundary_index from current_index using search_fun.

Parameters
----------
search_direction : int, 1 or -1
1 means search in the positive direction from the index
values : iterable
Values to index into when searching
current_index : int
Index into values to start searching from.
The value at this index is not accessed directly.
boundary_index : int
The index at which to stop
search_fun : function
Unary function returning jobid if it exists and None otherwise

Returns
-------
None if jobid not found

{val: jobid} if jobid found per search_fun
jobid : str
Job id of the nearest job in the search_direction
val : Value of the key at the neighbor jobid
"""
query_index = current_index + search_direction
# search either query_index >= low_boundary or query_index <= high_boundary
while search_direction * query_index <= boundary_index * search_direction:
val = values[query_index]
jobid = search_fun(val)
if jobid is None:
query_index += search_direction
else:
return {val: jobid}
return None


def neighbors_of_sp(statepoint, dotted_sp_cache, sorted_schema):
    """Find the nearest neighbors of a state point along every key of sorted_schema.

    For each schema key, the sorted values are searched outwards from the
    state point's own value, in both directions, for the closest value at
    which a job exists in dotted_sp_cache with all other keys unchanged.

    The state point and the cache must both use either job ids or shadow job
    ids, and both must be in dotted key format (as produced by calling
    _nested_dicts_to_dotted_keys on each state point).

    Parameters
    ----------
    statepoint : dict
        State point to start the search from, in dotted key format.
    dotted_sp_cache : dict
        Map from job id to state point in dotted key format.
    sorted_schema : dict
        Map from key (in dotted notation) to the sorted values of that key to
        search over.

    Returns
    -------
    dict
        ``{key: {neighbor_value: neighbor_job_id}}`` with at most one entry
        for the previous and one for the next neighbor per key. Keys without
        any neighbor map to an empty dict; schema keys missing from the state
        point are omitted.
    """
    neighbors = {}
    for key, schema_values in sorted_schema.items():
        # Schema values are hashable and in dotted key format, so convert the
        # state point's value the same way before looking it up.
        own_value = _to_hashable(statepoint.get(key, _DictPlaceholder))
        if own_value is _DictPlaceholder:
            # The key is absent from this state point (heterogeneous schema)
            continue
        position = schema_values.index(own_value)
        # The search function receives its own copy of the state point
        lookup = partial(_search_cache_for_val, dict(statepoint), dotted_sp_cache, key)
        below = _search_out(-1, schema_values, position, 0, lookup)
        above = _search_out(1, schema_values, position, len(schema_values) - 1, lookup)

        found = {}
        for hit in (below, above):
            if hit is not None:
                found.update(hit)
        neighbors[key] = found
    return neighbors


def shadow_neighbors_to_neighbors(shadow_neighbors, shadow_map):
    """Translate the shadow job ids in one job's neighbors into project job ids.

    Parameters
    ----------
    shadow_neighbors : dict
        Neighbors of one job containing shadow job ids:
        ``{state_point_key: {neighbor_value: shadow_job_id}}``.
    shadow_map : dict
        Map from shadow job id to project job id.

    Returns
    -------
    dict
        Same structure as ``shadow_neighbors`` with every shadow job id
        replaced by the project job id it maps to.
    """
    return {
        sp_key: {value: shadow_map[shadow_id] for value, shadow_id in by_value.items()}
        for sp_key, by_value in shadow_neighbors.items()
    }


def shadow_neighbor_list_to_neighbor_list(shadow_neighbor_list, shadow_map):
    """Translate every shadow job id in a neighbor list into a project job id.

    Both the top-level keys and the neighbor ids nested below them are
    converted.

    Parameters
    ----------
    shadow_neighbor_list : dict
        Neighbor list containing shadow job ids:
        ``{shadow_job_id: {state_point_key: {neighbor_value: shadow_job_id}}}``.
    shadow_map : dict
        Map from shadow job id to project job id.

    Returns
    -------
    dict
        Neighbor list of the same structure expressed in project job ids.
    """
    return {
        shadow_map[shadow_id]: shadow_neighbors_to_neighbors(per_key, shadow_map)
        for shadow_id, per_key in shadow_neighbor_list.items()
    }


def _build_neighbor_list(dotted_sp_cache, sorted_schema):
    """Compute the neighbors of every state point in the cache.

    Parameters
    ----------
    dotted_sp_cache : dict
        Map from job id to state point, or from shadow job id to shadow state
        point, with state points in dotted key format.
    sorted_schema : dict
        Map of dotted keys to their values to search over.

    Returns
    -------
    neighbor_list : dict
        {jobid: {state_point_key: {prev_value: neighbor_id, next_value: neighbor_id}}}
    """
    return {
        job_id: neighbors_of_sp(dotted_sp, dotted_sp_cache, sorted_schema)
        for job_id, dotted_sp in dotted_sp_cache.items()
    }


def get_neighbor_list(sp_cache, sorted_schema, ignore):
    """Build the neighbor list of a project while handling ignored keys.

    Without ignored keys the neighbor list is built directly from the state
    point cache. With ignored keys it is built on the shadow project (state
    points with the ignored keys removed) and the shadow job ids are mapped
    back to project job ids afterwards.

    Parameters
    ----------
    sp_cache : dict
        Project state point cache: map from job id to state point.
    sorted_schema : dict
        Map of dotted keys to their values to search over.
    ignore : list of str, str, or None
        State point keys to ignore, with nested keys in dotted key format.
        A single string is treated as one key; None or an empty list means
        no key is ignored.

    Returns
    -------
    neighbor_list : dict
        {jobid: {state_point_key: {prev_value: neighbor_id, next_value: neighbor_id}}}

    Raises
    ------
    ValueError
        Propagated from prepare_shadow_project when ignoring the keys makes
        some jobs indistinguishable.
    """
    # Normalize the argument: a bare string would otherwise be iterated
    # character by character, and None has no length.
    if ignore is None:
        ignore = []
    elif isinstance(ignore, str):
        ignore = [ignore]

    if len(ignore) > 0:
        shadow_map, shadow_cache = prepare_shadow_project(sp_cache, ignore=ignore)
        shadow_nl = _build_neighbor_list(shadow_cache, sorted_schema)
        return shadow_neighbor_list_to_neighbor_list(shadow_nl, shadow_map)

    # The state point cache needs to be in dotted keys to enable searching over schema values
    dotted_sp_cache = {
        _id: dict(_nested_dicts_to_dotted_keys(_sp)) for _id, _sp in sp_cache.items()
    }
    return _build_neighbor_list(dotted_sp_cache, sorted_schema)
Loading