diff --git a/pychunkedgraph/app/segmentation/common.py b/pychunkedgraph/app/segmentation/common.py index 70642c9ce..cd1a0e9b8 100644 --- a/pychunkedgraph/app/segmentation/common.py +++ b/pychunkedgraph/app/segmentation/common.py @@ -791,8 +791,8 @@ def handle_subgraph(table_id, root_id, only_internal_edges=True): supervoxels = np.concatenate( [agg.supervoxels for agg in l2id_agglomeration_d.values()] ) - mask0 = np.in1d(edges.node_ids1, supervoxels) - mask1 = np.in1d(edges.node_ids2, supervoxels) + mask0 = np.isin(edges.node_ids1, supervoxels) + mask1 = np.isin(edges.node_ids2, supervoxels) edges = edges[mask0 & mask1] return edges diff --git a/pychunkedgraph/graph/cache.py b/pychunkedgraph/graph/cache.py index e0ee6dc2e..ec7b09a0e 100644 --- a/pychunkedgraph/graph/cache.py +++ b/pychunkedgraph/graph/cache.py @@ -153,11 +153,11 @@ def cross_edges_decorated(node_id): return cross_edges_decorated(node_id) def parents_multiple(self, node_ids: np.ndarray, *, time_stamp: datetime = None): - node_ids = np.array(node_ids, dtype=NODE_ID, copy=False) + node_ids = np.asarray(node_ids, dtype=NODE_ID) if not node_ids.size: return node_ids self.stats["parents"]["calls"] += 1 - mask = np.in1d(node_ids, np.fromiter(self.parents_cache.keys(), dtype=NODE_ID)) + mask = np.isin(node_ids, np.fromiter(self.parents_cache.keys(), dtype=NODE_ID)) hits = int(np.sum(mask)) misses = len(node_ids) - hits self.stats["parents"]["hits"] += hits @@ -173,11 +173,11 @@ def parents_multiple(self, node_ids: np.ndarray, *, time_stamp: datetime = None) def children_multiple(self, node_ids: np.ndarray, *, flatten=False): result = {} - node_ids = np.array(node_ids, dtype=NODE_ID, copy=False) + node_ids = np.asarray(node_ids, dtype=NODE_ID) if not node_ids.size: return result self.stats["children"]["calls"] += 1 - mask = np.in1d(node_ids, np.fromiter(self.children_cache.keys(), dtype=NODE_ID)) + mask = np.isin(node_ids, np.fromiter(self.children_cache.keys(), dtype=NODE_ID)) hits = int(np.sum(mask)) misses = len(node_ids) - hits self.stats["children"]["hits"] += hits @@ -197,11 +197,11 @@ def cross_chunk_edges_multiple( self, node_ids: np.ndarray, *, time_stamp: datetime = None ): result = {} - node_ids = np.array(node_ids, dtype=NODE_ID, copy=False) + node_ids = np.asarray(node_ids, dtype=NODE_ID) if not node_ids.size: return result self.stats["cross_chunk_edges"]["calls"] += 1 - mask = np.in1d( + mask = np.isin( node_ids, np.fromiter(self.cross_chunk_edges_cache.keys(), dtype=NODE_ID) ) hits = int(np.sum(mask)) diff --git a/pychunkedgraph/graph/chunkedgraph.py b/pychunkedgraph/graph/chunkedgraph.py index 636e1843e..6eabab065 100644 --- a/pychunkedgraph/graph/chunkedgraph.py +++ b/pychunkedgraph/graph/chunkedgraph.py @@ -1,5 +1,4 @@ # pylint: disable=invalid-name, missing-docstring, too-many-lines, import-outside-toplevel, unsupported-binary-operation - import time import typing import datetime @@ -734,8 +733,8 @@ def get_l2_agglomerations( else: all_chunk_edges = all_chunk_edges.get_pairs() supervoxels = self.get_children(level2_ids, flatten=True) - mask0 = np.in1d(all_chunk_edges[:, 0], supervoxels) - mask1 = np.in1d(all_chunk_edges[:, 1], supervoxels) + mask0 = np.isin(all_chunk_edges[:, 0], supervoxels) + mask1 = np.isin(all_chunk_edges[:, 1], supervoxels) return all_chunk_edges[mask0 & mask1] l2id_children_d = self.get_children(level2_ids) @@ -807,6 +806,7 @@ def add_edges( source_coords: typing.Sequence[int] = None, sink_coords: typing.Sequence[int] = None, allow_same_segment_merge: typing.Optional[bool] = False, + 
stitch_mode: typing.Optional[bool] = False, ) -> operation.GraphEditOperation.Result: """ Adds an edge to the chunkedgraph @@ -823,6 +823,7 @@ def add_edges( source_coords=source_coords, sink_coords=sink_coords, allow_same_segment_merge=allow_same_segment_merge, + stitch_mode=stitch_mode, ).execute() def remove_edges( diff --git a/pychunkedgraph/graph/chunks/utils.py b/pychunkedgraph/graph/chunks/utils.py index 5546d2650..cac1c30bc 100644 --- a/pychunkedgraph/graph/chunks/utils.py +++ b/pychunkedgraph/graph/chunks/utils.py @@ -99,7 +99,7 @@ def get_chunk_coordinates_multiple(meta, ids: np.ndarray) -> np.ndarray: y_offset = x_offset - bits_per_dim z_offset = y_offset - bits_per_dim - ids = np.array(ids, dtype=int, copy=False) + ids = np.asarray(ids, dtype=int) X = ids >> x_offset & 2**bits_per_dim - 1 Y = ids >> y_offset & 2**bits_per_dim - 1 Z = ids >> z_offset & 2**bits_per_dim - 1 @@ -154,7 +154,7 @@ def get_chunk_ids_from_node_ids(meta, ids: Iterable[np.uint64]) -> np.ndarray: bits_per_dims = np.array([meta.bitmasks[l] for l in get_chunk_layers(meta, ids)]) offsets = 64 - meta.graph_config.LAYER_ID_BITS - 3 * bits_per_dims - ids = np.array(ids, dtype=int, copy=False) + ids = np.asarray(ids, dtype=int) cids1 = np.array((ids >> offsets) << offsets, dtype=np.uint64) # cids2 = np.vectorize(get_chunk_id)(meta, ids) # assert np.all(cids1 == cids2) diff --git a/pychunkedgraph/graph/cutting.py b/pychunkedgraph/graph/cutting.py index 8b1583871..c12a0a560 100644 --- a/pychunkedgraph/graph/cutting.py +++ b/pychunkedgraph/graph/cutting.py @@ -62,7 +62,7 @@ def merge_cross_chunk_edges_graph_tool( if len(mapping) > 0: mapping = np.concatenate(mapping) u_nodes = np.unique(edges) - u_unmapped_nodes = u_nodes[~np.in1d(u_nodes, mapping)] + u_unmapped_nodes = u_nodes[~np.isin(u_nodes, mapping)] unmapped_mapping = np.concatenate( [u_unmapped_nodes.reshape(-1, 1), u_unmapped_nodes.reshape(-1, 1)], axis=1 ) @@ -189,9 +189,9 @@ def _build_gt_graph(self, edges, affs): ) = flatgraph.build_gt_graph(comb_edges, comb_affs, make_directed=True) self.source_graph_ids = np.where( - np.in1d(self.unique_supervoxel_ids, self.sources) + np.isin(self.unique_supervoxel_ids, self.sources) )[0] - self.sink_graph_ids = np.where(np.in1d(self.unique_supervoxel_ids, self.sinks))[ + self.sink_graph_ids = np.where(np.isin(self.unique_supervoxel_ids, self.sinks))[ 0 ] @@ -395,10 +395,10 @@ def _remap_cut_edge_set(self, cut_edge_set): remapped_cutset = np.array(remapped_cutset, dtype=np.uint64) - remapped_cutset_flattened_view = remapped_cutset.view(dtype="u8,u8") - edges_flattened_view = self.cg_edges.view(dtype="u8,u8") + remapped_cutset_flattened_view = remapped_cutset.view(dtype="u8,u8").ravel() + edges_flattened_view = self.cg_edges.view(dtype="u8,u8").ravel() - cutset_mask = np.in1d(remapped_cutset_flattened_view, edges_flattened_view) + cutset_mask = np.isin(remapped_cutset_flattened_view, edges_flattened_view) return remapped_cutset[cutset_mask] @@ -432,8 +432,8 @@ def _get_split_preview_connected_components(self, cut_edge_set): max_sinks = 0 i = 0 for cc in ccs_test_post_cut: - num_sources = np.count_nonzero(np.in1d(self.source_graph_ids, cc)) - num_sinks = np.count_nonzero(np.in1d(self.sink_graph_ids, cc)) + num_sources = np.count_nonzero(np.isin(self.source_graph_ids, cc)) + num_sinks = np.count_nonzero(np.isin(self.sink_graph_ids, cc)) if num_sources > max_sources: max_sources = num_sources max_source_index = i @@ -486,8 +486,8 @@ def _filter_graph_connected_components(self): # If connected component contains no sources 
or no sinks, # remove its nodes from the mincut computation if not ( - np.any(np.in1d(self.source_graph_ids, cc)) - and np.any(np.in1d(self.sink_graph_ids, cc)) + np.any(np.isin(self.source_graph_ids, cc)) + and np.any(np.isin(self.sink_graph_ids, cc)) ): for node_id in cc: removed[node_id] = True @@ -525,13 +525,13 @@ def _gt_mincut_sanity_check(self, partition): np.array(np.where(partition.a == i_cc)[0], dtype=int) ] - if np.any(np.in1d(self.sources, cc_list)): - assert np.all(np.in1d(self.sources, cc_list)) - assert ~np.any(np.in1d(self.sinks, cc_list)) + if np.any(np.isin(self.sources, cc_list)): + assert np.all(np.isin(self.sources, cc_list)) + assert ~np.any(np.isin(self.sinks, cc_list)) - if np.any(np.in1d(self.sinks, cc_list)): - assert np.all(np.in1d(self.sinks, cc_list)) - assert ~np.any(np.in1d(self.sources, cc_list)) + if np.any(np.isin(self.sinks, cc_list)): + assert np.all(np.isin(self.sinks, cc_list)) + assert ~np.any(np.isin(self.sources, cc_list)) def _sink_and_source_connectivity_sanity_check(self, cut_edge_set): """ @@ -555,9 +555,9 @@ def _sink_and_source_connectivity_sanity_check(self, cut_edge_set): illegal_split = False try: for cc in ccs_test_post_cut: - if np.any(np.in1d(self.source_graph_ids, cc)): - assert np.all(np.in1d(self.source_graph_ids, cc)) - assert ~np.any(np.in1d(self.sink_graph_ids, cc)) + if np.any(np.isin(self.source_graph_ids, cc)): + assert np.all(np.isin(self.source_graph_ids, cc)) + assert ~np.any(np.isin(self.sink_graph_ids, cc)) if ( len(self.source_path_vertices) == len(cc) and self.disallow_isolating_cut @@ -565,9 +565,9 @@ def _sink_and_source_connectivity_sanity_check(self, cut_edge_set): if not self.partition_edges_within_label(cc): raise IsolatingCutException("Source") - if np.any(np.in1d(self.sink_graph_ids, cc)): - assert np.all(np.in1d(self.sink_graph_ids, cc)) - assert ~np.any(np.in1d(self.source_graph_ids, cc)) + if np.any(np.isin(self.sink_graph_ids, cc)): + assert np.all(np.isin(self.sink_graph_ids, cc)) + assert ~np.any(np.isin(self.source_graph_ids, cc)) if ( len(self.sink_path_vertices) == len(cc) and self.disallow_isolating_cut @@ -664,8 +664,8 @@ def run_split_preview( supervoxels = np.concatenate( [agg.supervoxels for agg in l2id_agglomeration_d.values()] ) - mask0 = np.in1d(edges.node_ids1, supervoxels) - mask1 = np.in1d(edges.node_ids2, supervoxels) + mask0 = np.isin(edges.node_ids1, supervoxels) + mask1 = np.isin(edges.node_ids2, supervoxels) edges = edges[mask0 & mask1] edges_to_remove, illegal_split = run_multicut( edges, diff --git a/pychunkedgraph/graph/edges/utils.py b/pychunkedgraph/graph/edges/utils.py index b49a9a547..a7bad1463 100644 --- a/pychunkedgraph/graph/edges/utils.py +++ b/pychunkedgraph/graph/edges/utils.py @@ -69,7 +69,9 @@ def merge_cross_edge_dicts(x_edges_d1: Dict, x_edges_d2: Dict) -> Dict: Combines two cross chunk dictionaries of form {node_id: {layer id : edge list}}. 
""" - node_ids = np.unique(list(x_edges_d1.keys()) + list(x_edges_d2.keys())) + node_ids = np.unique( + np.array(list(x_edges_d1.keys()) + list(x_edges_d2.keys()), dtype=basetypes.NODE_ID) + ) result_d = {} for node_id in node_ids: cross_edge_ds = [x_edges_d1.get(node_id, {}), x_edges_d2.get(node_id, {})] diff --git a/pychunkedgraph/graph/locks.py b/pychunkedgraph/graph/locks.py index e3918f0ea..40231c21c 100644 --- a/pychunkedgraph/graph/locks.py +++ b/pychunkedgraph/graph/locks.py @@ -49,12 +49,12 @@ def __init__( self.privileged_mode = privileged_mode def __enter__(self): + if not self.operation_id: + self.operation_id = self.cg.id_client.create_operation_id() + if self.privileged_mode: - assert self.operation_id is not None, "Please provide operation ID." warn("Warning: Privileged mode without acquiring lock.") return self - if not self.operation_id: - self.operation_id = self.cg.id_client.create_operation_id() nodes_ts = self.cg.get_node_timestamps(self.root_ids, return_numpy=0) min_ts = min(nodes_ts) diff --git a/pychunkedgraph/graph/misc.py b/pychunkedgraph/graph/misc.py index 0f53c71c3..38bc31508 100644 --- a/pychunkedgraph/graph/misc.py +++ b/pychunkedgraph/graph/misc.py @@ -142,7 +142,7 @@ def get_contact_sites( ) # Build area lookup dictionary - cs_svs = edges[~np.in1d(edges, sv_ids).reshape(-1, 2)] + cs_svs = edges[~np.isin(edges, sv_ids).reshape(-1, 2)] area_dict = collections.defaultdict(int) for area, sv_id in zip(areas, cs_svs): @@ -165,7 +165,7 @@ def get_contact_sites( cs_dict = collections.defaultdict(list) for cc in ccs: cc_sv_ids = unique_ids[cc] - cc_sv_ids = cc_sv_ids[np.in1d(cc_sv_ids, u_cs_svs)] + cc_sv_ids = cc_sv_ids[np.isin(cc_sv_ids, u_cs_svs)] cs_areas = area_dict_vec(cc_sv_ids) partner_root_id = ( int(cg.get_root(cc_sv_ids[0], time_stamp=time_stamp)) diff --git a/pychunkedgraph/graph/operation.py b/pychunkedgraph/graph/operation.py index 5295ade7f..995af7827 100644 --- a/pychunkedgraph/graph/operation.py +++ b/pychunkedgraph/graph/operation.py @@ -25,6 +25,7 @@ from . 
import attributes from .edges import Edges from .edges.utils import get_edges_status +from .edits import get_profiler from .utils import basetypes from .utils import serializers from .cache import CacheService @@ -419,6 +420,7 @@ def execute( op_type = "merge" if is_merge else "split" self.parent_ts = parent_ts root_ids = self._update_root_ids() + self.privileged_mode = self.privileged_mode or (is_merge and self.stitch_mode) with locks.RootLock( self.cg, root_ids, @@ -571,6 +573,7 @@ class MergeOperation(GraphEditOperation): "affinities", "bbox_offset", "allow_same_segment_merge", + "stitch_mode", ] def __init__( @@ -584,6 +587,7 @@ def __init__( bbox_offset: Tuple[int, int, int] = (240, 240, 24), affinities: Optional[Sequence[np.float32]] = None, allow_same_segment_merge: Optional[bool] = False, + stitch_mode: bool = False, ) -> None: super().__init__( cg, user_id=user_id, source_coords=source_coords, sink_coords=sink_coords @@ -591,6 +595,7 @@ def __init__( self.added_edges = np.atleast_2d(added_edges).astype(basetypes.NODE_ID) self.bbox_offset = np.atleast_1d(bbox_offset).astype(basetypes.COORDINATES) self.allow_same_segment_merge = allow_same_segment_merge + self.stitch_mode = stitch_mode self.affinities = None if affinities is not None: @@ -615,40 +620,55 @@ def _update_root_ids(self) -> np.ndarray: def _apply( self, *, operation_id, timestamp ) -> Tuple[np.ndarray, np.ndarray, List["bigtable.row.Row"]]: - root_ids = set( - self.cg.get_roots( - self.added_edges.ravel(), assert_roots=True, time_stamp=self.parent_ts + profiler = get_profiler() + + with profiler.profile("merge_apply_get_roots"): + root_ids = set( + self.cg.get_roots( + self.added_edges.ravel(), assert_roots=True, time_stamp=self.parent_ts + ) ) - ) if len(root_ids) < 2 and not self.allow_same_segment_merge: - raise PreconditionError("Supervoxels must belong to different objects.") - bbox = get_bbox(self.source_coords, self.sink_coords, self.bbox_offset) - with TimeIt("subgraph", self.cg.graph_id, operation_id): - edges = self.cg.get_subgraph( - root_ids, - bbox=bbox, - bbox_is_coordinate=True, - edges_only=True, + raise PreconditionError( + "Supervoxels must belong to different objects." + f" Tried to merge {self.added_edges.ravel()}," + f" which all belong to {tuple(root_ids)[0]}." 
) - if self.allow_same_segment_merge: - inactive_edges = types.empty_2d - else: - with TimeIt("preprocess", self.cg.graph_id, operation_id): - inactive_edges = edits.merge_preprocess( + atomic_edges = self.added_edges + fake_edge_rows = [] + if not self.stitch_mode: + bbox = get_bbox(self.source_coords, self.sink_coords, self.bbox_offset) + with profiler.profile("get_subgraph"): + with TimeIt("subgraph", self.cg.graph_id, operation_id): + edges = self.cg.get_subgraph( + root_ids, + bbox=bbox, + bbox_is_coordinate=True, + edges_only=True, + ) + + if self.allow_same_segment_merge: + inactive_edges = types.empty_2d + else: + with profiler.profile("merge_preprocess"): + with TimeIt("preprocess", self.cg.graph_id, operation_id): + inactive_edges = edits.merge_preprocess( + self.cg, + subgraph_edges=edges, + supervoxels=self.added_edges.ravel(), + parent_ts=self.parent_ts, + ) + + with profiler.profile("check_fake_edges"): + atomic_edges, fake_edge_rows = edits.check_fake_edges( self.cg, - subgraph_edges=edges, - supervoxels=self.added_edges.ravel(), + atomic_edges=self.added_edges, + inactive_edges=inactive_edges, + time_stamp=timestamp, parent_ts=self.parent_ts, ) - atomic_edges, fake_edge_rows = edits.check_fake_edges( - self.cg, - atomic_edges=self.added_edges, - inactive_edges=inactive_edges, - time_stamp=timestamp, - parent_ts=self.parent_ts, - ) with TimeIt("add_edges", self.cg.graph_id, operation_id): new_roots, new_l2_ids, new_entries = edits.add_edges( self.cg, @@ -657,6 +677,7 @@ def _apply( time_stamp=timestamp, parent_ts=self.parent_ts, allow_same_segment_merge=self.allow_same_segment_merge, + stitch_mode=self.stitch_mode, ) return new_roots, new_l2_ids, fake_edge_rows + new_entries @@ -867,18 +888,20 @@ def __init__( self.bbox_offset = np.atleast_1d(bbox_offset).astype(basetypes.COORDINATES) self.path_augment = path_augment self.disallow_isolating_cut = disallow_isolating_cut - if np.any(np.in1d(self.sink_ids, self.source_ids)): + if np.any(np.isin(self.sink_ids, self.source_ids)): raise PreconditionError( "Supervoxels exist in both sink and source, " "try placing the points further apart." ) - ids = np.concatenate([self.source_ids, self.sink_ids]) + ids = np.concatenate([self.source_ids, self.sink_ids]).astype(basetypes.NODE_ID) layers = self.cg.get_chunk_layers(ids) assert np.sum(layers) == layers.size, "IDs must be supervoxels." 
def _update_root_ids(self) -> np.ndarray: - sink_and_source_ids = np.concatenate((self.source_ids, self.sink_ids)) + sink_and_source_ids = np.concatenate((self.source_ids, self.sink_ids)).astype( + basetypes.NODE_ID + ) root_ids = np.unique( self.cg.get_roots( sink_and_source_ids, assert_roots=True, time_stamp=self.parent_ts @@ -894,7 +917,9 @@ def _apply( # Verify that sink and source are from the same root object root_ids = set( self.cg.get_roots( - np.concatenate([self.source_ids, self.sink_ids]), + np.concatenate([self.source_ids, self.sink_ids]).astype( + basetypes.NODE_ID + ), assert_roots=True, time_stamp=self.parent_ts, ) @@ -915,9 +940,9 @@ def _apply( edges = reduce(lambda x, y: x + y, edges_tuple, Edges([], [])) supervoxels = np.concatenate( [agg.supervoxels for agg in l2id_agglomeration_d.values()] - ) - mask0 = np.in1d(edges.node_ids1, supervoxels) - mask1 = np.in1d(edges.node_ids2, supervoxels) + ).astype(basetypes.NODE_ID) + mask0 = np.isin(edges.node_ids1, supervoxels) + mask1 = np.isin(edges.node_ids2, supervoxels) edges = edges[mask0 & mask1] if len(edges) == 0: raise PreconditionError("No local edges found.") diff --git a/pychunkedgraph/graph/segmenthistory.py b/pychunkedgraph/graph/segmenthistory.py index 30f42d15b..0a215cf92 100644 --- a/pychunkedgraph/graph/segmenthistory.py +++ b/pychunkedgraph/graph/segmenthistory.py @@ -78,7 +78,7 @@ def operation_id_root_id_dict(self): @property def operation_ids(self): - return np.array(list(self.operation_id_root_id_dict.keys())) + return np.array(list(self.operation_id_root_id_dict.keys()), dtype=basetypes.OPERATION_ID) @property def _log_rows(self): diff --git a/pychunkedgraph/graph/utils/id_helpers.py b/pychunkedgraph/graph/utils/id_helpers.py index aa486ac84..2a245f79c 100644 --- a/pychunkedgraph/graph/utils/id_helpers.py +++ b/pychunkedgraph/graph/utils/id_helpers.py @@ -89,7 +89,7 @@ def get_atomic_id_from_coord( # sort by frequency and discard those ids that have been checked # previously sorted_atomic_ids = atomic_ids[np.argsort(atomic_id_count)] - sorted_atomic_ids = sorted_atomic_ids[~np.in1d(sorted_atomic_ids, checked)] + sorted_atomic_ids = sorted_atomic_ids[~np.isin(sorted_atomic_ids, checked)] # For each candidate id check whether its root id corresponds to the # given root id diff --git a/pychunkedgraph/graph/utils/serializers.py b/pychunkedgraph/graph/utils/serializers.py index 09c0f63b0..3b0101d86 100644 --- a/pychunkedgraph/graph/utils/serializers.py +++ b/pychunkedgraph/graph/utils/serializers.py @@ -41,7 +41,9 @@ def _deserialize(val, dtype, shape=None, order=None): def __init__(self, dtype, shape=None, order=None, compression_level=None): super().__init__( - serializer=lambda x: x.newbyteorder(dtype.byteorder).tobytes(), + serializer=lambda x: x.view( + x.dtype.newbyteorder(dtype.byteorder) + ).tobytes(), deserializer=lambda x: NumPyArray._deserialize( x, dtype, shape=shape, order=order ), @@ -53,7 +55,9 @@ def __init__(self, dtype, shape=None, order=None, compression_level=None): class NumPyValue(_Serializer): def __init__(self, dtype): super().__init__( - serializer=lambda x: x.newbyteorder(dtype.byteorder).tobytes(), + serializer=lambda x: x.view( + x.dtype.newbyteorder(dtype.byteorder) + ).tobytes(), deserializer=lambda x: np.frombuffer(x, dtype=dtype)[0], basetype=dtype.type, ) @@ -96,7 +100,7 @@ def __init__(self): def pad_node_id(node_id: np.uint64) -> str: - """ Pad node id to 20 digits + """Pad node id to 20 digits :param node_id: int :return: str @@ -105,7 +109,7 @@ def pad_node_id(node_id: 
np.uint64) -> str: def serialize_uint64(node_id: np.uint64, counter=False, fake_edges=False) -> bytes: - """ Serializes an id to be ingested by a bigtable table row + """Serializes an id to be ingested by a bigtable table row :param node_id: int :return: str @@ -118,7 +122,7 @@ def serialize_uint64(node_id: np.uint64, counter=False, fake_edges=False) -> byt def serialize_uint64s_to_regex(node_ids: Iterable[np.uint64]) -> bytes: - """ Serializes an id to be ingested by a bigtable table row + """Serializes an id to be ingested by a bigtable table row :param node_id: int :return: str @@ -128,7 +132,7 @@ def serialize_uint64s_to_regex(node_ids: Iterable[np.uint64]) -> bytes: def deserialize_uint64(node_id: bytes, fake_edges=False) -> np.uint64: - """ De-serializes a node id from a BigTable row + """De-serializes a node id from a BigTable row :param node_id: bytes :return: np.uint64 @@ -139,7 +143,7 @@ def deserialize_uint64(node_id: bytes, fake_edges=False) -> np.uint64: def serialize_key(key: str) -> bytes: - """ Serializes a key to be ingested by a bigtable table row + """Serializes a key to be ingested by a bigtable table row :param key: str :return: bytes @@ -148,7 +152,7 @@ def serialize_key(key: str) -> bytes: def deserialize_key(key: bytes) -> str: - """ Deserializes a row key + """Deserializes a row key :param key: bytes :return: str diff --git a/pychunkedgraph/ingest/create/atomic_layer.py b/pychunkedgraph/ingest/create/atomic_layer.py index 0a7aae728..e235d36d4 100644 --- a/pychunkedgraph/ingest/create/atomic_layer.py +++ b/pychunkedgraph/ingest/create/atomic_layer.py @@ -68,8 +68,10 @@ def _get_chunk_nodes_and_edges(chunk_edges_d: dict, isolated_ids: Sequence[int]) in-chunk edges and nodes_ids """ isolated_nodes_self_edges = np.vstack([isolated_ids, isolated_ids]).T - node_ids = [isolated_ids] - edge_ids = [isolated_nodes_self_edges] + node_ids = [isolated_ids] if len(isolated_ids) != 0 else [] + edge_ids = ( + [isolated_nodes_self_edges] if len(isolated_nodes_self_edges) != 0 else [] + ) for edge_type in EDGE_TYPES: edges = chunk_edges_d[edge_type] node_ids.append(edges.node_ids1) @@ -77,9 +79,9 @@ def _get_chunk_nodes_and_edges(chunk_edges_d: dict, isolated_ids: Sequence[int]) node_ids.append(edges.node_ids2) edge_ids.append(edges.get_pairs()) - chunk_node_ids = np.unique(np.concatenate(node_ids)) + chunk_node_ids = np.unique(np.concatenate(node_ids).astype(basetypes.NODE_ID)) edge_ids.append(np.vstack([chunk_node_ids, chunk_node_ids]).T) - return (chunk_node_ids, np.concatenate(edge_ids)) + return (chunk_node_ids, np.concatenate(edge_ids).astype(basetypes.NODE_ID)) def _get_remapping(chunk_edges_d: dict): @@ -116,7 +118,7 @@ def _process_component( r_key = serializers.serialize_uint64(node_id) nodes.append(cg.client.mutate_row(r_key, val_dict, time_stamp=time_stamp)) - chunk_out_edges = np.concatenate(chunk_out_edges) + chunk_out_edges = np.concatenate(chunk_out_edges).astype(basetypes.NODE_ID) cce_layers = cg.get_cross_chunk_edges_layer(chunk_out_edges) u_cce_layers = np.unique(cce_layers) @@ -147,5 +149,7 @@ def _get_outgoing_edges(node_id, chunk_edges_d, sparse_indices, remapping): ] row_ids = row_ids[column_ids == 0] # edges that this node is part of - chunk_out_edges = np.concatenate([chunk_out_edges, edges[row_ids]]) + chunk_out_edges = np.concatenate([chunk_out_edges, edges[row_ids]]).astype( + basetypes.NODE_ID + ) return chunk_out_edges diff --git a/pychunkedgraph/ingest/create/parent_layer.py b/pychunkedgraph/ingest/create/parent_layer.py index 90b24d26a..dfdb48dac 
100644 --- a/pychunkedgraph/ingest/create/parent_layer.py +++ b/pychunkedgraph/ingest/create/parent_layer.py @@ -73,7 +73,7 @@ def _read_children_chunks( children_ids = [types.empty_1d] for child_coord in children_coords: children_ids.append(_read_chunk([], cg, layer_id - 1, child_coord)) - return np.concatenate(children_ids) + return np.concatenate(children_ids).astype(basetypes.NODE_ID) with mp.Manager() as manager: children_ids_shared = manager.list() @@ -92,7 +92,7 @@ def _read_children_chunks( multi_args, n_threads=min(len(multi_args), mp.cpu_count()), ) - return np.concatenate(children_ids_shared) + return np.concatenate(children_ids_shared).astype(basetypes.NODE_ID) def _read_chunk_helper(args): diff --git a/pychunkedgraph/ingest/ran_agglomeration.py b/pychunkedgraph/ingest/ran_agglomeration.py index a0ca42d54..d726ba4a5 100644 --- a/pychunkedgraph/ingest/ran_agglomeration.py +++ b/pychunkedgraph/ingest/ran_agglomeration.py @@ -314,7 +314,9 @@ def get_active_edges(edges_d, mapping): if edge_type == EDGE_TYPES.in_chunk: pseudo_isolated_ids.append(edges.node_ids2) - return chunk_edges_active, np.unique(np.concatenate(pseudo_isolated_ids)) + return chunk_edges_active, np.unique( + np.concatenate(pseudo_isolated_ids).astype(basetypes.NODE_ID) + ) def define_active_edges(edge_dict, mapping) -> Union[Dict, np.ndarray]: @@ -380,7 +382,7 @@ def read_raw_agglomeration_data(imanager: IngestionManager, chunk_coord: np.ndar edges_list = _read_agg_files(filenames, chunk_ids, path) G = nx.Graph() - G.add_edges_from(np.concatenate(edges_list)) + G.add_edges_from(np.concatenate(edges_list).astype(basetypes.NODE_ID)) mapping = {} components = list(nx.connected_components(G)) for i_cc, cc in enumerate(components): diff --git a/pychunkedgraph/meshing/manifest/utils.py b/pychunkedgraph/meshing/manifest/utils.py index 67e600653..90963570c 100644 --- a/pychunkedgraph/meshing/manifest/utils.py +++ b/pychunkedgraph/meshing/manifest/utils.py @@ -40,7 +40,7 @@ def _get_children(cg, node_ids: Sequence[np.uint64], children_cache: Dict): if len(node_ids) == 0: return empty_1d.copy() node_ids = np.array(node_ids, dtype=NODE_ID) - mask = np.in1d(node_ids, np.fromiter(children_cache.keys(), dtype=NODE_ID)) + mask = np.isin(node_ids, np.fromiter(children_cache.keys(), dtype=NODE_ID)) children_d = cg.get_children(node_ids[~mask]) children_cache.update(children_d) diff --git a/pychunkedgraph/meshing/meshgen.py b/pychunkedgraph/meshing/meshgen.py index a8da89b1f..d137d52ad 100644 --- a/pychunkedgraph/meshing/meshgen.py +++ b/pychunkedgraph/meshing/meshgen.py @@ -75,7 +75,7 @@ def remap_seg_using_unsafe_dict(seg, unsafe_dict): overlaps.extend(np.unique(seg[:, :, -2][bin_cc_seg[:, :, -1]])) overlaps = np.unique(overlaps) - linked_l2_ids = overlaps[np.in1d(overlaps, unsafe_dict[unsafe_root_id])] + linked_l2_ids = overlaps[np.isin(overlaps, unsafe_dict[unsafe_root_id])] if len(linked_l2_ids) == 0: seg[bin_cc_seg] = 0 @@ -253,7 +253,7 @@ def _get_root_ids(args): lx_id_remap = get_higher_to_lower_remapping(cg, chunk_id, time_stamp=time_stamp) - lx_ids = np.array(list(lx_id_remap.keys())) + lx_ids = np.array(list(lx_id_remap.keys()), dtype=np.uint64) root_ids = np.zeros(len(lx_ids), dtype=np.uint64) n_jobs = np.min([n_threads, len(lx_ids)]) @@ -357,7 +357,7 @@ def get_lx_overlapping_remappings(cg, chunk_id, time_stamp=None, n_threads=1): ) safe_lx_ids = lx_ids[u_idx[c_root_ids == 1]] - unsafe_lx_ids = lx_ids[~np.in1d(lx_ids, safe_lx_ids)] + unsafe_lx_ids = lx_ids[~np.isin(lx_ids, safe_lx_ids)] unsafe_root_ids = 
np.unique(root_ids[u_idx[c_root_ids != 1]]) lx_root_dict = dict(zip(neigh_lx_ids, neigh_root_ids)) @@ -387,7 +387,7 @@ def get_lx_overlapping_remappings(cg, chunk_id, time_stamp=None, n_threads=1): unsafe_dict = collections.defaultdict(list) for root_id in unsafe_root_ids: - if np.sum(~np.in1d(root_lx_dict[root_id], unsafe_lx_ids)) == 0: + if np.sum(~np.isin(root_lx_dict[root_id], unsafe_lx_ids)) == 0: continue for neigh_lx_id in root_lx_dict[root_id]: @@ -935,6 +935,7 @@ def chunk_initial_mesh_task( cv = CloudVolume( f"graphene://https://localhost/segmentation/table/dummy", info=meshgen_utils.get_json_info(cg), + secrets={"token": "dummy"}, ) sharding_info = cv.mesh.meta.info["sharding"]["2"] sharding_spec = ShardingSpecification.from_dict(sharding_info) @@ -1033,8 +1034,8 @@ def get_multi_child_nodes(cg, chunk_id, node_id_subset=None, chunk_bbox_string=F node_ids=node_id_subset, properties=attributes.Hierarchy.Child ) - node_ids = np.array(list(range_read.keys())) - node_rows = np.array(list(range_read.values())) + node_ids = np.array(list(range_read.keys()), dtype=np.uint64) + node_rows = np.array(list(range_read.values()), dtype=object) child_fragments = np.array( [ fragment.value @@ -1123,6 +1124,7 @@ def chunk_stitch_remeshing_task( f"graphene://https://localhost/segmentation/table/dummy", mesh_dir=cv_sharded_mesh_dir, info=meshgen_utils.get_json_info(cg), + secrets={"token": "dummy"}, ) fragments_in_batch_processed = 0 @@ -1257,6 +1259,7 @@ def chunk_initial_sharded_stitching_task( cv = CloudVolume( f"graphene://https://localhost/segmentation/table/dummy", info=meshgen_utils.get_json_info(cg), + secrets={"token": "dummy"}, ) shard_filenames = [] shard_to_chunk_id = {} diff --git a/pychunkedgraph/meshing/meshgen_utils.py b/pychunkedgraph/meshing/meshgen_utils.py index 711c09322..43e6f5c3a 100644 --- a/pychunkedgraph/meshing/meshgen_utils.py +++ b/pychunkedgraph/meshing/meshgen_utils.py @@ -129,7 +129,13 @@ def recursive_helper(cur_node_ids): only_child_mask = np.array( [len(children_for_node) == 1 for children_for_node in children_array] ) - only_children = children_array[only_child_mask].astype(np.uint64).ravel() + # Extract children from object array - each filtered element is a 1-element array + filtered_children = children_array[only_child_mask] + only_children = ( + np.concatenate(filtered_children).astype(np.uint64) + if filtered_children.size + else np.array([], dtype=np.uint64) + ) if np.any(only_child_mask): temp_array = cur_node_ids[stop_layer_mask] temp_array[only_child_mask] = recursive_helper(only_children) diff --git a/pychunkedgraph/utils/general.py b/pychunkedgraph/utils/general.py index ac4929660..8913025c7 100644 --- a/pychunkedgraph/utils/general.py +++ b/pychunkedgraph/utils/general.py @@ -40,4 +40,4 @@ def chunked(l: Sequence, n: int): def in2d(arr1: np.ndarray, arr2: np.ndarray) -> np.ndarray: arr1_view = arr1.view(dtype="u8,u8").reshape(arr1.shape[0]) arr2_view = arr2.view(dtype="u8,u8").reshape(arr2.shape[0]) - return np.in1d(arr1_view, arr2_view) + return np.isin(arr1_view, arr2_view) diff --git a/requirements.in b/requirements.in index 4fcd353ed..bf735af22 100644 --- a/requirements.in +++ b/requirements.in @@ -18,15 +18,15 @@ werkzeug tensorstore # PyPI only: -cloud-files>=4.21.1 -cloud-volume>=8.26.0 +cloud-files>=5.3.0 +cloud-volume>=12.2.0 multiwrapper middle-auth-client>=3.11.0 zmesh>=1.7.0 fastremap>=1.14.0 task-queue>=2.13.0 messagingclient -dracopy>=1.3.0 +dracopy>=1.5.0 datastoreflex>=0.5.0 zstandard==0.21.0 diff --git a/requirements.txt 
b/requirements.txt index 0eedacb31..35014d4de 100644 --- a/requirements.txt +++ b/requirements.txt @@ -41,22 +41,18 @@ click==8.1.7 # -r requirements.in # cloud-files # compressed-segmentation - # compresso # flask + # microviewer # rq # task-queue -cloud-files==4.21.1 +cloud-files==5.3.0 # via # -r requirements.in # cloud-volume # datastoreflex -cloud-volume==8.26.0 +cloud-volume==12.2.0 # via -r requirements.in -compressed-segmentation==2.2.1 - # via cloud-volume -compresso==3.2.1 - # via cloud-volume -crackle-codec==0.7.0 +compressed-segmentation==2.3.2 # via cloud-volume crc32c==2.3.post0 # via cloud-files @@ -68,7 +64,7 @@ dill==0.3.7 # via # multiprocess # pathos -dracopy==1.3.0 +dracopy==1.5.0 # via # -r requirements.in # cloud-volume @@ -78,7 +74,7 @@ fastremap==1.14.0 # via # -r requirements.in # cloud-volume - # crackle-codec + # osteoid flask==2.3.3 # via # -r requirements.in @@ -86,8 +82,6 @@ flask==2.3.3 # middle-auth-client flask-cors==4.0.0 # via -r requirements.in -fpzip==1.2.2 - # via cloud-volume furl==2.1.3 # via middle-auth-client gevent==23.9.1 @@ -189,9 +183,11 @@ markupsafe==2.1.3 # werkzeug messagingclient==0.1.3 # via -r requirements.in +microviewer==1.13.1 + # via cloud-volume middle-auth-client==3.16.1 # via -r requirements.in -ml-dtypes==0.3.2 +ml-dtypes==0.5.1 # via tensorstore multiprocess==0.70.15 # via pathos @@ -201,24 +197,22 @@ networkx==3.1 # via # -r requirements.in # cloud-volume + # osteoid numpy==1.26.0 # via # -r requirements.in # cloud-volume # compressed-segmentation - # compresso - # crackle-codec # fastremap - # fpzip # messagingclient + # microviewer # ml-dtypes # multiwrapper + # osteoid # pandas - # pyspng-seunglab # simplejpeg # task-queue # tensorstore - # zfpc # zmesh orderedmultidict==1.0.1 # via furl @@ -226,6 +220,8 @@ orjson==3.9.7 # via # cloud-files # task-queue +osteoid==0.3.1 + # via cloud-volume packaging==23.1 # via pytest pandas==2.1.1 @@ -237,8 +233,6 @@ pathos==0.3.1 # task-queue pbr==5.11.1 # via task-queue -pillow==10.0.1 - # via cloud-volume pluggy==1.3.0 # via pytest posix-ipc==1.1.1 @@ -273,12 +267,8 @@ pyasn1==0.5.0 # rsa pyasn1-modules==0.3.0 # via google-auth -pybind11==2.11.1 - # via crackle-codec pysimdjson==5.0.2 # via cloud-volume -pyspng-seunglab==1.1.0 - # via cloud-volume pytest==7.4.2 # via compressed-segmentation python-dateutil==2.8.2 @@ -340,7 +330,7 @@ tenacity==8.2.3 # cloud-files # cloud-volume # task-queue -tensorstore==0.1.53 +tensorstore==0.1.75 # via -r requirements.in tqdm==4.66.1 # via @@ -360,10 +350,6 @@ werkzeug==2.3.8 # via # -r requirements.in # flask -zfpc==0.1.2 - # via cloud-volume -zfpy==1.0.0 - # via zfpc zmesh==1.7.0 # via -r requirements.in zope-event==5.0
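
Throughout the patch, np.in1d is replaced by np.isin: np.in1d is deprecated as of NumPy 2.0 in favour of np.isin, which gives identical results for 1-D inputs (and preserves the shape of the first argument for N-D inputs). A minimal standalone sketch of the supervoxel-masking pattern used in handle_subgraph and run_split_preview, not project code:

    import numpy as np

    NODE_ID = np.uint64  # matches basetypes.NODE_ID used for supervoxel ids

    node_ids1 = np.array([10, 11, 12, 13], dtype=NODE_ID)
    node_ids2 = np.array([11, 12, 13, 14], dtype=NODE_ID)
    supervoxels = np.array([11, 12, 13], dtype=NODE_ID)

    # keep only edges whose endpoints are both inside the supervoxel set
    mask0 = np.isin(node_ids1, supervoxels)
    mask1 = np.isin(node_ids2, supervoxels)
    keep = mask0 & mask1
    assert keep.tolist() == [False, True, True, False]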
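
The cutting and utils code compares whole edges (pairs of uint64 ids) by viewing the (N, 2) array with a structured "u8,u8" dtype so that each row collapses to a single comparable value. That view has shape (N, 1); np.in1d implicitly flattened it, while np.isin preserves the shape, which is why _remap_cut_edge_set now adds .ravel() before the membership test. A standalone sketch of the pattern, assuming uint64 edge arrays:

    import numpy as np

    edges = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.uint64)   # shape (3, 2)
    cutset = np.array([[3, 4]], dtype=np.uint64)

    # each 16-byte row becomes one structured value; the raw view is (N, 1)
    edges_view = edges.view(dtype="u8,u8").ravel()
    cutset_view = cutset.view(dtype="u8,u8").ravel()

    mask = np.isin(edges_view, cutset_view)                       # one bool per edge row
    assert edges[mask].tolist() == [[3, 4]]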
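
The np.array(x, dtype=..., copy=False) calls become np.asarray(x, dtype=...) because NumPy 2.0 changed the meaning of copy=False: it now means "never copy" and raises ValueError whenever a copy would be required (for instance when converting a Python list or changing dtype), whereas np.asarray keeps the old "copy only if necessary" behaviour. A small sketch of the difference, assuming NumPy 2.x:

    import numpy as np

    node_ids = [1, 2, 3]                          # plain list: a copy is unavoidable

    a = np.asarray(node_ids, dtype=np.uint64)     # works on NumPy 1.x and 2.x alike

    try:
        b = np.array(node_ids, dtype=np.uint64, copy=False)   # ValueError on NumPy 2.x
    except ValueError:
        b = np.asarray(node_ids, dtype=np.uint64)

    assert np.array_equal(a, b)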
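
In the serializers, ndarray.newbyteorder() (removed in NumPy 2.0) is replaced by viewing the array with a byte-order-adjusted dtype; dtype.newbyteorder() is the method that survives. Both forms only relabel the dtype on the same underlying buffer before tobytes() dumps it. A minimal sketch of the equivalence:

    import numpy as np

    arr = np.array([1, 2, 3], dtype=np.uint64)
    order = arr.dtype.byteorder                   # '=', '<' or '>' depending on platform/dtype

    # NumPy 1.x:  arr.newbyteorder(order).tobytes()
    # NumPy 2.x:  relabel the byte order through a dtype view instead
    relabelled = arr.view(arr.dtype.newbyteorder(order))
    assert relabelled.tobytes() == arr.tobytes()  # only dtype metadata changes, not the bytes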
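
Many hunks add explicit basetypes.NODE_ID (uint64) dtypes or .astype(basetypes.NODE_ID) casts around np.array, np.unique, and np.concatenate. This guards against NumPy's default promotions: an empty list becomes float64, and mixing int64 values with uint64 node ids also promotes to float64, which cannot represent large 64-bit ids exactly. A standalone illustration:

    import numpy as np

    NODE_ID = np.uint64

    assert np.array([]).dtype == np.float64               # empty input defaults to float64
    assert np.array([], dtype=NODE_ID).dtype == NODE_ID   # explicit dtype keeps it uint64

    big_id = 2**63 + 5                                     # fits in uint64, not in int64
    ids = np.array([big_id], dtype=NODE_ID)

    promoted = np.concatenate([ids, np.array([1])])        # uint64 with int64 -> float64
    assert promoted.dtype == np.float64 and int(promoted[0]) != big_id

    kept = np.concatenate([ids, np.array([1], dtype=NODE_ID)])
    assert kept.dtype == NODE_ID and int(kept[0]) == big_id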
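
The meshgen_utils change replaces children_array[only_child_mask].astype(np.uint64).ravel() with an explicit np.concatenate: children_array is an object array whose elements are child-id arrays of varying length, and casting such an object array of arrays to uint64 is fragile in recent NumPy, while concatenating the selected single-element arrays (with an empty-array fallback) is well defined. A small sketch on hypothetical data:

    import numpy as np

    # hypothetical children lookup: each entry is the child-id array of one node
    children_array = np.empty(3, dtype=object)
    for i, kids in enumerate([[7], [8, 9], [10]]):
        children_array[i] = np.array(kids, dtype=np.uint64)

    only_child_mask = np.array([len(kids) == 1 for kids in children_array])

    filtered_children = children_array[only_child_mask]    # object array of 1-element arrays
    only_children = (
        np.concatenate(filtered_children).astype(np.uint64)
        if filtered_children.size
        else np.array([], dtype=np.uint64)
    )
    assert only_children.tolist() == [7, 10]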
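
MergeOperation._apply now wraps its stages in profiler.profile(...) blocks using a get_profiler helper imported from .edits. That helper's implementation is not part of this diff; the sketch below is only a hypothetical stand-in showing the calling convention the new code assumes, namely an object whose profile(name) acts as a context manager:

    import time
    from contextlib import contextmanager


    class _TimingProfiler:
        """Hypothetical stand-in; the real get_profiler() lives in pychunkedgraph.graph.edits."""

        def __init__(self):
            self.timings = {}

        @contextmanager
        def profile(self, name):
            start = time.perf_counter()
            try:
                yield
            finally:
                self.timings[name] = time.perf_counter() - start


    def get_profiler():
        return _TimingProfiler()


    profiler = get_profiler()
    with profiler.profile("merge_apply_get_roots"):
        pass  # stage being timed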