From 2823e8a812453b83a8c69b732578e65d3eb5b126 Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Mon, 29 Sep 2025 21:19:26 +0200 Subject: [PATCH 01/28] init --- brainbuilder/app/sonata.py | 31 +++++ brainbuilder/utils/sonata/convert_allen_v1.py | 124 ++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 brainbuilder/utils/sonata/convert_allen_v1.py diff --git a/brainbuilder/app/sonata.py b/brainbuilder/app/sonata.py index f8c9fc2..c23f8fc 100644 --- a/brainbuilder/app/sonata.py +++ b/brainbuilder/app/sonata.py @@ -44,6 +44,37 @@ def from_mvd3(mvd3, output, model_type, mecombo_info, population): convert.provide_me_info(mvd3, output, model_type, mecombo_info, population) +@app.command() +@click.option("-o", "--output", help="directory to output SONATA files", required=True) +@click.option("--nodes-file", help="Path to nodes file", required=True) +@click.option("--node-types-file", help="Path to node type file", required=True) +@click.option("--edges-file", help="Path to edges file", required=True) +@click.option("--edge-types-file", help="Path to edge type file", required=True) +def from_allen_circuit(nodes_file, node_types_file, edges_file, edge_types_file, output): + """Provide SONATA nodes with MorphoElectrical info""" + from brainbuilder.utils.sonata import convert_allen_v1 + from voxcell import CellCollection + from pathlib import Path + + import h5py + + node_file_name = Path(nodes_file).name + edge_file_name = Path(edges_file).name + + cells_df, node_pop = convert_allen_v1.load_allen_nodes(nodes_file, node_types_file) + edges_df, src_pop, tgt_pop = convert_allen_v1.load_allen_edges(edges_file, edge_types_file) + + # save to sonata + if not Path(output).exists(): + Path(output).mkdir(parents=True, exist_ok=True) + cells = CellCollection.from_dataframe(cells_df, index_offset=0) + cells.population_name = node_pop + cells.save_sonata(Path(output) / node_file_name, cells_df) + + with h5py.File(Path(output) / edge_file_name, "w") as h5f: + convert_allen_v1.write_edges_from_dataframe(edges_df, src_pop, tgt_pop, h5f) + + @app.command() @click.argument("cells-path") @click.option("-o", "--output", help="Path to output SONATA nodes", required=True) diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py new file mode 100644 index 0000000..f1f5249 --- /dev/null +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -0,0 +1,124 @@ +import h5py +import pandas as pd +import numpy as np +from itertools import chain + + +def sonata_to_dataframe(sonata_file, file_type="nodes"): + cells_df = pd.DataFrame() + with h5py.File(sonata_file, "r") as h5f: + population_names = list(h5f[f"/{file_type}"].keys()) + assert len(population_names) == 1, "Single population is supported only" + population_name = population_names[0] + + population = h5f[f"/{file_type}/{population_name}"] + assert "0" in population, 'Single group "0" is supported only' + group = population["0"] + + for key in group.keys(): + cells_df[key] = group[key][()] + type_id_key = "node_type_id" if file_type == "nodes" else "edge_type_id" + cells_df[type_id_key] = population[type_id_key][()] + res_pop = population_name + if file_type == "edges": + src_group = population["source_node_id"] + tgt_group = population["target_node_id"] + cells_df["source_node_id"] = src_group[()] + cells_df["target_node_id"] = tgt_group[()] + src_pop = src_group.attrs["node_population"] + tgt_pop = tgt_group.attrs["node_population"] + res_pop = (src_pop, tgt_pop) + return cells_df, res_pop + + +def load_allen_nodes(nodes_file, node_types_file): + node_types_df = pd.read_csv(node_types_file, sep=r"\s+") + cells_df, node_population = sonata_to_dataframe(nodes_file, file_type="nodes") + cells_df = cells_df.merge( + node_types_df[ + ["node_type_id", "model_template", "morphology", "rotation_angle_zaxis", "model_type"] + ], + on="node_type_id", + how="left", + ) + # print(cells_df.loc[cells_df["node_type_id"] >= 100000121, "rotation_angle_yaxis"]) + cells_df["model_template"] = cells_df["model_template"].str.replace( + r"^.*:([A-Za-z0-9_]+)(?:\.hoc)?$", r"hoc:\1", regex=True + ) + cells_df["morphology"] = cells_df["morphology"].str.replace(r"\.[^.]+$", "", regex=True) + cells_df["rotation_angle_zaxis"] = cells_df["rotation_angle_zaxis"].fillna(0) + cells_df["morphology"] = cells_df["morphology"].fillna("none") + + return cells_df, node_population + + +def load_allen_edges(edges_file, edge_types_file): + edge_types_df = pd.read_csv(edge_types_file, sep=r"\s+") + edges_df, pop = sonata_to_dataframe(edges_file, file_type="edges") + assert len(pop) == 2, "Should return source and target population names for edges" + edges_df = edges_df.merge( + edge_types_df[["edge_type_id", "syn_weight", "delay"]], on="edge_type_id", how="left" + ) + # print(cells_df.loc[cells_df["node_type_id"] >= 100000121, "rotation_angle_yaxis"]) + edges_df.rename(columns={"syn_weight": "conductance"}, inplace=True) + # edges_df["target_query"]=edges_df["target_query"].str.split("&pop_name==", n=1, expand=True)[1].str.strip("'") + # edges_df["source_query"]=edges_df["target_query"].str.split("&pop_name==", n=1, expand=True)[1].str.strip("'") + return edges_df, pop[0], pop[1] + + +def split_dataframe_by_attribute(df, attribute_name): + return dict(tuple(df.groupby(attribute_name))) + + +def write_edges_from_dataframe(data_df, src_pop, tgt_pop, outfile): + edge_population = f"{src_pop}__{tgt_pop}__chemical" + group = outfile.create_group(f"/edges/{edge_population}") + group_pop = group.create_group("0") + dset = group.create_dataset("source_node_id", data=data_df["source_node_id"].to_numpy()) + dset.attrs["node_population"] = src_pop + dset = group.create_dataset("target_node_id", data=data_df["target_node_id"].to_numpy()) + dset.attrs["node_population"] = tgt_pop + print(data_df[data_df["source_node_id"] == 0]["target_node_id"]) + print(data_df[data_df["target_node_id"] == 0]["source_node_id"]) + group.create_dataset("edge_type_id", data=data_df["edge_type_id"].to_numpy()) + for attribute_name in set(data_df.columns) - set( + ["source_node_id", "target_node_id", "edge_type_id"] + ): + group_pop.create_dataset(attribute_name, data=data_df[attribute_name].to_numpy()) + group_indices_src = group.create_group("indices/source_to_target") + group_indices_tgt = group.create_group("indices/target_to_source") + write_index_group(group_indices_src, data_df.groupby("source_node_id")) + write_index_group(group_indices_tgt, data_df.groupby("target_node_id")) + + +def write_index_group(group, grouped_df): + node_to_edge_ids = { + key: indices_to_ranges(list(value)) for key, value in grouped_df.groups.items() + } + range_ids = ranges_per_node(node_to_edge_ids) + edge_ids = list(chain.from_iterable(node_to_edge_ids.values())) + group.create_dataset("node_id_to_ranges", data=range_ids) + group.create_dataset("range_to_edge_id", data=edge_ids) + + +def indices_to_ranges(indices): + if not indices: + return [] + arr = np.sort(np.array(indices)) + # find where consecutive list breaks + breaks = np.where(np.diff(arr) != 1)[0] + 1 + starts = np.concatenate(([0], breaks)) + ends = np.concatenate((breaks, [len(arr)])) + # equivalent to [[arr[s], arr[e-1]+1] for s, e in zip(starts, ends)] + return np.stack((arr[starts], arr[ends - 1] + 1), axis=1) + + +def ranges_per_node(node_to_edge_ids): + res = [] + start = 0 + for ranges in node_to_edge_ids.values(): + n_ranges = len(ranges) + end = start + n_ranges + res.append([start, end]) + start = end + return res From 91e17b25311abe3a81cd832c5100ae636efd6b99 Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Tue, 30 Sep 2025 11:24:22 +0200 Subject: [PATCH 02/28] expand nsyns --- brainbuilder/utils/sonata/convert_allen_v1.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index f1f5249..3dc5f97 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -63,7 +63,11 @@ def load_allen_edges(edges_file, edge_types_file): edges_df.rename(columns={"syn_weight": "conductance"}, inplace=True) # edges_df["target_query"]=edges_df["target_query"].str.split("&pop_name==", n=1, expand=True)[1].str.strip("'") # edges_df["source_query"]=edges_df["target_query"].str.split("&pop_name==", n=1, expand=True)[1].str.strip("'") - return edges_df, pop[0], pop[1] + + edges_df_expanded = edges_df.loc[edges_df.index.repeat(edges_df["nsyns"])].reset_index( + drop=True + ) + return edges_df_expanded, pop[0], pop[1] def split_dataframe_by_attribute(df, attribute_name): From af880599fe68d14a06a43dff7e2ee72cb8e541e6 Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Tue, 30 Sep 2025 20:34:15 +0200 Subject: [PATCH 03/28] adjust syn weight --- brainbuilder/app/sonata.py | 2 + brainbuilder/utils/sonata/convert_allen_v1.py | 79 +++++++++++++++---- 2 files changed, 66 insertions(+), 15 deletions(-) diff --git a/brainbuilder/app/sonata.py b/brainbuilder/app/sonata.py index c23f8fc..2e12205 100644 --- a/brainbuilder/app/sonata.py +++ b/brainbuilder/app/sonata.py @@ -63,6 +63,7 @@ def from_allen_circuit(nodes_file, node_types_file, edges_file, edge_types_file, cells_df, node_pop = convert_allen_v1.load_allen_nodes(nodes_file, node_types_file) edges_df, src_pop, tgt_pop = convert_allen_v1.load_allen_edges(edges_file, edge_types_file) + edges_df = convert_allen_v1.prepare_synapses(edges_df, cells_df) # save to sonata if not Path(output).exists(): @@ -71,6 +72,7 @@ def from_allen_circuit(nodes_file, node_types_file, edges_file, edge_types_file, cells.population_name = node_pop cells.save_sonata(Path(output) / node_file_name, cells_df) + with h5py.File(Path(output) / edge_file_name, "w") as h5f: convert_allen_v1.write_edges_from_dataframe(edges_df, src_pop, tgt_pop, h5f) diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index 3dc5f97..618e4f1 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -47,7 +47,11 @@ def load_allen_nodes(nodes_file, node_types_file): ) cells_df["morphology"] = cells_df["morphology"].str.replace(r"\.[^.]+$", "", regex=True) cells_df["rotation_angle_zaxis"] = cells_df["rotation_angle_zaxis"].fillna(0) - cells_df["morphology"] = cells_df["morphology"].fillna("none") + cells_df["morphology"] = cells_df["morphology"].fillna("None") + + for dummy_field in ["mtype", "etype"]: + if dummy_field not in cells_df.columns: + cells_df[dummy_field]="None" return cells_df, node_population @@ -57,21 +61,21 @@ def load_allen_edges(edges_file, edge_types_file): edges_df, pop = sonata_to_dataframe(edges_file, file_type="edges") assert len(pop) == 2, "Should return source and target population names for edges" edges_df = edges_df.merge( - edge_types_df[["edge_type_id", "syn_weight", "delay"]], on="edge_type_id", how="left" + edge_types_df[["edge_type_id", "syn_weight", "weight_function", "weight_sigma", "delay"]], on="edge_type_id", how="left" ) - # print(cells_df.loc[cells_df["node_type_id"] >= 100000121, "rotation_angle_yaxis"]) - edges_df.rename(columns={"syn_weight": "conductance"}, inplace=True) - # edges_df["target_query"]=edges_df["target_query"].str.split("&pop_name==", n=1, expand=True)[1].str.strip("'") - # edges_df["source_query"]=edges_df["target_query"].str.split("&pop_name==", n=1, expand=True)[1].str.strip("'") + # edges_df.rename(columns={"syn_weight": "conductance"}, inplace=True) + return edges_df, pop[0], pop[1] - edges_df_expanded = edges_df.loc[edges_df.index.repeat(edges_df["nsyns"])].reset_index( - drop=True - ) - return edges_df_expanded, pop[0], pop[1] +def prepare_synapses(edges_df, nodes_df): + adjust_synapse_weights(edges_df, nodes_df) + edges_df_expanded = edges_df.loc[edges_df.index.repeat(edges_df["nsyns"])].reset_index(drop=True) + return edges_df_expanded - -def split_dataframe_by_attribute(df, attribute_name): - return dict(tuple(df.groupby(attribute_name))) +def adjust_synapse_weights(edges_df, nodes_df): + src_df = nodes_df.loc[edges_df["source_node_id"], ["tuning_angle", "x","z"]].reset_index(drop=True) + tgt_df = nodes_df.loc[edges_df["target_node_id"], ["tuning_angle", "x","z"]].reset_index(drop=True) + edges_df.loc[edges_df["weight_function"]=="DirectionRule_others", "conductance"] = DirectionRule_others(edges_df, src_df, tgt_df) + edges_df.loc[edges_df["weight_function"]=="DirectionRule_EE", "conductance"] = DirectionRule_EE(edges_df, src_df, tgt_df) def write_edges_from_dataframe(data_df, src_pop, tgt_pop, outfile): @@ -82,8 +86,6 @@ def write_edges_from_dataframe(data_df, src_pop, tgt_pop, outfile): dset.attrs["node_population"] = src_pop dset = group.create_dataset("target_node_id", data=data_df["target_node_id"].to_numpy()) dset.attrs["node_population"] = tgt_pop - print(data_df[data_df["source_node_id"] == 0]["target_node_id"]) - print(data_df[data_df["target_node_id"] == 0]["source_node_id"]) group.create_dataset("edge_type_id", data=data_df["edge_type_id"].to_numpy()) for attribute_name in set(data_df.columns) - set( ["source_node_id", "target_node_id", "edge_type_id"] @@ -126,3 +128,50 @@ def ranges_per_node(node_to_edge_ids): res.append([start, end]) start = end return res + + +def DirectionRule_others(edge_props, src_node, trg_node): + """Adjust the synapse weight, copied from bmtk + """ + sigma = edge_props['weight_sigma'] + src_tuning = src_node['tuning_angle'] + tar_tuning = trg_node['tuning_angle'] + + delta_tuning_180 = abs(abs((abs(tar_tuning - src_tuning) % 360.0) - 180.0) - 180.0) + w_multiplier_180 = np.exp(-(delta_tuning_180 / sigma) ** 2) + return w_multiplier_180 * edge_props['syn_weight'] + + +def DirectionRule_EE(edge_props, src_node, trg_node): + """Adjust the synapse weight, copied from bmtk + """ + sigma = edge_props['weight_sigma'] + + src_tuning = src_node['tuning_angle'] + x_src = src_node['x'] + z_src = src_node['z'] + + tar_tuning = trg_node['tuning_angle'] + x_tar = trg_node['x'] + z_tar = trg_node['z'] + + delta_tuning_180 = abs(abs((abs(tar_tuning - src_tuning) % 360.0) - 180.0) - 180.0) + w_multiplier_180 = np.exp(-(delta_tuning_180 / sigma) ** 2) + + delta_x = (x_tar - x_src) * 0.07 + delta_z = (z_tar - z_src) * 0.04 + + theta_pref = tar_tuning * (np.pi / 180.) + xz = delta_x * np.cos(theta_pref) + delta_z * np.sin(theta_pref) + sigma_phase = 1.0 + phase_scale_ratio = np.exp(- (xz ** 2 / (2 * sigma_phase ** 2))) + + # To account for the 0.07 vs 0.04 dimensions. This ensures + # the horizontal neurons are scaled by 5.5/4 (from the midpoint + # of 4 & 7). Also, ensures the vertical is scaled by 5.5/7. This + # was a basic linear estimate to get the numbers (y = ax + b). + theta_tar_scale = abs(abs(abs(180.0 - abs(tar_tuning) % 360.0) - 90.0) - 90.0) + phase_scale_ratio = phase_scale_ratio * (5.5 / 4 - 11. / 1680 * theta_tar_scale) + + return w_multiplier_180 * phase_scale_ratio * edge_props['syn_weight'] + From 6afe81b738ab75cf522209c8cc44d468be11ed7c Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Wed, 1 Oct 2025 14:32:55 +0200 Subject: [PATCH 04/28] correct syn weights for biophysical and point neurons --- brainbuilder/utils/sonata/convert_allen_v1.py | 76 ++++++++++++------- 1 file changed, 47 insertions(+), 29 deletions(-) diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index 618e4f1..1e1a05c 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -51,7 +51,7 @@ def load_allen_nodes(nodes_file, node_types_file): for dummy_field in ["mtype", "etype"]: if dummy_field not in cells_df.columns: - cells_df[dummy_field]="None" + cells_df[dummy_field] = "None" return cells_df, node_population @@ -61,21 +61,42 @@ def load_allen_edges(edges_file, edge_types_file): edges_df, pop = sonata_to_dataframe(edges_file, file_type="edges") assert len(pop) == 2, "Should return source and target population names for edges" edges_df = edges_df.merge( - edge_types_df[["edge_type_id", "syn_weight", "weight_function", "weight_sigma", "delay"]], on="edge_type_id", how="left" + edge_types_df[["edge_type_id", "syn_weight", "weight_function", "weight_sigma", "delay"]], + on="edge_type_id", + how="left", ) # edges_df.rename(columns={"syn_weight": "conductance"}, inplace=True) return edges_df, pop[0], pop[1] + def prepare_synapses(edges_df, nodes_df): adjust_synapse_weights(edges_df, nodes_df) - edges_df_expanded = edges_df.loc[edges_df.index.repeat(edges_df["nsyns"])].reset_index(drop=True) + + biophysical_gids = nodes_df.index[nodes_df["model_type"] == "biophysical"] + point_gids = nodes_df.index[nodes_df["model_type"] == "point_process"] + # For edges targeting point cells, multiple syn_weight by nsys + mask = edges_df["target_node_id"].isin(point_gids) + edges_df.loc[mask, "conductance"] *= edges_df.loc[mask, "nsyns"] + # For edges targeting biophysical cells, expand synapses + repeat_counts = edges_df["nsyns"].where(edges_df["target_node_id"].isin(biophysical_gids), 1) + edges_df_expanded = edges_df.loc[edges_df.index.repeat(repeat_counts)].reset_index(drop=True) + return edges_df_expanded + def adjust_synapse_weights(edges_df, nodes_df): - src_df = nodes_df.loc[edges_df["source_node_id"], ["tuning_angle", "x","z"]].reset_index(drop=True) - tgt_df = nodes_df.loc[edges_df["target_node_id"], ["tuning_angle", "x","z"]].reset_index(drop=True) - edges_df.loc[edges_df["weight_function"]=="DirectionRule_others", "conductance"] = DirectionRule_others(edges_df, src_df, tgt_df) - edges_df.loc[edges_df["weight_function"]=="DirectionRule_EE", "conductance"] = DirectionRule_EE(edges_df, src_df, tgt_df) + src_df = nodes_df.loc[edges_df["source_node_id"], ["tuning_angle", "x", "z"]].reset_index( + drop=True + ) + tgt_df = nodes_df.loc[edges_df["target_node_id"], ["tuning_angle", "x", "z"]].reset_index( + drop=True + ) + edges_df.loc[edges_df["weight_function"] == "DirectionRule_others", "conductance"] = ( + DirectionRule_others(edges_df, src_df, tgt_df) + ) + edges_df.loc[edges_df["weight_function"] == "DirectionRule_EE", "conductance"] = ( + DirectionRule_EE(edges_df, src_df, tgt_df) + ) def write_edges_from_dataframe(data_df, src_pop, tgt_pop, outfile): @@ -131,47 +152,44 @@ def ranges_per_node(node_to_edge_ids): def DirectionRule_others(edge_props, src_node, trg_node): - """Adjust the synapse weight, copied from bmtk - """ - sigma = edge_props['weight_sigma'] - src_tuning = src_node['tuning_angle'] - tar_tuning = trg_node['tuning_angle'] + """Adjust the synapse weight, copied from bmtk""" + sigma = edge_props["weight_sigma"] + src_tuning = src_node["tuning_angle"] + tar_tuning = trg_node["tuning_angle"] delta_tuning_180 = abs(abs((abs(tar_tuning - src_tuning) % 360.0) - 180.0) - 180.0) - w_multiplier_180 = np.exp(-(delta_tuning_180 / sigma) ** 2) - return w_multiplier_180 * edge_props['syn_weight'] + w_multiplier_180 = np.exp(-((delta_tuning_180 / sigma) ** 2)) + return w_multiplier_180 * edge_props["syn_weight"] def DirectionRule_EE(edge_props, src_node, trg_node): - """Adjust the synapse weight, copied from bmtk - """ - sigma = edge_props['weight_sigma'] + """Adjust the synapse weight, copied from bmtk""" + sigma = edge_props["weight_sigma"] - src_tuning = src_node['tuning_angle'] - x_src = src_node['x'] - z_src = src_node['z'] + src_tuning = src_node["tuning_angle"] + x_src = src_node["x"] + z_src = src_node["z"] - tar_tuning = trg_node['tuning_angle'] - x_tar = trg_node['x'] - z_tar = trg_node['z'] + tar_tuning = trg_node["tuning_angle"] + x_tar = trg_node["x"] + z_tar = trg_node["z"] delta_tuning_180 = abs(abs((abs(tar_tuning - src_tuning) % 360.0) - 180.0) - 180.0) - w_multiplier_180 = np.exp(-(delta_tuning_180 / sigma) ** 2) + w_multiplier_180 = np.exp(-((delta_tuning_180 / sigma) ** 2)) delta_x = (x_tar - x_src) * 0.07 delta_z = (z_tar - z_src) * 0.04 - theta_pref = tar_tuning * (np.pi / 180.) + theta_pref = tar_tuning * (np.pi / 180.0) xz = delta_x * np.cos(theta_pref) + delta_z * np.sin(theta_pref) sigma_phase = 1.0 - phase_scale_ratio = np.exp(- (xz ** 2 / (2 * sigma_phase ** 2))) + phase_scale_ratio = np.exp(-(xz**2 / (2 * sigma_phase**2))) # To account for the 0.07 vs 0.04 dimensions. This ensures # the horizontal neurons are scaled by 5.5/4 (from the midpoint # of 4 & 7). Also, ensures the vertical is scaled by 5.5/7. This # was a basic linear estimate to get the numbers (y = ax + b). theta_tar_scale = abs(abs(abs(180.0 - abs(tar_tuning) % 360.0) - 90.0) - 90.0) - phase_scale_ratio = phase_scale_ratio * (5.5 / 4 - 11. / 1680 * theta_tar_scale) - - return w_multiplier_180 * phase_scale_ratio * edge_props['syn_weight'] + phase_scale_ratio = phase_scale_ratio * (5.5 / 4 - 11.0 / 1680 * theta_tar_scale) + return w_multiplier_180 * phase_scale_ratio * edge_props["syn_weight"] From 702f53bc9ce38c5ef399bed60a6eead8f47efbfb Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Wed, 1 Oct 2025 16:26:19 +0200 Subject: [PATCH 05/28] patch model_template --- brainbuilder/utils/sonata/convert_allen_v1.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index 1e1a05c..bc01c97 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -36,14 +36,18 @@ def load_allen_nodes(nodes_file, node_types_file): cells_df, node_population = sonata_to_dataframe(nodes_file, file_type="nodes") cells_df = cells_df.merge( node_types_df[ - ["node_type_id", "model_template", "morphology", "rotation_angle_zaxis", "model_type"] + ["node_type_id", "dynamics_params", "morphology", "rotation_angle_zaxis", "model_type"] ], on="node_type_id", how="left", ) # print(cells_df.loc[cells_df["node_type_id"] >= 100000121, "rotation_angle_yaxis"]) + cells_df.rename(columns={"dynamics_params": "model_template"}, inplace=True) + # hoc template name can not be started with number, prefix with BP_ where necessary cells_df["model_template"] = cells_df["model_template"].str.replace( - r"^.*:([A-Za-z0-9_]+)(?:\.hoc)?$", r"hoc:\1", regex=True + r"^([0-9][A-Za-z0-9_]*)(?:\.json)?$|^([A-Za-z][A-Za-z0-9_]*)(?:\.json)?$", + lambda m: f"hoc:BP_{m.group(1)}" if m.group(1) else f"hoc:{m.group(2)}", + regex=True, ) cells_df["morphology"] = cells_df["morphology"].str.replace(r"\.[^.]+$", "", regex=True) cells_df["rotation_angle_zaxis"] = cells_df["rotation_angle_zaxis"].fillna(0) From 50163c47304e4836e7fd322df8887f06b9c08959 Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Thu, 2 Oct 2025 15:26:38 +0200 Subject: [PATCH 06/28] add syn locations --- brainbuilder/app/sonata.py | 14 ++++++++++---- brainbuilder/utils/sonata/convert_allen_v1.py | 19 ++++++++++++------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/brainbuilder/app/sonata.py b/brainbuilder/app/sonata.py index 2e12205..96200c9 100644 --- a/brainbuilder/app/sonata.py +++ b/brainbuilder/app/sonata.py @@ -50,7 +50,10 @@ def from_mvd3(mvd3, output, model_type, mecombo_info, population): @click.option("--node-types-file", help="Path to node type file", required=True) @click.option("--edges-file", help="Path to edges file", required=True) @click.option("--edge-types-file", help="Path to edge type file", required=True) -def from_allen_circuit(nodes_file, node_types_file, edges_file, edge_types_file, output): +@click.option("--synapse-location-file", help="Path to synapse location file", required=True) +def from_allen_circuit( + nodes_file, node_types_file, edges_file, edge_types_file, synapse_location_file, output +): """Provide SONATA nodes with MorphoElectrical info""" from brainbuilder.utils.sonata import convert_allen_v1 from voxcell import CellCollection @@ -63,16 +66,19 @@ def from_allen_circuit(nodes_file, node_types_file, edges_file, edge_types_file, cells_df, node_pop = convert_allen_v1.load_allen_nodes(nodes_file, node_types_file) edges_df, src_pop, tgt_pop = convert_allen_v1.load_allen_edges(edges_file, edge_types_file) - edges_df = convert_allen_v1.prepare_synapses(edges_df, cells_df) + edges_df = convert_allen_v1.prepare_synapses(edges_df, cells_df, synapse_location_file) + + # drop columns not needed for OBI simulator + cells_df.drop(["tuning_angle"], axis=1, inplace=True) + edges_df.drop(["weight_function", "weight_sigma", "syn_weight"], axis=1, inplace=True) - # save to sonata + # save to sonata h5 files if not Path(output).exists(): Path(output).mkdir(parents=True, exist_ok=True) cells = CellCollection.from_dataframe(cells_df, index_offset=0) cells.population_name = node_pop cells.save_sonata(Path(output) / node_file_name, cells_df) - with h5py.File(Path(output) / edge_file_name, "w") as h5f: convert_allen_v1.write_edges_from_dataframe(edges_df, src_pop, tgt_pop, h5f) diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index bc01c97..0084618 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -41,7 +41,6 @@ def load_allen_nodes(nodes_file, node_types_file): on="node_type_id", how="left", ) - # print(cells_df.loc[cells_df["node_type_id"] >= 100000121, "rotation_angle_yaxis"]) cells_df.rename(columns={"dynamics_params": "model_template"}, inplace=True) # hoc template name can not be started with number, prefix with BP_ where necessary cells_df["model_template"] = cells_df["model_template"].str.replace( @@ -53,9 +52,7 @@ def load_allen_nodes(nodes_file, node_types_file): cells_df["rotation_angle_zaxis"] = cells_df["rotation_angle_zaxis"].fillna(0) cells_df["morphology"] = cells_df["morphology"].fillna("None") - for dummy_field in ["mtype", "etype"]: - if dummy_field not in cells_df.columns: - cells_df[dummy_field] = "None" + cells_df[["mtype", "etype"]] = "None" return cells_df, node_population @@ -73,18 +70,26 @@ def load_allen_edges(edges_file, edge_types_file): return edges_df, pop[0], pop[1] -def prepare_synapses(edges_df, nodes_df): +def prepare_synapses(edges_df, nodes_df, syn_location_file): adjust_synapse_weights(edges_df, nodes_df) biophysical_gids = nodes_df.index[nodes_df["model_type"] == "biophysical"] point_gids = nodes_df.index[nodes_df["model_type"] == "point_process"] # For edges targeting point cells, multiple syn_weight by nsys - mask = edges_df["target_node_id"].isin(point_gids) - edges_df.loc[mask, "conductance"] *= edges_df.loc[mask, "nsyns"] + mask_point = edges_df["target_node_id"].isin(point_gids) + edges_df.loc[mask_point, "conductance"] *= edges_df.loc[mask_point, "nsyns"] # For edges targeting biophysical cells, expand synapses repeat_counts = edges_df["nsyns"].where(edges_df["target_node_id"].isin(biophysical_gids), 1) edges_df_expanded = edges_df.loc[edges_df.index.repeat(repeat_counts)].reset_index(drop=True) + # Read synapse location from --syn-location-file + syn_df = pd.read_csv(syn_location_file, sep=r"\s+") + mask = edges_df_expanded["target_node_id"].isin(biophysical_gids) + assert np.allclose(edges_df_expanded.loc[mask, "conductance"], syn_df["syn_weight"]) + edges_df_expanded[["afferent_section_id", "afferent_section_pos"]] = np.nan + edges_df_expanded.loc[mask, "afferent_section_id"] = syn_df["sec_id"] + edges_df_expanded.loc[mask, "afferent_section_pos"] = syn_df["sec_x"] + return edges_df_expanded From 54fa227e99be504afa846d85d80c485fb1e670b3 Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Fri, 3 Oct 2025 11:24:23 +0200 Subject: [PATCH 07/28] refactor --- brainbuilder/app/sonata.py | 26 ++++++++--- brainbuilder/utils/sonata/convert_allen_v1.py | 44 ++++++++++++++----- 2 files changed, 55 insertions(+), 15 deletions(-) diff --git a/brainbuilder/app/sonata.py b/brainbuilder/app/sonata.py index 96200c9..20fe473 100644 --- a/brainbuilder/app/sonata.py +++ b/brainbuilder/app/sonata.py @@ -50,12 +50,17 @@ def from_mvd3(mvd3, output, model_type, mecombo_info, population): @click.option("--node-types-file", help="Path to node type file", required=True) @click.option("--edges-file", help="Path to edges file", required=True) @click.option("--edge-types-file", help="Path to edge type file", required=True) -@click.option("--synapse-location-file", help="Path to synapse location file", required=True) +@click.option( + "--precomputed-edges-file", + help="Path to allen's precomputed edges file, for syn weights and locations", + required=True, +) def from_allen_circuit( - nodes_file, node_types_file, edges_file, edge_types_file, synapse_location_file, output + nodes_file, node_types_file, edges_file, edge_types_file, precomputed_edges_file, output ): """Provide SONATA nodes with MorphoElectrical info""" from brainbuilder.utils.sonata import convert_allen_v1 + from brainbuilder.utils.sonata import split_population as module from voxcell import CellCollection from pathlib import Path @@ -63,10 +68,12 @@ def from_allen_circuit( node_file_name = Path(nodes_file).name edge_file_name = Path(edges_file).name + split_output = Path(output) / "split_circuit" + assert not split_output.exists(), f"Please remove {split_output} first" cells_df, node_pop = convert_allen_v1.load_allen_nodes(nodes_file, node_types_file) edges_df, src_pop, tgt_pop = convert_allen_v1.load_allen_edges(edges_file, edge_types_file) - edges_df = convert_allen_v1.prepare_synapses(edges_df, cells_df, synapse_location_file) + edges_df = convert_allen_v1.prepare_synapses(edges_df, cells_df, precomputed_edges_file) # drop columns not needed for OBI simulator cells_df.drop(["tuning_angle"], axis=1, inplace=True) @@ -77,11 +84,20 @@ def from_allen_circuit( Path(output).mkdir(parents=True, exist_ok=True) cells = CellCollection.from_dataframe(cells_df, index_offset=0) cells.population_name = node_pop - cells.save_sonata(Path(output) / node_file_name, cells_df) + output_nodes = Path(output) / node_file_name + output_edges = Path(output) / edge_file_name + cells.save_sonata(output_nodes, cells_df) - with h5py.File(Path(output) / edge_file_name, "w") as h5f: + with h5py.File(output_edges, "w") as h5f: convert_allen_v1.write_edges_from_dataframe(edges_df, src_pop, tgt_pop, h5f) + # Split populations + # Create the directory + split_output.mkdir(parents=True, exist_ok=True) + module.split_population( + split_output, attribute="model_type", nodes_path=output_nodes, edges_path=output_edges + ) + @app.command() @click.argument("cells-path") diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index 0084618..9317831 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -11,8 +11,8 @@ def sonata_to_dataframe(sonata_file, file_type="nodes"): assert len(population_names) == 1, "Single population is supported only" population_name = population_names[0] - population = h5f[f"/{file_type}/{population_name}"] - assert "0" in population, 'Single group "0" is supported only' + population = h5f[f"{file_type}/{population_name}"] + assert "0" in population, "group '0' doesn't exst" group = population["0"] for key in group.keys(): @@ -70,29 +70,53 @@ def load_allen_edges(edges_file, edge_types_file): return edges_df, pop[0], pop[1] -def prepare_synapses(edges_df, nodes_df, syn_location_file): +def prepare_synapses(edges_df, nodes_df, precomputed_edges_file): adjust_synapse_weights(edges_df, nodes_df) + # Read synapse location and weights from precomputed edges file + syn_0_df, syn_1_df = read_precomputed_edges_file(precomputed_edges_file) biophysical_gids = nodes_df.index[nodes_df["model_type"] == "biophysical"] point_gids = nodes_df.index[nodes_df["model_type"] == "point_process"] + # For edges targeting point cells, multiple syn_weight by nsys mask_point = edges_df["target_node_id"].isin(point_gids) edges_df.loc[mask_point, "conductance"] *= edges_df.loc[mask_point, "nsyns"] + assert np.allclose(edges_df.loc[mask_point, "conductance"], abs(syn_1_df["syn_weight"])), ( + "point syn weight is not consistent with the precomputed file" + ) + # For edges targeting biophysical cells, expand synapses repeat_counts = edges_df["nsyns"].where(edges_df["target_node_id"].isin(biophysical_gids), 1) edges_df_expanded = edges_df.loc[edges_df.index.repeat(repeat_counts)].reset_index(drop=True) - - # Read synapse location from --syn-location-file - syn_df = pd.read_csv(syn_location_file, sep=r"\s+") - mask = edges_df_expanded["target_node_id"].isin(biophysical_gids) - assert np.allclose(edges_df_expanded.loc[mask, "conductance"], syn_df["syn_weight"]) + mask_biophysical = edges_df_expanded["target_node_id"].isin(biophysical_gids) + assert np.allclose( + edges_df_expanded.loc[mask_biophysical, "conductance"], syn_0_df["syn_weight"] + ), "biophysical syn weight is not consistent with the precomputed file" edges_df_expanded[["afferent_section_id", "afferent_section_pos"]] = np.nan - edges_df_expanded.loc[mask, "afferent_section_id"] = syn_df["sec_id"] - edges_df_expanded.loc[mask, "afferent_section_pos"] = syn_df["sec_x"] + edges_df_expanded.loc[mask_biophysical, "afferent_section_id"] = syn_0_df["sec_id"] + edges_df_expanded.loc[mask_biophysical, "afferent_section_pos"] = syn_0_df["sec_x"] return edges_df_expanded +def read_precomputed_edges_file(precomputed_edges_file): + res = [] + with h5py.File(precomputed_edges_file, "r") as h5f: + population_names = list(h5f["/edges"].keys()) + assert len(population_names) == 1, "Single population is supported only" + population_name = population_names[0] + + population = h5f[f"/edges/{population_name}"] + for group_name in ["0", "1"]: + assert group_name in population, f"group {group_name} doesn't exst" + group = population[group_name] + syn_weight = group["syn_weight"][()] + sec_id = group["sec_id"][()] if "sec_id" in group else np.empty(len(syn_weight)) + sec_x = group["sec_x"][()] if "sec_x" in group else np.empty(len(syn_weight)) + res.append(pd.DataFrame({"syn_weight": syn_weight, "sec_id": sec_id, "sec_x": sec_x})) + return res + + def adjust_synapse_weights(edges_df, nodes_df): src_df = nodes_df.loc[edges_df["source_node_id"], ["tuning_angle", "x", "z"]].reset_index( drop=True From e59d5323cd77b41d7d9dea17e8d6cde2a1dedf2c Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Wed, 8 Oct 2025 16:41:48 +0200 Subject: [PATCH 08/28] convert projection edges --- brainbuilder/app/sonata.py | 69 ++++++++++++++++++- brainbuilder/utils/sonata/convert_allen_v1.py | 13 ++-- 2 files changed, 75 insertions(+), 7 deletions(-) diff --git a/brainbuilder/app/sonata.py b/brainbuilder/app/sonata.py index 20fe473..c91489a 100644 --- a/brainbuilder/app/sonata.py +++ b/brainbuilder/app/sonata.py @@ -77,7 +77,7 @@ def from_allen_circuit( # drop columns not needed for OBI simulator cells_df.drop(["tuning_angle"], axis=1, inplace=True) - edges_df.drop(["weight_function", "weight_sigma", "syn_weight"], axis=1, inplace=True) + edges_df.drop(["weight_function", "weight_sigma", "syn_weight", "nsyns"], axis=1, inplace=True) # save to sonata h5 files if not Path(output).exists(): @@ -98,6 +98,73 @@ def from_allen_circuit( split_output, attribute="model_type", nodes_path=output_nodes, edges_path=output_edges ) +@app.command() +@click.option("-o", "--output", help="directory to output SONATA files", required=True) +@click.option("--nodes-file", help="Path to nodes file", required=True) +@click.option("--node-types-file", help="Path to node type file", required=True) +@click.option("--edges-file", help="Path to edges file", required=True) +@click.option("--edge-types-file", help="Path to edge type file", required=True) +@click.option( + "--precomputed-edges-file", + help="Path to allen's precomputed edges file, for syn weights and locations", + required=True, +) +def from_allen_projection_edges( + nodes_file, node_types_file, edges_file, edge_types_file, precomputed_edges_file, output +): + """Provide SONATA nodes with MorphoElectrical info""" + from brainbuilder.utils.sonata import convert_allen_v1 + from brainbuilder.utils.sonata import split_population as module + from pathlib import Path + + import h5py + import numpy as np + + nodes_df, node_pop = convert_allen_v1.load_allen_nodes(nodes_file, node_types_file) + edges_df, src_pop, tgt_pop = convert_allen_v1.load_allen_edges(edges_file, edge_types_file) + assert edges_df["weight_function"].unique() == ["wmax"], "weight_function is not only wmax" + edges_df.rename(columns={"syn_weight": "conductance"}, inplace=True) + edges_df.drop(["weight_function", "weight_sigma"], axis=1, inplace=True) + + # Read synapse location and weights from precomputed edges file + syn_biophysical_df, syn_point_df = convert_allen_v1.load_precomputed_edges_file(precomputed_edges_file) + + # split projecting to src->biophysical, src->point_process + biophysical_gids = nodes_df.index[nodes_df["model_type"] == "biophysical"] + point_gids = nodes_df.index[nodes_df["model_type"] == "point_process"] + biophysical_edges = edges_df[(edges_df["target_node_id"].isin(biophysical_gids))] + point_edges = edges_df[(edges_df["target_node_id"].isin(point_gids))] + + + # For edges targeting point cells, multiple syn_weight by nsys + point_edges.loc[:, "conductance"] *= point_edges["nsyns"] + # cross check with precompuated file to make sure the weights are correct + assert np.allclose(point_edges["conductance"], abs(syn_point_df["syn_weight"])), ( + "point syn weight is not consistent with the precomputed file" + ) + + # For edges targeting biophysical cells, expand synapses + repeat_counts = biophysical_edges["nsyns"] + biophysical_edges=biophysical_edges.loc[biophysical_edges.index.repeat(repeat_counts)].reset_index(drop=True) + assert np.allclose( + biophysical_edges["conductance"], syn_biophysical_df["syn_weight"] + ), "biophysical syn weight is not consistent with the precomputed file" + biophysical_edges["afferent_section_id"] = syn_biophysical_df["sec_id"] + biophysical_edges["afferent_section_pos"] = syn_biophysical_df["sec_x"] + + # save -> biophyscial edges + if not Path(output).exists(): + Path(output).mkdir(parents=True, exist_ok=True) + edge_file_name = f"edges_{src_pop}_biophysical.h5" + output_edges = Path(output) / edge_file_name + with h5py.File(output_edges, "w") as h5f: + convert_allen_v1.write_edges_from_dataframe(biophysical_edges, src_pop, "biophysical", h5f) + + # save -> point_process edges + edge_file_name = f"edges_{src_pop}_point_process.h5" + output_edges = Path(output) / edge_file_name + with h5py.File(output_edges, "w") as h5f: + convert_allen_v1.write_edges_from_dataframe(point_edges, src_pop, "point_process", h5f) @app.command() @click.argument("cells-path") diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index 9317831..b14769e 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -73,7 +73,7 @@ def load_allen_edges(edges_file, edge_types_file): def prepare_synapses(edges_df, nodes_df, precomputed_edges_file): adjust_synapse_weights(edges_df, nodes_df) # Read synapse location and weights from precomputed edges file - syn_0_df, syn_1_df = read_precomputed_edges_file(precomputed_edges_file) + syn_biophysical_df, syn_point_df = load_precomputed_edges_file(precomputed_edges_file) biophysical_gids = nodes_df.index[nodes_df["model_type"] == "biophysical"] point_gids = nodes_df.index[nodes_df["model_type"] == "point_process"] @@ -81,7 +81,8 @@ def prepare_synapses(edges_df, nodes_df, precomputed_edges_file): # For edges targeting point cells, multiple syn_weight by nsys mask_point = edges_df["target_node_id"].isin(point_gids) edges_df.loc[mask_point, "conductance"] *= edges_df.loc[mask_point, "nsyns"] - assert np.allclose(edges_df.loc[mask_point, "conductance"], abs(syn_1_df["syn_weight"])), ( + # cross check with precompuated file to make sure the weights are correct + assert np.allclose(edges_df.loc[mask_point, "conductance"], abs(syn_point_df["syn_weight"])), ( "point syn weight is not consistent with the precomputed file" ) @@ -90,16 +91,16 @@ def prepare_synapses(edges_df, nodes_df, precomputed_edges_file): edges_df_expanded = edges_df.loc[edges_df.index.repeat(repeat_counts)].reset_index(drop=True) mask_biophysical = edges_df_expanded["target_node_id"].isin(biophysical_gids) assert np.allclose( - edges_df_expanded.loc[mask_biophysical, "conductance"], syn_0_df["syn_weight"] + edges_df_expanded.loc[mask_biophysical, "conductance"], syn_biophysical_df["syn_weight"] ), "biophysical syn weight is not consistent with the precomputed file" edges_df_expanded[["afferent_section_id", "afferent_section_pos"]] = np.nan - edges_df_expanded.loc[mask_biophysical, "afferent_section_id"] = syn_0_df["sec_id"] - edges_df_expanded.loc[mask_biophysical, "afferent_section_pos"] = syn_0_df["sec_x"] + edges_df_expanded.loc[mask_biophysical, "afferent_section_id"] = syn_biophysical_df["sec_id"] + edges_df_expanded.loc[mask_biophysical, "afferent_section_pos"] = syn_biophysical_df["sec_x"] return edges_df_expanded -def read_precomputed_edges_file(precomputed_edges_file): +def load_precomputed_edges_file(precomputed_edges_file): res = [] with h5py.File(precomputed_edges_file, "r") as h5f: population_names = list(h5f["/edges"].keys()) From ac7fcbfceda719cdac64160b8a2012df03efc5ab Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Wed, 8 Oct 2025 16:53:01 +0200 Subject: [PATCH 09/28] refactor --- brainbuilder/app/sonata.py | 33 ++++--------------- brainbuilder/utils/sonata/convert_allen_v1.py | 1 + 2 files changed, 7 insertions(+), 27 deletions(-) diff --git a/brainbuilder/app/sonata.py b/brainbuilder/app/sonata.py index c91489a..8da9691 100644 --- a/brainbuilder/app/sonata.py +++ b/brainbuilder/app/sonata.py @@ -98,6 +98,7 @@ def from_allen_circuit( split_output, attribute="model_type", nodes_path=output_nodes, edges_path=output_edges ) + @app.command() @click.option("-o", "--output", help="directory to output SONATA files", required=True) @click.option("--nodes-file", help="Path to nodes file", required=True) @@ -114,20 +115,14 @@ def from_allen_projection_edges( ): """Provide SONATA nodes with MorphoElectrical info""" from brainbuilder.utils.sonata import convert_allen_v1 - from brainbuilder.utils.sonata import split_population as module from pathlib import Path import h5py - import numpy as np - - nodes_df, node_pop = convert_allen_v1.load_allen_nodes(nodes_file, node_types_file) - edges_df, src_pop, tgt_pop = convert_allen_v1.load_allen_edges(edges_file, edge_types_file) - assert edges_df["weight_function"].unique() == ["wmax"], "weight_function is not only wmax" - edges_df.rename(columns={"syn_weight": "conductance"}, inplace=True) - edges_df.drop(["weight_function", "weight_sigma"], axis=1, inplace=True) - # Read synapse location and weights from precomputed edges file - syn_biophysical_df, syn_point_df = convert_allen_v1.load_precomputed_edges_file(precomputed_edges_file) + nodes_df, _node_pop = convert_allen_v1.load_allen_nodes(nodes_file, node_types_file) + edges_df, src_pop, _tgt_pop = convert_allen_v1.load_allen_edges(edges_file, edge_types_file) + edges_df = convert_allen_v1.prepare_synapses(edges_df, nodes_df, precomputed_edges_file) + edges_df.drop(["weight_function", "weight_sigma", "nsyns"], axis=1, inplace=True) # split projecting to src->biophysical, src->point_process biophysical_gids = nodes_df.index[nodes_df["model_type"] == "biophysical"] @@ -135,23 +130,6 @@ def from_allen_projection_edges( biophysical_edges = edges_df[(edges_df["target_node_id"].isin(biophysical_gids))] point_edges = edges_df[(edges_df["target_node_id"].isin(point_gids))] - - # For edges targeting point cells, multiple syn_weight by nsys - point_edges.loc[:, "conductance"] *= point_edges["nsyns"] - # cross check with precompuated file to make sure the weights are correct - assert np.allclose(point_edges["conductance"], abs(syn_point_df["syn_weight"])), ( - "point syn weight is not consistent with the precomputed file" - ) - - # For edges targeting biophysical cells, expand synapses - repeat_counts = biophysical_edges["nsyns"] - biophysical_edges=biophysical_edges.loc[biophysical_edges.index.repeat(repeat_counts)].reset_index(drop=True) - assert np.allclose( - biophysical_edges["conductance"], syn_biophysical_df["syn_weight"] - ), "biophysical syn weight is not consistent with the precomputed file" - biophysical_edges["afferent_section_id"] = syn_biophysical_df["sec_id"] - biophysical_edges["afferent_section_pos"] = syn_biophysical_df["sec_x"] - # save -> biophyscial edges if not Path(output).exists(): Path(output).mkdir(parents=True, exist_ok=True) @@ -166,6 +144,7 @@ def from_allen_projection_edges( with h5py.File(output_edges, "w") as h5f: convert_allen_v1.write_edges_from_dataframe(point_edges, src_pop, "point_process", h5f) + @app.command() @click.argument("cells-path") @click.option("-o", "--output", help="Path to output SONATA nodes", required=True) diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index b14769e..b8af770 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -125,6 +125,7 @@ def adjust_synapse_weights(edges_df, nodes_df): tgt_df = nodes_df.loc[edges_df["target_node_id"], ["tuning_angle", "x", "z"]].reset_index( drop=True ) + edges_df.loc[:, "conductance"] = edges_df["syn_weight"] # default cond edges_df.loc[edges_df["weight_function"] == "DirectionRule_others", "conductance"] = ( DirectionRule_others(edges_df, src_df, tgt_df) ) From 4714476cb4c1c909f095b6432c436ffa4a19d537 Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Thu, 9 Oct 2025 16:00:59 +0200 Subject: [PATCH 10/28] fix assign df row-by-row instead by index --- brainbuilder/app/sonata.py | 4 ++-- brainbuilder/utils/sonata/convert_allen_v1.py | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/brainbuilder/app/sonata.py b/brainbuilder/app/sonata.py index 8da9691..3d03e86 100644 --- a/brainbuilder/app/sonata.py +++ b/brainbuilder/app/sonata.py @@ -127,8 +127,8 @@ def from_allen_projection_edges( # split projecting to src->biophysical, src->point_process biophysical_gids = nodes_df.index[nodes_df["model_type"] == "biophysical"] point_gids = nodes_df.index[nodes_df["model_type"] == "point_process"] - biophysical_edges = edges_df[(edges_df["target_node_id"].isin(biophysical_gids))] - point_edges = edges_df[(edges_df["target_node_id"].isin(point_gids))] + biophysical_edges = edges_df[(edges_df["target_node_id"].isin(biophysical_gids))].reset_index(drop=True) + point_edges = edges_df[(edges_df["target_node_id"].isin(point_gids))].reset_index(drop=True) # save -> biophyscial edges if not Path(output).exists(): diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index b8af770..16e19e4 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -66,7 +66,6 @@ def load_allen_edges(edges_file, edge_types_file): on="edge_type_id", how="left", ) - # edges_df.rename(columns={"syn_weight": "conductance"}, inplace=True) return edges_df, pop[0], pop[1] @@ -94,8 +93,8 @@ def prepare_synapses(edges_df, nodes_df, precomputed_edges_file): edges_df_expanded.loc[mask_biophysical, "conductance"], syn_biophysical_df["syn_weight"] ), "biophysical syn weight is not consistent with the precomputed file" edges_df_expanded[["afferent_section_id", "afferent_section_pos"]] = np.nan - edges_df_expanded.loc[mask_biophysical, "afferent_section_id"] = syn_biophysical_df["sec_id"] - edges_df_expanded.loc[mask_biophysical, "afferent_section_pos"] = syn_biophysical_df["sec_x"] + edges_df_expanded.loc[mask_biophysical, "afferent_section_id"] = syn_biophysical_df["sec_id"].to_numpy() # row-to-row, not by index + edges_df_expanded.loc[mask_biophysical, "afferent_section_pos"] = syn_biophysical_df["sec_x"].to_numpy() return edges_df_expanded From 7071355dcc54536efa8a8129abf95bc16e04daf3 Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Thu, 9 Oct 2025 17:09:48 +0200 Subject: [PATCH 11/28] rename cells to nodes --- brainbuilder/app/sonata.py | 14 ++++++++------ brainbuilder/utils/sonata/convert_allen_v1.py | 8 ++++++-- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/brainbuilder/app/sonata.py b/brainbuilder/app/sonata.py index 3d03e86..109d47f 100644 --- a/brainbuilder/app/sonata.py +++ b/brainbuilder/app/sonata.py @@ -71,22 +71,22 @@ def from_allen_circuit( split_output = Path(output) / "split_circuit" assert not split_output.exists(), f"Please remove {split_output} first" - cells_df, node_pop = convert_allen_v1.load_allen_nodes(nodes_file, node_types_file) + nodes_df, node_pop = convert_allen_v1.load_allen_nodes(nodes_file, node_types_file) edges_df, src_pop, tgt_pop = convert_allen_v1.load_allen_edges(edges_file, edge_types_file) - edges_df = convert_allen_v1.prepare_synapses(edges_df, cells_df, precomputed_edges_file) + edges_df = convert_allen_v1.prepare_synapses(edges_df, nodes_df, precomputed_edges_file) # drop columns not needed for OBI simulator - cells_df.drop(["tuning_angle"], axis=1, inplace=True) + nodes_df.drop(["tuning_angle"], axis=1, inplace=True) edges_df.drop(["weight_function", "weight_sigma", "syn_weight", "nsyns"], axis=1, inplace=True) # save to sonata h5 files if not Path(output).exists(): Path(output).mkdir(parents=True, exist_ok=True) - cells = CellCollection.from_dataframe(cells_df, index_offset=0) + cells = CellCollection.from_dataframe(nodes_df, index_offset=0) cells.population_name = node_pop output_nodes = Path(output) / node_file_name output_edges = Path(output) / edge_file_name - cells.save_sonata(output_nodes, cells_df) + cells.save_sonata(output_nodes) with h5py.File(output_edges, "w") as h5f: convert_allen_v1.write_edges_from_dataframe(edges_df, src_pop, tgt_pop, h5f) @@ -127,7 +127,9 @@ def from_allen_projection_edges( # split projecting to src->biophysical, src->point_process biophysical_gids = nodes_df.index[nodes_df["model_type"] == "biophysical"] point_gids = nodes_df.index[nodes_df["model_type"] == "point_process"] - biophysical_edges = edges_df[(edges_df["target_node_id"].isin(biophysical_gids))].reset_index(drop=True) + biophysical_edges = edges_df[(edges_df["target_node_id"].isin(biophysical_gids))].reset_index( + drop=True + ) point_edges = edges_df[(edges_df["target_node_id"].isin(point_gids))].reset_index(drop=True) # save -> biophyscial edges diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index 16e19e4..7198c61 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -93,8 +93,12 @@ def prepare_synapses(edges_df, nodes_df, precomputed_edges_file): edges_df_expanded.loc[mask_biophysical, "conductance"], syn_biophysical_df["syn_weight"] ), "biophysical syn weight is not consistent with the precomputed file" edges_df_expanded[["afferent_section_id", "afferent_section_pos"]] = np.nan - edges_df_expanded.loc[mask_biophysical, "afferent_section_id"] = syn_biophysical_df["sec_id"].to_numpy() # row-to-row, not by index - edges_df_expanded.loc[mask_biophysical, "afferent_section_pos"] = syn_biophysical_df["sec_x"].to_numpy() + edges_df_expanded.loc[mask_biophysical, "afferent_section_id"] = syn_biophysical_df[ + "sec_id" + ].to_numpy() # row-to-row, not by index + edges_df_expanded.loc[mask_biophysical, "afferent_section_pos"] = syn_biophysical_df[ + "sec_x" + ].to_numpy() return edges_df_expanded From 58bfec7e5098cc0d8098354a3b1421227aa5768c Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Fri, 10 Oct 2025 12:19:59 +0200 Subject: [PATCH 12/28] correct projection, refactor index_range_group functions --- brainbuilder/app/sonata.py | 6 ++++++ brainbuilder/utils/sonata/convert_allen_v1.py | 21 ++++++++++++------- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/brainbuilder/app/sonata.py b/brainbuilder/app/sonata.py index 109d47f..27ee477 100644 --- a/brainbuilder/app/sonata.py +++ b/brainbuilder/app/sonata.py @@ -130,7 +130,13 @@ def from_allen_projection_edges( biophysical_edges = edges_df[(edges_df["target_node_id"].isin(biophysical_gids))].reset_index( drop=True ) + biophysical_id_map = dict(zip(biophysical_gids, range(len(biophysical_gids)))) + biophysical_edges["target_node_id"] = biophysical_edges["target_node_id"].map( + biophysical_id_map + ) point_edges = edges_df[(edges_df["target_node_id"].isin(point_gids))].reset_index(drop=True) + point_id_map = dict(zip(point_gids, range(len(point_gids)))) + point_edges["target_node_id"] = point_edges["target_node_id"].map(point_id_map) # save -> biophyscial edges if not Path(output).exists(): diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index 7198c61..fab35cc 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -157,16 +157,19 @@ def write_edges_from_dataframe(data_df, src_pop, tgt_pop, outfile): def write_index_group(group, grouped_df): - node_to_edge_ids = { - key: indices_to_ranges(list(value)) for key, value in grouped_df.groups.items() - } - range_ids = ranges_per_node(node_to_edge_ids) - edge_ids = list(chain.from_iterable(node_to_edge_ids.values())) + """Write the index group for sonata edges file. + grouped_df.groups = {"node_id": [list of edge indices]} + """ + edge_ids_list_per_node = [indices_to_ranges(list(idx)) for idx in grouped_df.groups.values()] + range_ids = ranges_per_node(edge_ids_list_per_node) + edge_ids = list(chain.from_iterable(edge_ids_list_per_node)) group.create_dataset("node_id_to_ranges", data=range_ids) group.create_dataset("range_to_edge_id", data=edge_ids) def indices_to_ranges(indices): + """Given a list of [indices], return list of [start, end) ranges . + e.g. [0,1,2,7,8,10,11,12] -> [[0,3], [7,9], [10,13]]""" if not indices: return [] arr = np.sort(np.array(indices)) @@ -178,10 +181,14 @@ def indices_to_ranges(indices): return np.stack((arr[starts], arr[ends - 1] + 1), axis=1) -def ranges_per_node(node_to_edge_ids): +def ranges_per_node(edge_ids_list): + """Given list of [edge_ids], return list of [start, end) ranges. + e.g. [[[0,3], [3,5], [5,8]], [[9,10]]] -> [[0,3],[3,4]] + Range 0 -> ids[0,3), Range 1 -> ids[3,4), etc.] + """ res = [] start = 0 - for ranges in node_to_edge_ids.values(): + for ranges in edge_ids_list: n_ranges = len(ranges) end = start + n_ranges res.append([start, end]) From ae354309ef20bce7dfea82d9ee98b9855976c514 Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Fri, 10 Oct 2025 14:17:26 +0200 Subject: [PATCH 13/28] Revert "correct projection, refactor index_range_group functions" This reverts commit 58bfec7e5098cc0d8098354a3b1421227aa5768c. --- brainbuilder/app/sonata.py | 6 ------ brainbuilder/utils/sonata/convert_allen_v1.py | 21 +++++++------------ 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/brainbuilder/app/sonata.py b/brainbuilder/app/sonata.py index 27ee477..109d47f 100644 --- a/brainbuilder/app/sonata.py +++ b/brainbuilder/app/sonata.py @@ -130,13 +130,7 @@ def from_allen_projection_edges( biophysical_edges = edges_df[(edges_df["target_node_id"].isin(biophysical_gids))].reset_index( drop=True ) - biophysical_id_map = dict(zip(biophysical_gids, range(len(biophysical_gids)))) - biophysical_edges["target_node_id"] = biophysical_edges["target_node_id"].map( - biophysical_id_map - ) point_edges = edges_df[(edges_df["target_node_id"].isin(point_gids))].reset_index(drop=True) - point_id_map = dict(zip(point_gids, range(len(point_gids)))) - point_edges["target_node_id"] = point_edges["target_node_id"].map(point_id_map) # save -> biophyscial edges if not Path(output).exists(): diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index fab35cc..7198c61 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -157,19 +157,16 @@ def write_edges_from_dataframe(data_df, src_pop, tgt_pop, outfile): def write_index_group(group, grouped_df): - """Write the index group for sonata edges file. - grouped_df.groups = {"node_id": [list of edge indices]} - """ - edge_ids_list_per_node = [indices_to_ranges(list(idx)) for idx in grouped_df.groups.values()] - range_ids = ranges_per_node(edge_ids_list_per_node) - edge_ids = list(chain.from_iterable(edge_ids_list_per_node)) + node_to_edge_ids = { + key: indices_to_ranges(list(value)) for key, value in grouped_df.groups.items() + } + range_ids = ranges_per_node(node_to_edge_ids) + edge_ids = list(chain.from_iterable(node_to_edge_ids.values())) group.create_dataset("node_id_to_ranges", data=range_ids) group.create_dataset("range_to_edge_id", data=edge_ids) def indices_to_ranges(indices): - """Given a list of [indices], return list of [start, end) ranges . - e.g. [0,1,2,7,8,10,11,12] -> [[0,3], [7,9], [10,13]]""" if not indices: return [] arr = np.sort(np.array(indices)) @@ -181,14 +178,10 @@ def indices_to_ranges(indices): return np.stack((arr[starts], arr[ends - 1] + 1), axis=1) -def ranges_per_node(edge_ids_list): - """Given list of [edge_ids], return list of [start, end) ranges. - e.g. [[[0,3], [3,5], [5,8]], [[9,10]]] -> [[0,3],[3,4]] - Range 0 -> ids[0,3), Range 1 -> ids[3,4), etc.] - """ +def ranges_per_node(node_to_edge_ids): res = [] start = 0 - for ranges in edge_ids_list: + for ranges in node_to_edge_ids.values(): n_ranges = len(ranges) end = start + n_ranges res.append([start, end]) From a76ec9983fd9b968dc426a4d7135e0a802652c14 Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Fri, 10 Oct 2025 14:28:34 +0200 Subject: [PATCH 14/28] Revert "Revert "correct projection, refactor index_range_group functions"" This reverts commit ae354309ef20bce7dfea82d9ee98b9855976c514. --- brainbuilder/app/sonata.py | 6 ++++++ brainbuilder/utils/sonata/convert_allen_v1.py | 21 ++++++++++++------- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/brainbuilder/app/sonata.py b/brainbuilder/app/sonata.py index 109d47f..27ee477 100644 --- a/brainbuilder/app/sonata.py +++ b/brainbuilder/app/sonata.py @@ -130,7 +130,13 @@ def from_allen_projection_edges( biophysical_edges = edges_df[(edges_df["target_node_id"].isin(biophysical_gids))].reset_index( drop=True ) + biophysical_id_map = dict(zip(biophysical_gids, range(len(biophysical_gids)))) + biophysical_edges["target_node_id"] = biophysical_edges["target_node_id"].map( + biophysical_id_map + ) point_edges = edges_df[(edges_df["target_node_id"].isin(point_gids))].reset_index(drop=True) + point_id_map = dict(zip(point_gids, range(len(point_gids)))) + point_edges["target_node_id"] = point_edges["target_node_id"].map(point_id_map) # save -> biophyscial edges if not Path(output).exists(): diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index 7198c61..fab35cc 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -157,16 +157,19 @@ def write_edges_from_dataframe(data_df, src_pop, tgt_pop, outfile): def write_index_group(group, grouped_df): - node_to_edge_ids = { - key: indices_to_ranges(list(value)) for key, value in grouped_df.groups.items() - } - range_ids = ranges_per_node(node_to_edge_ids) - edge_ids = list(chain.from_iterable(node_to_edge_ids.values())) + """Write the index group for sonata edges file. + grouped_df.groups = {"node_id": [list of edge indices]} + """ + edge_ids_list_per_node = [indices_to_ranges(list(idx)) for idx in grouped_df.groups.values()] + range_ids = ranges_per_node(edge_ids_list_per_node) + edge_ids = list(chain.from_iterable(edge_ids_list_per_node)) group.create_dataset("node_id_to_ranges", data=range_ids) group.create_dataset("range_to_edge_id", data=edge_ids) def indices_to_ranges(indices): + """Given a list of [indices], return list of [start, end) ranges . + e.g. [0,1,2,7,8,10,11,12] -> [[0,3], [7,9], [10,13]]""" if not indices: return [] arr = np.sort(np.array(indices)) @@ -178,10 +181,14 @@ def indices_to_ranges(indices): return np.stack((arr[starts], arr[ends - 1] + 1), axis=1) -def ranges_per_node(node_to_edge_ids): +def ranges_per_node(edge_ids_list): + """Given list of [edge_ids], return list of [start, end) ranges. + e.g. [[[0,3], [3,5], [5,8]], [[9,10]]] -> [[0,3],[3,4]] + Range 0 -> ids[0,3), Range 1 -> ids[3,4), etc.] + """ res = [] start = 0 - for ranges in node_to_edge_ids.values(): + for ranges in edge_ids_list: n_ranges = len(ranges) end = start + n_ranges res.append([start, end]) From 3dd6251f16be3ad1b2896ddd650f411f817076d1 Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Fri, 10 Oct 2025 15:54:30 +0200 Subject: [PATCH 15/28] correct write index group --- brainbuilder/app/sonata.py | 41 +++++++++++++++---- brainbuilder/utils/sonata/convert_allen_v1.py | 29 +++++++------ 2 files changed, 50 insertions(+), 20 deletions(-) diff --git a/brainbuilder/app/sonata.py b/brainbuilder/app/sonata.py index 27ee477..b9bcb4f 100644 --- a/brainbuilder/app/sonata.py +++ b/brainbuilder/app/sonata.py @@ -88,8 +88,11 @@ def from_allen_circuit( output_edges = Path(output) / edge_file_name cells.save_sonata(output_nodes) + n_source_nodes = n_target_nodes = len(nodes_df) with h5py.File(output_edges, "w") as h5f: - convert_allen_v1.write_edges_from_dataframe(edges_df, src_pop, tgt_pop, h5f) + convert_allen_v1.write_edges_from_dataframe( + edges_df, src_pop, tgt_pop, n_source_nodes, n_target_nodes, h5f + ) # Split populations # Create the directory @@ -101,8 +104,9 @@ def from_allen_circuit( @app.command() @click.option("-o", "--output", help="directory to output SONATA files", required=True) -@click.option("--nodes-file", help="Path to nodes file", required=True) -@click.option("--node-types-file", help="Path to node type file", required=True) +@click.option("--n-source-nodes", help="number of virtual source nodes", type=int, required=True) +@click.option("--target-nodes-file", help="Path to the target nodes file", required=True) +@click.option("--target-node-types-file", help="Path to the target node type file", required=True) @click.option("--edges-file", help="Path to edges file", required=True) @click.option("--edge-types-file", help="Path to edge type file", required=True) @click.option( @@ -111,7 +115,13 @@ def from_allen_circuit( required=True, ) def from_allen_projection_edges( - nodes_file, node_types_file, edges_file, edge_types_file, precomputed_edges_file, output + n_source_nodes, + target_nodes_file, + target_node_types_file, + edges_file, + edge_types_file, + precomputed_edges_file, + output, ): """Provide SONATA nodes with MorphoElectrical info""" from brainbuilder.utils.sonata import convert_allen_v1 @@ -119,7 +129,9 @@ def from_allen_projection_edges( import h5py - nodes_df, _node_pop = convert_allen_v1.load_allen_nodes(nodes_file, node_types_file) + nodes_df, _node_pop = convert_allen_v1.load_allen_nodes( + target_nodes_file, target_node_types_file + ) edges_df, src_pop, _tgt_pop = convert_allen_v1.load_allen_edges(edges_file, edge_types_file) edges_df = convert_allen_v1.prepare_synapses(edges_df, nodes_df, precomputed_edges_file) edges_df.drop(["weight_function", "weight_sigma", "nsyns"], axis=1, inplace=True) @@ -138,19 +150,32 @@ def from_allen_projection_edges( point_id_map = dict(zip(point_gids, range(len(point_gids)))) point_edges["target_node_id"] = point_edges["target_node_id"].map(point_id_map) - # save -> biophyscial edges if not Path(output).exists(): Path(output).mkdir(parents=True, exist_ok=True) + + # save -> all edges + edge_file_name = Path(edges_file).name + output_edges = Path(output) / edge_file_name + with h5py.File(output_edges, "w") as h5f: + convert_allen_v1.write_edges_from_dataframe( + edges_df, src_pop, "v1", n_source_nodes, len(nodes_df), h5f + ) + + # save -> biophyscial edges edge_file_name = f"edges_{src_pop}_biophysical.h5" output_edges = Path(output) / edge_file_name with h5py.File(output_edges, "w") as h5f: - convert_allen_v1.write_edges_from_dataframe(biophysical_edges, src_pop, "biophysical", h5f) + convert_allen_v1.write_edges_from_dataframe( + biophysical_edges, src_pop, "biophysical", n_source_nodes, len(biophysical_gids), h5f + ) # save -> point_process edges edge_file_name = f"edges_{src_pop}_point_process.h5" output_edges = Path(output) / edge_file_name with h5py.File(output_edges, "w") as h5f: - convert_allen_v1.write_edges_from_dataframe(point_edges, src_pop, "point_process", h5f) + convert_allen_v1.write_edges_from_dataframe( + point_edges, src_pop, "point_process", 17400, len(point_gids), h5f + ) @app.command() diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index fab35cc..48fab32 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -137,7 +137,7 @@ def adjust_synapse_weights(edges_df, nodes_df): ) -def write_edges_from_dataframe(data_df, src_pop, tgt_pop, outfile): +def write_edges_from_dataframe(data_df, src_pop, tgt_pop, n_source_nodes, n_target_nodes, outfile): edge_population = f"{src_pop}__{tgt_pop}__chemical" group = outfile.create_group(f"/edges/{edge_population}") group_pop = group.create_group("0") @@ -152,17 +152,19 @@ def write_edges_from_dataframe(data_df, src_pop, tgt_pop, outfile): group_pop.create_dataset(attribute_name, data=data_df[attribute_name].to_numpy()) group_indices_src = group.create_group("indices/source_to_target") group_indices_tgt = group.create_group("indices/target_to_source") - write_index_group(group_indices_src, data_df.groupby("source_node_id")) - write_index_group(group_indices_tgt, data_df.groupby("target_node_id")) + write_index_group(group_indices_src, data_df.groupby("source_node_id"), n_source_nodes) + write_index_group(group_indices_tgt, data_df.groupby("target_node_id"), n_target_nodes) -def write_index_group(group, grouped_df): - """Write the index group for sonata edges file. +def write_index_group(group, grouped_df, n_nodes): + """Write the index group for nodes ids: [0, n_nodes-1] grouped_df.groups = {"node_id": [list of edge indices]} """ - edge_ids_list_per_node = [indices_to_ranges(list(idx)) for idx in grouped_df.groups.values()] - range_ids = ranges_per_node(edge_ids_list_per_node) - edge_ids = list(chain.from_iterable(edge_ids_list_per_node)) + node_to_edge_ids = dict.fromkeys(list(range(n_nodes)), []) + for key, value in grouped_df.groups.items(): + node_to_edge_ids[key] = indices_to_ranges(list(value)) + range_ids = ranges_per_node(node_to_edge_ids) + edge_ids = list(chain.from_iterable(node_to_edge_ids.values())) group.create_dataset("node_id_to_ranges", data=range_ids) group.create_dataset("range_to_edge_id", data=edge_ids) @@ -171,7 +173,7 @@ def indices_to_ranges(indices): """Given a list of [indices], return list of [start, end) ranges . e.g. [0,1,2,7,8,10,11,12] -> [[0,3], [7,9], [10,13]]""" if not indices: - return [] + return [0, 0] arr = np.sort(np.array(indices)) # find where consecutive list breaks breaks = np.where(np.diff(arr) != 1)[0] + 1 @@ -181,17 +183,20 @@ def indices_to_ranges(indices): return np.stack((arr[starts], arr[ends - 1] + 1), axis=1) -def ranges_per_node(edge_ids_list): +def ranges_per_node(node_to_edge_ids): """Given list of [edge_ids], return list of [start, end) ranges. e.g. [[[0,3], [3,5], [5,8]], [[9,10]]] -> [[0,3],[3,4]] Range 0 -> ids[0,3), Range 1 -> ids[3,4), etc.] """ res = [] start = 0 - for ranges in edge_ids_list: + for ranges in node_to_edge_ids.values(): n_ranges = len(ranges) end = start + n_ranges - res.append([start, end]) + if n_ranges == 0: + res.append([0, 0]) + else: + res.append([start, end]) start = end return res From bd8e263111794d01739c37faf0ec29ba8a26b746 Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Fri, 10 Oct 2025 22:05:25 +0200 Subject: [PATCH 16/28] add synapse parameters tau1 tau2 erev for exp2syn --- brainbuilder/app/sonata.py | 31 +++++++++++--- brainbuilder/utils/sonata/convert_allen_v1.py | 40 +++++++++++++++++-- 2 files changed, 62 insertions(+), 9 deletions(-) diff --git a/brainbuilder/app/sonata.py b/brainbuilder/app/sonata.py index b9bcb4f..61fdce7 100644 --- a/brainbuilder/app/sonata.py +++ b/brainbuilder/app/sonata.py @@ -45,18 +45,25 @@ def from_mvd3(mvd3, output, model_type, mecombo_info, population): @app.command() -@click.option("-o", "--output", help="directory to output SONATA files", required=True) +@click.option("-o", "--output", help="Directory to output SONATA files", required=True) @click.option("--nodes-file", help="Path to nodes file", required=True) @click.option("--node-types-file", help="Path to node type file", required=True) @click.option("--edges-file", help="Path to edges file", required=True) @click.option("--edge-types-file", help="Path to edge type file", required=True) +@click.option("--syn-parameter-dir", help="Directory to synapse parameters files", required=True) @click.option( "--precomputed-edges-file", help="Path to allen's precomputed edges file, for syn weights and locations", required=True, ) def from_allen_circuit( - nodes_file, node_types_file, edges_file, edge_types_file, precomputed_edges_file, output + nodes_file, + node_types_file, + edges_file, + edge_types_file, + precomputed_edges_file, + syn_parameter_dir, + output, ): """Provide SONATA nodes with MorphoElectrical info""" from brainbuilder.utils.sonata import convert_allen_v1 @@ -73,11 +80,17 @@ def from_allen_circuit( nodes_df, node_pop = convert_allen_v1.load_allen_nodes(nodes_file, node_types_file) edges_df, src_pop, tgt_pop = convert_allen_v1.load_allen_edges(edges_file, edge_types_file) - edges_df = convert_allen_v1.prepare_synapses(edges_df, nodes_df, precomputed_edges_file) + edges_df = convert_allen_v1.prepare_synapses( + edges_df, nodes_df, precomputed_edges_file, syn_parameter_dir + ) # drop columns not needed for OBI simulator nodes_df.drop(["tuning_angle"], axis=1, inplace=True) - edges_df.drop(["weight_function", "weight_sigma", "syn_weight", "nsyns"], axis=1, inplace=True) + edges_df.drop( + ["weight_function", "weight_sigma", "syn_weight", "nsyns", "dynamics_params"], + axis=1, + inplace=True, + ) # save to sonata h5 files if not Path(output).exists(): @@ -114,6 +127,7 @@ def from_allen_circuit( help="Path to allen's precomputed edges file, for syn weights and locations", required=True, ) +@click.option("--syn-parameter-dir", help="Directory to synapse parameters files", required=True) def from_allen_projection_edges( n_source_nodes, target_nodes_file, @@ -121,6 +135,7 @@ def from_allen_projection_edges( edges_file, edge_types_file, precomputed_edges_file, + syn_parameter_dir, output, ): """Provide SONATA nodes with MorphoElectrical info""" @@ -133,8 +148,12 @@ def from_allen_projection_edges( target_nodes_file, target_node_types_file ) edges_df, src_pop, _tgt_pop = convert_allen_v1.load_allen_edges(edges_file, edge_types_file) - edges_df = convert_allen_v1.prepare_synapses(edges_df, nodes_df, precomputed_edges_file) - edges_df.drop(["weight_function", "weight_sigma", "nsyns"], axis=1, inplace=True) + edges_df = convert_allen_v1.prepare_synapses( + edges_df, nodes_df, precomputed_edges_file, syn_parameter_dir + ) + edges_df.drop( + ["weight_function", "weight_sigma", "nsyns", "dynamics_params"], axis=1, inplace=True + ) # split projecting to src->biophysical, src->point_process biophysical_gids = nodes_df.index[nodes_df["model_type"] == "biophysical"] diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index 48fab32..effe80b 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -2,6 +2,8 @@ import pandas as pd import numpy as np from itertools import chain +from brainbuilder import utils +from pathlib import Path def sonata_to_dataframe(sonata_file, file_type="nodes"): @@ -62,15 +64,32 @@ def load_allen_edges(edges_file, edge_types_file): edges_df, pop = sonata_to_dataframe(edges_file, file_type="edges") assert len(pop) == 2, "Should return source and target population names for edges" edges_df = edges_df.merge( - edge_types_df[["edge_type_id", "syn_weight", "weight_function", "weight_sigma", "delay"]], + edge_types_df[ + [ + "edge_type_id", + "syn_weight", + "weight_function", + "weight_sigma", + "delay", + "dynamics_params", + ] + ], on="edge_type_id", how="left", ) return edges_df, pop[0], pop[1] -def prepare_synapses(edges_df, nodes_df, precomputed_edges_file): +def prepare_synapses(edges_df, nodes_df, precomputed_edges_file, syn_parameter_dir): adjust_synapse_weights(edges_df, nodes_df) + edges_df = add_synapse_parameters(edges_df, syn_parameter_dir) + edges_df_expanded = add_precomputed_synapse_locations( + edges_df, nodes_df, precomputed_edges_file + ) + return edges_df_expanded + + +def add_precomputed_synapse_locations(edges_df, nodes_df, precomputed_edges_file): # Read synapse location and weights from precomputed edges file syn_biophysical_df, syn_point_df = load_precomputed_edges_file(precomputed_edges_file) @@ -85,7 +104,7 @@ def prepare_synapses(edges_df, nodes_df, precomputed_edges_file): "point syn weight is not consistent with the precomputed file" ) - # For edges targeting biophysical cells, expand synapses + # For edges targeting biophysical cells, expand synapses, apply precomputed sec_id and seg_x repeat_counts = edges_df["nsyns"].where(edges_df["target_node_id"].isin(biophysical_gids), 1) edges_df_expanded = edges_df.loc[edges_df.index.repeat(repeat_counts)].reset_index(drop=True) mask_biophysical = edges_df_expanded["target_node_id"].isin(biophysical_gids) @@ -103,6 +122,21 @@ def prepare_synapses(edges_df, nodes_df, precomputed_edges_file): return edges_df_expanded +def add_synapse_parameters(edges_df, sym_parameter_dir): + # We rename tau1, tau2 and erev with the BBP synapse parameter names in the output file, so that we can run with our simulator directly + syn_params_map = {"dynamics_params": [], "facilitation_time": [], "decay_time": [], "u_syn": []} + for json_file in edges_df["dynamics_params"].unique(): + params = utils.load_json(Path(sym_parameter_dir) / json_file) + if params["level_of_detail"] == "exp2syn": + syn_params_map["dynamics_params"].append(json_file) + syn_params_map["facilitation_time"].append(params["tau1"]) + syn_params_map["decay_time"].append(params["tau2"]) + syn_params_map["u_syn"].append(params["erev"]) + # create a dataframe from syn_params_map + syn_params_df = pd.DataFrame(syn_params_map) + return edges_df.merge(syn_params_df, on="dynamics_params", how="left") + + def load_precomputed_edges_file(precomputed_edges_file): res = [] with h5py.File(precomputed_edges_file, "r") as h5f: From b6f9fe8a1c09ad720da1210d3bb2704f97c9a62d Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Mon, 20 Oct 2025 17:56:52 +0200 Subject: [PATCH 17/28] add dummy values for edge parameters --- brainbuilder/utils/sonata/convert_allen_v1.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index effe80b..e3d01b2 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -54,6 +54,7 @@ def load_allen_nodes(nodes_file, node_types_file): cells_df["rotation_angle_zaxis"] = cells_df["rotation_angle_zaxis"].fillna(0) cells_df["morphology"] = cells_df["morphology"].fillna("None") + # add dummy attributes cells_df[["mtype", "etype"]] = "None" return cells_df, node_population @@ -83,6 +84,9 @@ def load_allen_edges(edges_file, edge_types_file): def prepare_synapses(edges_df, nodes_df, precomputed_edges_file, syn_parameter_dir): adjust_synapse_weights(edges_df, nodes_df) edges_df = add_synapse_parameters(edges_df, syn_parameter_dir) + edges_df[["depression_time"]] = -1 + edges_df[["n_rrp_vesicles"]] = -1 + edges_df[["syn_type_id"]] = -1 edges_df_expanded = add_precomputed_synapse_locations( edges_df, nodes_df, precomputed_edges_file ) From 77daa5c9126c5bdc997002fb135f0a243dab343a Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Thu, 23 Oct 2025 13:13:09 +0200 Subject: [PATCH 18/28] refactor --- brainbuilder/utils/sonata/convert_allen_v1.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index e3d01b2..6d1a0f7 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -55,7 +55,7 @@ def load_allen_nodes(nodes_file, node_types_file): cells_df["morphology"] = cells_df["morphology"].fillna("None") # add dummy attributes - cells_df[["mtype", "etype"]] = "None" + add_dummy_values(cells_df, ["mtype", "etype"], "None") return cells_df, node_population @@ -84,15 +84,19 @@ def load_allen_edges(edges_file, edge_types_file): def prepare_synapses(edges_df, nodes_df, precomputed_edges_file, syn_parameter_dir): adjust_synapse_weights(edges_df, nodes_df) edges_df = add_synapse_parameters(edges_df, syn_parameter_dir) - edges_df[["depression_time"]] = -1 - edges_df[["n_rrp_vesicles"]] = -1 - edges_df[["syn_type_id"]] = -1 + add_dummy_values(edges_df, ["depression_time", "n_rrp_vesicles", "syn_type_id"], -1) edges_df_expanded = add_precomputed_synapse_locations( edges_df, nodes_df, precomputed_edges_file ) return edges_df_expanded +def add_dummy_values(df, attribute_names, default_value): + for attribute_name in attribute_names: + if attribute_name not in df.columns: + df[attribute_name] = default_value + + def add_precomputed_synapse_locations(edges_df, nodes_df, precomputed_edges_file): # Read synapse location and weights from precomputed edges file syn_biophysical_df, syn_point_df = load_precomputed_edges_file(precomputed_edges_file) From 48eff6120b2f44592c53a9dc0293bdba721cdb84 Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Mon, 27 Oct 2025 16:55:43 +0100 Subject: [PATCH 19/28] fix default value --- brainbuilder/utils/sonata/convert_allen_v1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index 6d1a0f7..5818141 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -119,7 +119,7 @@ def add_precomputed_synapse_locations(edges_df, nodes_df, precomputed_edges_file assert np.allclose( edges_df_expanded.loc[mask_biophysical, "conductance"], syn_biophysical_df["syn_weight"] ), "biophysical syn weight is not consistent with the precomputed file" - edges_df_expanded[["afferent_section_id", "afferent_section_pos"]] = np.nan + edges_df_expanded[["afferent_section_id", "afferent_section_pos"]] = -1 edges_df_expanded.loc[mask_biophysical, "afferent_section_id"] = syn_biophysical_df[ "sec_id" ].to_numpy() # row-to-row, not by index From 362848ae141c8f1b1074b152c19fae1c1c3e95a7 Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Tue, 28 Oct 2025 14:25:41 +0100 Subject: [PATCH 20/28] resolve warning --- brainbuilder/utils/sonata/convert_allen_v1.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index 5818141..7488a3f 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -119,7 +119,8 @@ def add_precomputed_synapse_locations(edges_df, nodes_df, precomputed_edges_file assert np.allclose( edges_df_expanded.loc[mask_biophysical, "conductance"], syn_biophysical_df["syn_weight"] ), "biophysical syn weight is not consistent with the precomputed file" - edges_df_expanded[["afferent_section_id", "afferent_section_pos"]] = -1 + edges_df_expanded["afferent_section_id"] = -1 + edges_df_expanded["afferent_section_pos"] = -1. edges_df_expanded.loc[mask_biophysical, "afferent_section_id"] = syn_biophysical_df[ "sec_id" ].to_numpy() # row-to-row, not by index From 3998d4f19db61bf2cde0795e663d1394a8ac0993 Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Fri, 31 Oct 2025 13:39:32 +0100 Subject: [PATCH 21/28] add syn_class and location in nodes.h5 --- brainbuilder/utils/sonata/convert_allen_v1.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index 7488a3f..7759770 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -38,12 +38,23 @@ def load_allen_nodes(nodes_file, node_types_file): cells_df, node_population = sonata_to_dataframe(nodes_file, file_type="nodes") cells_df = cells_df.merge( node_types_df[ - ["node_type_id", "dynamics_params", "morphology", "rotation_angle_zaxis", "model_type"] + [ + "node_type_id", + "dynamics_params", + "morphology", + "rotation_angle_zaxis", + "model_type", + "ei", + "location", + ] ], on="node_type_id", how="left", ) - cells_df.rename(columns={"dynamics_params": "model_template"}, inplace=True) + cells_df.rename( + columns={"dynamics_params": "model_template", "ei": "synapse_class", "location": "layer"}, + inplace=True, + ) # hoc template name can not be started with number, prefix with BP_ where necessary cells_df["model_template"] = cells_df["model_template"].str.replace( r"^([0-9][A-Za-z0-9_]*)(?:\.json)?$|^([A-Za-z][A-Za-z0-9_]*)(?:\.json)?$", @@ -53,6 +64,7 @@ def load_allen_nodes(nodes_file, node_types_file): cells_df["morphology"] = cells_df["morphology"].str.replace(r"\.[^.]+$", "", regex=True) cells_df["rotation_angle_zaxis"] = cells_df["rotation_angle_zaxis"].fillna(0) cells_df["morphology"] = cells_df["morphology"].fillna("None") + cells_df["synapse_class"] = cells_df["synapse_class"].map({"e": "EXC", "i": "INH"}) # add dummy attributes add_dummy_values(cells_df, ["mtype", "etype"], "None") @@ -120,7 +132,7 @@ def add_precomputed_synapse_locations(edges_df, nodes_df, precomputed_edges_file edges_df_expanded.loc[mask_biophysical, "conductance"], syn_biophysical_df["syn_weight"] ), "biophysical syn weight is not consistent with the precomputed file" edges_df_expanded["afferent_section_id"] = -1 - edges_df_expanded["afferent_section_pos"] = -1. + edges_df_expanded["afferent_section_pos"] = -1.0 edges_df_expanded.loc[mask_biophysical, "afferent_section_id"] = syn_biophysical_df[ "sec_id" ].to_numpy() # row-to-row, not by index From 7246b566f5507d4e04167f8f7ff733ea682f30f4 Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Fri, 7 Nov 2025 14:21:11 +0100 Subject: [PATCH 22/28] add script to compute synapse locations --- brainbuilder/app/sonata.py | 45 ++++++- brainbuilder/utils/sonata/convert_allen_v1.py | 114 ++++++++++++++++++ 2 files changed, 153 insertions(+), 6 deletions(-) diff --git a/brainbuilder/app/sonata.py b/brainbuilder/app/sonata.py index 61fdce7..835bc62 100644 --- a/brainbuilder/app/sonata.py +++ b/brainbuilder/app/sonata.py @@ -56,7 +56,7 @@ def from_mvd3(mvd3, output, model_type, mecombo_info, population): help="Path to allen's precomputed edges file, for syn weights and locations", required=True, ) -def from_allen_circuit( +def convert_allen_circuit( nodes_file, node_types_file, edges_file, @@ -65,7 +65,6 @@ def from_allen_circuit( syn_parameter_dir, output, ): - """Provide SONATA nodes with MorphoElectrical info""" from brainbuilder.utils.sonata import convert_allen_v1 from brainbuilder.utils.sonata import split_population as module from voxcell import CellCollection @@ -87,7 +86,15 @@ def from_allen_circuit( # drop columns not needed for OBI simulator nodes_df.drop(["tuning_angle"], axis=1, inplace=True) edges_df.drop( - ["weight_function", "weight_sigma", "syn_weight", "nsyns", "dynamics_params"], + [ + "weight_function", + "weight_sigma", + "syn_weight", + "nsyns", + "dynamics_params", + "distance_range", + "target_sections", + ], axis=1, inplace=True, ) @@ -128,7 +135,7 @@ def from_allen_circuit( required=True, ) @click.option("--syn-parameter-dir", help="Directory to synapse parameters files", required=True) -def from_allen_projection_edges( +def convert_allen_projection_edges( n_source_nodes, target_nodes_file, target_node_types_file, @@ -138,7 +145,6 @@ def from_allen_projection_edges( syn_parameter_dir, output, ): - """Provide SONATA nodes with MorphoElectrical info""" from brainbuilder.utils.sonata import convert_allen_v1 from pathlib import Path @@ -152,7 +158,16 @@ def from_allen_projection_edges( edges_df, nodes_df, precomputed_edges_file, syn_parameter_dir ) edges_df.drop( - ["weight_function", "weight_sigma", "nsyns", "dynamics_params"], axis=1, inplace=True + [ + "weight_function", + "weight_sigma", + "nsyns", + "dynamics_params", + "distance_range", + "target_sections", + ], + axis=1, + inplace=True, ) # split projecting to src->biophysical, src->point_process @@ -197,6 +212,24 @@ def from_allen_projection_edges( ) +@app.command() +@click.option("-o", "--output-dir", help="Directory to output SONATA files", required=True) +@click.option("--nodes-file", help="Path to the target nodes file", required=True) +@click.option("--node-types-file", help="Path to the target node type file", required=True) +@click.option("--edges-file", help="Path to edges file", required=True) +@click.option("--edge-types-file", help="Path to edge type file", required=True) +@click.option("--morphology-dir", help="Directory to morphology file", required=True) +def precompute_allen_synapse_locations( + output_dir, nodes_file, node_types_file, edges_file, edge_types_file, morphology_dir +): + """Precompute synapse locations for Allen V1 circuit""" + from brainbuilder.utils.sonata import convert_allen_v1 + + convert_allen_v1.compute_synapse_loctations( + nodes_file, node_types_file, edges_file, edge_types_file, output_dir, morphology_dir + ) + + @app.command() @click.argument("cells-path") @click.option("-o", "--output", help="Path to output SONATA nodes", required=True) diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index 7759770..2eee0ba 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -85,6 +85,8 @@ def load_allen_edges(edges_file, edge_types_file): "weight_sigma", "delay", "dynamics_params", + "distance_range", + "target_sections", ] ], on="edge_type_id", @@ -298,3 +300,115 @@ def DirectionRule_EE(edge_props, src_node, trg_node): phase_scale_ratio = phase_scale_ratio * (5.5 / 4 - 11.0 / 1680 * theta_tar_scale) return w_multiplier_180 * phase_scale_ratio * edge_props["syn_weight"] + + +def compute_synapse_loctations( + nodes_file, node_types_file, edges_file, edge_types_file, output_dir, morphology_dir +): + nodes_df, _node_pop = load_allen_nodes(nodes_file, node_types_file) + edges_df, src_pop, tgt_pop = load_allen_edges(edges_file, edge_types_file) + adjust_synapse_weights(edges_df, nodes_df) + biophysical_gids = nodes_df.index[nodes_df["model_type"] == "biophysical"] + biophysical_edges = edges_df[(edges_df["target_node_id"].isin(biophysical_gids))] + point_gids = nodes_df.index[nodes_df["model_type"] == "point_process"] + point_edges = edges_df[(edges_df["target_node_id"].isin(point_gids))] + point_edges.loc[:, "syn_weight"] *= point_edges["nsyns"] + sec_ids, seg_xs = find_section_locations(biophysical_edges, nodes_df, morphology_dir) + repeat_counts = biophysical_edges["nsyns"] + biophysical_edges = biophysical_edges.loc[ + biophysical_edges.index.repeat(repeat_counts) + ].reset_index(drop=True) + biophysical_edges["sec_id"] = sec_ids + biophysical_edges["sec_x"] = seg_xs + + if not Path(output_dir).exists(): + Path(output_dir).mkdir() + output_prefix = f"{src_pop}_{tgt_pop}" + biophysical_edges.to_csv( + Path(output_dir) / f"{output_prefix}_biophysical_edges_with_syn_locations.csv", + index=False, + columns=[ + "source_node_id", + "target_node_id", + "edge_type_id", + "nsyns", + "sec_id", + "sec_x", + "syn_weight", + ], + ) + output_file = Path(output_dir) / f"{output_prefix}_syn_locations.h5" + print(f"write output file {output_file}") + with h5py.File(output_file, "w") as h5f: + edge_population = f"{src_pop}__{tgt_pop}__chemical" + group = h5f.create_group(f"/edges/{edge_population}") + group_pop = group.create_group("0") + group_pop.create_dataset("sec_id", data=biophysical_edges["sec_id"].to_numpy()) + group_pop.create_dataset("sec_x", data=biophysical_edges["sec_x"].to_numpy()) + group_pop.create_dataset("syn_weight", data=biophysical_edges["syn_weight"].to_numpy()) + group_pop = group.create_group("1") + group_pop.create_dataset("syn_weight", data=point_edges["syn_weight"].to_numpy()) + + +def find_section_locations(edges_df, nodes_df, morph_dir): + from tqdm import tqdm + + all_sec_ids = [] + all_seg_xs = [] + for edge_row in tqdm(edges_df.itertuples(index=True, name="Edge"), total=len(edges_df)): + gid = edge_row.target_node_id + morpho_file = Path(morph_dir) / (nodes_df.iloc[gid]["morphology"] + ".swc") + assert morpho_file.exists(), f"Morphology file {morpho_file} does not exist" + # if morpho_file != check_file: + # continue + distance_range = edge_row.distance_range + nsyns = edge_row.nsyns + target_sections = edge_row.target_sections + if isinstance(distance_range, str): + distance_range = distance_range.strip("[]") + distance_range = [float(x) for x in distance_range.split(",")] + if isinstance(target_sections, str): + target_sections = target_sections.strip("[]") + target_sections = [ + x.replace('"', "").replace(" ", "") for x in target_sections.split(",") + ] + sec_ids, seg_xs = choose_synapse_locations( + nsyns, distance_range, target_sections, str(morpho_file) + ) + all_sec_ids.append(sec_ids) + all_seg_xs.append(seg_xs) + return np.concatenate(all_sec_ids), np.concatenate(all_seg_xs) + + +morphology_cache = {} + + +def choose_synapse_locations(nsyns, distance_range, target_sections, morph_file, rng_seed=None): + from bmtk.builder.bionet.swc_reader import SWCReader + + cache_key = (morph_file, tuple(target_sections), tuple(distance_range)) + if cache_key in morphology_cache: + tar_seg_ix, tar_seg_prob, morph_reader = morphology_cache[cache_key] + else: + morph_reader = SWCReader(morph_file, rng_seed) + morph_reader.set_segment_dl(20) + # morph_reader.fix_axon() // no apply replace axons to preserve the original indices, align with OBI + tar_seg_ix, tar_seg_prob = morph_reader.find_sections( + section_names=target_sections, distance_range=distance_range + ) + morphology_cache[cache_key] = (tar_seg_ix, tar_seg_prob, morph_reader) + + # print(f"tar_seg_ix={tar_seg_ix} tar_seg_prob={tar_seg_prob}") + secs_ix = morph_reader._prng.choice(tar_seg_ix, nsyns, p=tar_seg_prob) + sec_ids = morph_reader.seg_props.sec_id[secs_ix] + seg_xs = morph_reader.seg_props.x[secs_ix] + assert max(sec_ids) < morph_reader.n_sections, ( + f"sec_id {max(sec_ids)} exceeds number of sections {SWCReader.n_sections} in morphology {morph_file}" + ) + # sec_ids, seg_xs = morph_reader.choose_sections( + # section_names=target_sections, + # distance_range=distance_range, + # n_sections=nsyns + # ) + + return sec_ids, seg_xs From 28775f57877cd2dfb6e32b3766ef066972e225ac Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Wed, 12 Nov 2025 16:08:10 +0100 Subject: [PATCH 23/28] add more to deal with bmtk test circuits --- brainbuilder/app/sonata.py | 15 ++-- brainbuilder/utils/sonata/convert_allen_v1.py | 68 +++++++++++++------ 2 files changed, 57 insertions(+), 26 deletions(-) diff --git a/brainbuilder/app/sonata.py b/brainbuilder/app/sonata.py index 835bc62..d6f4320 100644 --- a/brainbuilder/app/sonata.py +++ b/brainbuilder/app/sonata.py @@ -54,7 +54,8 @@ def from_mvd3(mvd3, output, model_type, mecombo_info, population): @click.option( "--precomputed-edges-file", help="Path to allen's precomputed edges file, for syn weights and locations", - required=True, + default=None, + show_default=True, ) def convert_allen_circuit( nodes_file, @@ -84,12 +85,11 @@ def convert_allen_circuit( ) # drop columns not needed for OBI simulator - nodes_df.drop(["tuning_angle"], axis=1, inplace=True) + nodes_df.drop(["tuning_angle"], axis=1, inplace=True, errors="ignore") edges_df.drop( [ "weight_function", "weight_sigma", - "syn_weight", "nsyns", "dynamics_params", "distance_range", @@ -97,6 +97,7 @@ def convert_allen_circuit( ], axis=1, inplace=True, + errors="ignore", ) # save to sonata h5 files @@ -132,7 +133,8 @@ def convert_allen_circuit( @click.option( "--precomputed-edges-file", help="Path to allen's precomputed edges file, for syn weights and locations", - required=True, + default=None, + show_default=True, ) @click.option("--syn-parameter-dir", help="Directory to synapse parameters files", required=True) def convert_allen_projection_edges( @@ -168,6 +170,7 @@ def convert_allen_projection_edges( ], axis=1, inplace=True, + errors="ignore", ) # split projecting to src->biophysical, src->point_process @@ -208,7 +211,7 @@ def convert_allen_projection_edges( output_edges = Path(output) / edge_file_name with h5py.File(output_edges, "w") as h5f: convert_allen_v1.write_edges_from_dataframe( - point_edges, src_pop, "point_process", 17400, len(point_gids), h5f + point_edges, src_pop, "point_process", n_source_nodes, len(point_gids), h5f ) @@ -225,7 +228,7 @@ def precompute_allen_synapse_locations( """Precompute synapse locations for Allen V1 circuit""" from brainbuilder.utils.sonata import convert_allen_v1 - convert_allen_v1.compute_synapse_loctations( + convert_allen_v1.compute_synapse_locations( nodes_file, node_types_file, edges_file, edge_types_file, output_dir, morphology_dir ) diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index 2eee0ba..8006852 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -1,13 +1,14 @@ import h5py import pandas as pd import numpy as np +import re from itertools import chain from brainbuilder import utils from pathlib import Path +from collections import defaultdict def sonata_to_dataframe(sonata_file, file_type="nodes"): - cells_df = pd.DataFrame() with h5py.File(sonata_file, "r") as h5f: population_names = list(h5f[f"/{file_type}"].keys()) assert len(population_names) == 1, "Single population is supported only" @@ -15,10 +16,18 @@ def sonata_to_dataframe(sonata_file, file_type="nodes"): population = h5f[f"{file_type}/{population_name}"] assert "0" in population, "group '0' doesn't exst" - group = population["0"] - for key in group.keys(): - cells_df[key] = group[key][()] + data = defaultdict(list) + for group_name in population.keys(): + # loop through groups /0, /1 ... in allen's sonata files + if not group_name.isdigit(): + continue + group = population[group_name] + for key in group.keys(): + data[key].extend(group[key][()]) + + cells_df = pd.DataFrame(data) + # cells_df = pd.DataFrame.from_dict(data, orient='index').transpose() type_id_key = "node_type_id" if file_type == "nodes" else "edge_type_id" cells_df[type_id_key] = population[type_id_key][()] res_pop = population_name @@ -96,13 +105,14 @@ def load_allen_edges(edges_file, edge_types_file): def prepare_synapses(edges_df, nodes_df, precomputed_edges_file, syn_parameter_dir): - adjust_synapse_weights(edges_df, nodes_df) edges_df = add_synapse_parameters(edges_df, syn_parameter_dir) add_dummy_values(edges_df, ["depression_time", "n_rrp_vesicles", "syn_type_id"], -1) - edges_df_expanded = add_precomputed_synapse_locations( - edges_df, nodes_df, precomputed_edges_file - ) - return edges_df_expanded + if "weight_function" in edges_df.columns and "weight_sigma" in edges_df.columns: + adjust_synapse_weights(edges_df, nodes_df) + if precomputed_edges_file: + edges_df = add_precomputed_synapse_locations(edges_df, nodes_df, precomputed_edges_file) + edges_df.rename(columns={"syn_weight": "conductance"}, inplace=True) + return edges_df def add_dummy_values(df, attribute_names, default_value): @@ -120,9 +130,9 @@ def add_precomputed_synapse_locations(edges_df, nodes_df, precomputed_edges_file # For edges targeting point cells, multiple syn_weight by nsys mask_point = edges_df["target_node_id"].isin(point_gids) - edges_df.loc[mask_point, "conductance"] *= edges_df.loc[mask_point, "nsyns"] + edges_df.loc[mask_point, "syn_weight"] *= edges_df.loc[mask_point, "nsyns"] # cross check with precompuated file to make sure the weights are correct - assert np.allclose(edges_df.loc[mask_point, "conductance"], abs(syn_point_df["syn_weight"])), ( + assert np.allclose(edges_df.loc[mask_point, "syn_weight"], abs(syn_point_df["syn_weight"])), ( "point syn weight is not consistent with the precomputed file" ) @@ -131,7 +141,7 @@ def add_precomputed_synapse_locations(edges_df, nodes_df, precomputed_edges_file edges_df_expanded = edges_df.loc[edges_df.index.repeat(repeat_counts)].reset_index(drop=True) mask_biophysical = edges_df_expanded["target_node_id"].isin(biophysical_gids) assert np.allclose( - edges_df_expanded.loc[mask_biophysical, "conductance"], syn_biophysical_df["syn_weight"] + edges_df_expanded.loc[mask_biophysical, "syn_weight"], syn_biophysical_df["syn_weight"] ), "biophysical syn weight is not consistent with the precomputed file" edges_df_expanded["afferent_section_id"] = -1 edges_df_expanded["afferent_section_pos"] = -1.0 @@ -185,13 +195,15 @@ def adjust_synapse_weights(edges_df, nodes_df): tgt_df = nodes_df.loc[edges_df["target_node_id"], ["tuning_angle", "x", "z"]].reset_index( drop=True ) - edges_df.loc[:, "conductance"] = edges_df["syn_weight"] # default cond - edges_df.loc[edges_df["weight_function"] == "DirectionRule_others", "conductance"] = ( + edges_df.loc[edges_df["weight_function"] == "DirectionRule_others", "syn_weight"] = ( DirectionRule_others(edges_df, src_df, tgt_df) ) - edges_df.loc[edges_df["weight_function"] == "DirectionRule_EE", "conductance"] = ( + edges_df.loc[edges_df["weight_function"] == "DirectionRule_EE", "syn_weight"] = ( DirectionRule_EE(edges_df, src_df, tgt_df) ) + edges_df.loc[edges_df["weight_function"] == "gaussianLL", "syn_weight"] = gaussianLL( + edges_df, src_df, tgt_df + ) def write_edges_from_dataframe(data_df, src_pop, tgt_pop, n_source_nodes, n_target_nodes, outfile): @@ -302,7 +314,25 @@ def DirectionRule_EE(edge_props, src_node, trg_node): return w_multiplier_180 * phase_scale_ratio * edge_props["syn_weight"] -def compute_synapse_loctations( +def gaussianLL(edge_props, src_props, trg_props): + src_tuning = src_props["tuning_angle"] + tar_tuning = trg_props["tuning_angle"] + + mask = src_tuning.isna() + src_tuning.loc[mask] = np.random.uniform(0.0, 360.0) + mask = tar_tuning.isna() + tar_tuning.loc[mask] = np.random.uniform(0.0, 360.0) + + w0 = edge_props["syn_weight"] + sigma = edge_props["weight_sigma"] + + delta_tuning = abs(abs(abs(180.0 - abs(tar_tuning - src_tuning) % 360.0) - 90.0) - 90.0) + weight = w0 * np.exp(-((delta_tuning / sigma) ** 2)) + + return weight + + +def compute_synapse_locations( nodes_file, node_types_file, edges_file, edge_types_file, output_dir, morphology_dir ): nodes_df, _node_pop = load_allen_nodes(nodes_file, node_types_file) @@ -369,9 +399,7 @@ def find_section_locations(edges_df, nodes_df, morph_dir): distance_range = [float(x) for x in distance_range.split(",")] if isinstance(target_sections, str): target_sections = target_sections.strip("[]") - target_sections = [ - x.replace('"', "").replace(" ", "") for x in target_sections.split(",") - ] + target_sections = [re.sub(r"[\"'\s]", "", x) for x in target_sections.split(",")] sec_ids, seg_xs = choose_synapse_locations( nsyns, distance_range, target_sections, str(morpho_file) ) @@ -392,7 +420,7 @@ def choose_synapse_locations(nsyns, distance_range, target_sections, morph_file, else: morph_reader = SWCReader(morph_file, rng_seed) morph_reader.set_segment_dl(20) - # morph_reader.fix_axon() // no apply replace axons to preserve the original indices, align with OBI + # morph_reader.fix_axon() // NO replace axons to preserve the original indices, align with OBI tar_seg_ix, tar_seg_prob = morph_reader.find_sections( section_names=target_sections, distance_range=distance_range ) From 767dcc385ac6a34328cc10ceb8365217be3bdca1 Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Thu, 13 Nov 2025 15:44:48 +0100 Subject: [PATCH 24/28] small reformat --- brainbuilder/app/sonata.py | 380 +++++++++++++++++++------------------ 1 file changed, 191 insertions(+), 189 deletions(-) diff --git a/brainbuilder/app/sonata.py b/brainbuilder/app/sonata.py index d6f4320..8d4fba4 100644 --- a/brainbuilder/app/sonata.py +++ b/brainbuilder/app/sonata.py @@ -44,195 +44,6 @@ def from_mvd3(mvd3, output, model_type, mecombo_info, population): convert.provide_me_info(mvd3, output, model_type, mecombo_info, population) -@app.command() -@click.option("-o", "--output", help="Directory to output SONATA files", required=True) -@click.option("--nodes-file", help="Path to nodes file", required=True) -@click.option("--node-types-file", help="Path to node type file", required=True) -@click.option("--edges-file", help="Path to edges file", required=True) -@click.option("--edge-types-file", help="Path to edge type file", required=True) -@click.option("--syn-parameter-dir", help="Directory to synapse parameters files", required=True) -@click.option( - "--precomputed-edges-file", - help="Path to allen's precomputed edges file, for syn weights and locations", - default=None, - show_default=True, -) -def convert_allen_circuit( - nodes_file, - node_types_file, - edges_file, - edge_types_file, - precomputed_edges_file, - syn_parameter_dir, - output, -): - from brainbuilder.utils.sonata import convert_allen_v1 - from brainbuilder.utils.sonata import split_population as module - from voxcell import CellCollection - from pathlib import Path - - import h5py - - node_file_name = Path(nodes_file).name - edge_file_name = Path(edges_file).name - split_output = Path(output) / "split_circuit" - assert not split_output.exists(), f"Please remove {split_output} first" - - nodes_df, node_pop = convert_allen_v1.load_allen_nodes(nodes_file, node_types_file) - edges_df, src_pop, tgt_pop = convert_allen_v1.load_allen_edges(edges_file, edge_types_file) - edges_df = convert_allen_v1.prepare_synapses( - edges_df, nodes_df, precomputed_edges_file, syn_parameter_dir - ) - - # drop columns not needed for OBI simulator - nodes_df.drop(["tuning_angle"], axis=1, inplace=True, errors="ignore") - edges_df.drop( - [ - "weight_function", - "weight_sigma", - "nsyns", - "dynamics_params", - "distance_range", - "target_sections", - ], - axis=1, - inplace=True, - errors="ignore", - ) - - # save to sonata h5 files - if not Path(output).exists(): - Path(output).mkdir(parents=True, exist_ok=True) - cells = CellCollection.from_dataframe(nodes_df, index_offset=0) - cells.population_name = node_pop - output_nodes = Path(output) / node_file_name - output_edges = Path(output) / edge_file_name - cells.save_sonata(output_nodes) - - n_source_nodes = n_target_nodes = len(nodes_df) - with h5py.File(output_edges, "w") as h5f: - convert_allen_v1.write_edges_from_dataframe( - edges_df, src_pop, tgt_pop, n_source_nodes, n_target_nodes, h5f - ) - - # Split populations - # Create the directory - split_output.mkdir(parents=True, exist_ok=True) - module.split_population( - split_output, attribute="model_type", nodes_path=output_nodes, edges_path=output_edges - ) - - -@app.command() -@click.option("-o", "--output", help="directory to output SONATA files", required=True) -@click.option("--n-source-nodes", help="number of virtual source nodes", type=int, required=True) -@click.option("--target-nodes-file", help="Path to the target nodes file", required=True) -@click.option("--target-node-types-file", help="Path to the target node type file", required=True) -@click.option("--edges-file", help="Path to edges file", required=True) -@click.option("--edge-types-file", help="Path to edge type file", required=True) -@click.option( - "--precomputed-edges-file", - help="Path to allen's precomputed edges file, for syn weights and locations", - default=None, - show_default=True, -) -@click.option("--syn-parameter-dir", help="Directory to synapse parameters files", required=True) -def convert_allen_projection_edges( - n_source_nodes, - target_nodes_file, - target_node_types_file, - edges_file, - edge_types_file, - precomputed_edges_file, - syn_parameter_dir, - output, -): - from brainbuilder.utils.sonata import convert_allen_v1 - from pathlib import Path - - import h5py - - nodes_df, _node_pop = convert_allen_v1.load_allen_nodes( - target_nodes_file, target_node_types_file - ) - edges_df, src_pop, _tgt_pop = convert_allen_v1.load_allen_edges(edges_file, edge_types_file) - edges_df = convert_allen_v1.prepare_synapses( - edges_df, nodes_df, precomputed_edges_file, syn_parameter_dir - ) - edges_df.drop( - [ - "weight_function", - "weight_sigma", - "nsyns", - "dynamics_params", - "distance_range", - "target_sections", - ], - axis=1, - inplace=True, - errors="ignore", - ) - - # split projecting to src->biophysical, src->point_process - biophysical_gids = nodes_df.index[nodes_df["model_type"] == "biophysical"] - point_gids = nodes_df.index[nodes_df["model_type"] == "point_process"] - biophysical_edges = edges_df[(edges_df["target_node_id"].isin(biophysical_gids))].reset_index( - drop=True - ) - biophysical_id_map = dict(zip(biophysical_gids, range(len(biophysical_gids)))) - biophysical_edges["target_node_id"] = biophysical_edges["target_node_id"].map( - biophysical_id_map - ) - point_edges = edges_df[(edges_df["target_node_id"].isin(point_gids))].reset_index(drop=True) - point_id_map = dict(zip(point_gids, range(len(point_gids)))) - point_edges["target_node_id"] = point_edges["target_node_id"].map(point_id_map) - - if not Path(output).exists(): - Path(output).mkdir(parents=True, exist_ok=True) - - # save -> all edges - edge_file_name = Path(edges_file).name - output_edges = Path(output) / edge_file_name - with h5py.File(output_edges, "w") as h5f: - convert_allen_v1.write_edges_from_dataframe( - edges_df, src_pop, "v1", n_source_nodes, len(nodes_df), h5f - ) - - # save -> biophyscial edges - edge_file_name = f"edges_{src_pop}_biophysical.h5" - output_edges = Path(output) / edge_file_name - with h5py.File(output_edges, "w") as h5f: - convert_allen_v1.write_edges_from_dataframe( - biophysical_edges, src_pop, "biophysical", n_source_nodes, len(biophysical_gids), h5f - ) - - # save -> point_process edges - edge_file_name = f"edges_{src_pop}_point_process.h5" - output_edges = Path(output) / edge_file_name - with h5py.File(output_edges, "w") as h5f: - convert_allen_v1.write_edges_from_dataframe( - point_edges, src_pop, "point_process", n_source_nodes, len(point_gids), h5f - ) - - -@app.command() -@click.option("-o", "--output-dir", help="Directory to output SONATA files", required=True) -@click.option("--nodes-file", help="Path to the target nodes file", required=True) -@click.option("--node-types-file", help="Path to the target node type file", required=True) -@click.option("--edges-file", help="Path to edges file", required=True) -@click.option("--edge-types-file", help="Path to edge type file", required=True) -@click.option("--morphology-dir", help="Directory to morphology file", required=True) -def precompute_allen_synapse_locations( - output_dir, nodes_file, node_types_file, edges_file, edge_types_file, morphology_dir -): - """Precompute synapse locations for Allen V1 circuit""" - from brainbuilder.utils.sonata import convert_allen_v1 - - convert_allen_v1.compute_synapse_locations( - nodes_file, node_types_file, edges_file, edge_types_file, output_dir, morphology_dir - ) - - @app.command() @click.argument("cells-path") @click.option("-o", "--output", help="Path to output SONATA nodes", required=True) @@ -551,3 +362,194 @@ def clip_morphologies(output, circuit, population_name): from brainbuilder.utils.sonata import clip clip.morphologies(output, circuit, population_name) + + +@app.command() +@click.option("-o", "--output", help="Directory of output SONATA files", required=True) +@click.option("--nodes-file", help="Path to nodes file", required=True) +@click.option("--node-types-file", help="Path to node type file", required=True) +@click.option("--edges-file", help="Path to edges file", required=True) +@click.option("--edge-types-file", help="Path to edge type file", required=True) +@click.option("--syn-parameter-dir", help="Directory of synapse parameters files", required=True) +@click.option( + "--precomputed-edges-file", + help="Path to allen's precomputed edges file, for syn weights and locations", + default=None, + show_default=True, +) +def convert_allen_circuit( + nodes_file, + node_types_file, + edges_file, + edge_types_file, + precomputed_edges_file, + syn_parameter_dir, + output, +): + """Convert nodes and inner connectivity edges file for the Allen V1 circuit""" + from brainbuilder.utils.sonata import convert_allen_v1 + from brainbuilder.utils.sonata import split_population as module + from voxcell import CellCollection + from pathlib import Path + + import h5py + + node_file_name = Path(nodes_file).name + edge_file_name = Path(edges_file).name + split_output = Path(output) / "split_circuit" + assert not split_output.exists(), f"Please remove {split_output} first" + + nodes_df, node_pop = convert_allen_v1.load_allen_nodes(nodes_file, node_types_file) + edges_df, src_pop, tgt_pop = convert_allen_v1.load_allen_edges(edges_file, edge_types_file) + edges_df = convert_allen_v1.prepare_synapses( + edges_df, nodes_df, precomputed_edges_file, syn_parameter_dir + ) + + # drop columns not needed for OBI simulator + nodes_df.drop(["tuning_angle"], axis=1, inplace=True, errors="ignore") + edges_df.drop( + [ + "weight_function", + "weight_sigma", + "nsyns", + "dynamics_params", + "distance_range", + "target_sections", + ], + axis=1, + inplace=True, + errors="ignore", + ) + + # save to sonata h5 files + if not Path(output).exists(): + Path(output).mkdir(parents=True, exist_ok=True) + cells = CellCollection.from_dataframe(nodes_df, index_offset=0) + cells.population_name = node_pop + output_nodes = Path(output) / node_file_name + output_edges = Path(output) / edge_file_name + cells.save_sonata(output_nodes) + + n_source_nodes = n_target_nodes = len(nodes_df) + with h5py.File(output_edges, "w") as h5f: + convert_allen_v1.write_edges_from_dataframe( + edges_df, src_pop, tgt_pop, n_source_nodes, n_target_nodes, h5f + ) + + # Split populations + # Create the directory + split_output.mkdir(parents=True, exist_ok=True) + module.split_population( + split_output, attribute="model_type", nodes_path=output_nodes, edges_path=output_edges + ) + + +@app.command() +@click.option("-o", "--output", help="directory of output SONATA files", required=True) +@click.option("--n-source-nodes", help="number of virtual source nodes", type=int, required=True) +@click.option("--target-nodes-file", help="Path to the target nodes file", required=True) +@click.option("--target-node-types-file", help="Path to the target node type file", required=True) +@click.option("--edges-file", help="Path to edges file", required=True) +@click.option("--edge-types-file", help="Path to edge type file", required=True) +@click.option( + "--precomputed-edges-file", + help="Path to allen's precomputed edges file, for syn weights and locations", + default=None, + show_default=True, +) +@click.option("--syn-parameter-dir", help="Directory of synapse parameters files", required=True) +def convert_allen_projection_edges( + n_source_nodes, + target_nodes_file, + target_node_types_file, + edges_file, + edge_types_file, + precomputed_edges_file, + syn_parameter_dir, + output, +): + """Convert projection edges file for the Allen V1 circuit""" + from brainbuilder.utils.sonata import convert_allen_v1 + from pathlib import Path + + import h5py + + nodes_df, _node_pop = convert_allen_v1.load_allen_nodes( + target_nodes_file, target_node_types_file + ) + edges_df, src_pop, _tgt_pop = convert_allen_v1.load_allen_edges(edges_file, edge_types_file) + edges_df = convert_allen_v1.prepare_synapses( + edges_df, nodes_df, precomputed_edges_file, syn_parameter_dir + ) + edges_df.drop( + [ + "weight_function", + "weight_sigma", + "nsyns", + "dynamics_params", + "distance_range", + "target_sections", + ], + axis=1, + inplace=True, + errors="ignore", + ) + + # split projecting to src->biophysical, src->point_process + biophysical_gids = nodes_df.index[nodes_df["model_type"] == "biophysical"] + point_gids = nodes_df.index[nodes_df["model_type"] == "point_process"] + biophysical_edges = edges_df[(edges_df["target_node_id"].isin(biophysical_gids))].reset_index( + drop=True + ) + biophysical_id_map = dict(zip(biophysical_gids, range(len(biophysical_gids)))) + biophysical_edges["target_node_id"] = biophysical_edges["target_node_id"].map( + biophysical_id_map + ) + point_edges = edges_df[(edges_df["target_node_id"].isin(point_gids))].reset_index(drop=True) + point_id_map = dict(zip(point_gids, range(len(point_gids)))) + point_edges["target_node_id"] = point_edges["target_node_id"].map(point_id_map) + + if not Path(output).exists(): + Path(output).mkdir(parents=True, exist_ok=True) + + # save -> all edges + edge_file_name = Path(edges_file).name + output_edges = Path(output) / edge_file_name + with h5py.File(output_edges, "w") as h5f: + convert_allen_v1.write_edges_from_dataframe( + edges_df, src_pop, "v1", n_source_nodes, len(nodes_df), h5f + ) + + # save -> biophyscial edges + edge_file_name = f"edges_{src_pop}_biophysical.h5" + output_edges = Path(output) / edge_file_name + with h5py.File(output_edges, "w") as h5f: + convert_allen_v1.write_edges_from_dataframe( + biophysical_edges, src_pop, "biophysical", n_source_nodes, len(biophysical_gids), h5f + ) + + # save -> point_process edges + edge_file_name = f"edges_{src_pop}_point_process.h5" + output_edges = Path(output) / edge_file_name + with h5py.File(output_edges, "w") as h5f: + convert_allen_v1.write_edges_from_dataframe( + point_edges, src_pop, "point_process", n_source_nodes, len(point_gids), h5f + ) + + +@app.command() +@click.option("-o", "--output-dir", help="Directory of output SONATA files", required=True) +@click.option("--nodes-file", help="Path to the target nodes file", required=True) +@click.option("--node-types-file", help="Path to the target node type file", required=True) +@click.option("--edges-file", help="Path to edges file", required=True) +@click.option("--edge-types-file", help="Path to edge type file", required=True) +@click.option("--morphology-dir", help="Directory of morphology file", required=True) +def precompute_allen_synapse_locations( + output_dir, nodes_file, node_types_file, edges_file, edge_types_file, morphology_dir +): + """Precompute synapse locations for Allen V1 circuit""" + from brainbuilder.utils.sonata import convert_allen_v1 + + convert_allen_v1.compute_synapse_locations( + nodes_file, node_types_file, edges_file, edge_types_file, output_dir, morphology_dir + ) From 906306a9a78d021b8954a4823dd4fe7f270ca168 Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Thu, 11 Dec 2025 16:20:24 +0100 Subject: [PATCH 25/28] add threshold currect from user's file --- brainbuilder/app/sonata.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/brainbuilder/app/sonata.py b/brainbuilder/app/sonata.py index 8d4fba4..a28d0cc 100644 --- a/brainbuilder/app/sonata.py +++ b/brainbuilder/app/sonata.py @@ -553,3 +553,38 @@ def precompute_allen_synapse_locations( convert_allen_v1.compute_synapse_locations( nodes_file, node_types_file, edges_file, edge_types_file, output_dir, morphology_dir ) + + +@app.command() +@click.option("-o", "--output", help="Directory of output SONATA files", required=True) +@click.option("--nodes-file", help="Path to nodes file", required=True) +@click.option("--attributes-file", help="Path to the csv of additional attribute , for syn weights and locations", + default=None, + show_default=True, +) +def add_nodes_attributes( + nodes_file, + attributes_file, + output, +): + from voxcell import CellCollection + import pandas as pd + cells = CellCollection.load(nodes_file) + nodes_df = cells.as_dataframe(index_offset=0) + attributes_df = pd.read_csv(attributes_file, sep=r",").sort_values(by="node_id") + print(nodes_df) + print(attributes_df) + for name in ["threshold_current", "holding_current"]: + nodes_df[name] = attributes_df[name].to_numpy() # row-to-row, not by index + + nodes_df.rename( + columns={"threshold_current": "@dynamics:threshold_current", + "holding_current": "@dynamics:holding_current"}, + inplace=True, + ) + print(nodes_df) + updated_cells = CellCollection.from_dataframe(nodes_df, index_offset=0) + updated_cells.population_name = cells.population_name + Path(output).mkdir(parents=True, exist_ok=True) + filename=Path(nodes_file).name + updated_cells.save(f"{output}/{filename}") From e7eb4002dcaa53cf339bd403a4557c6d5d2a9a0a Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Thu, 18 Dec 2025 15:36:56 +0100 Subject: [PATCH 26/28] compute synapse location as bmtk, using gid as the seed --- brainbuilder/app/sonata.py | 17 +++++++++++------ brainbuilder/utils/sonata/convert_allen_v1.py | 14 +++++++++++--- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/brainbuilder/app/sonata.py b/brainbuilder/app/sonata.py index a28d0cc..215cd49 100644 --- a/brainbuilder/app/sonata.py +++ b/brainbuilder/app/sonata.py @@ -541,18 +541,23 @@ def convert_allen_projection_edges( @click.option("-o", "--output-dir", help="Directory of output SONATA files", required=True) @click.option("--nodes-file", help="Path to the target nodes file", required=True) @click.option("--node-types-file", help="Path to the target node type file", required=True) -@click.option("--edges-file", help="Path to edges file", required=True) -@click.option("--edge-types-file", help="Path to edge type file", required=True) @click.option("--morphology-dir", help="Directory of morphology file", required=True) def precompute_allen_synapse_locations( - output_dir, nodes_file, node_types_file, edges_file, edge_types_file, morphology_dir + output_dir, nodes_file, node_types_file, morphology_dir ): """Precompute synapse locations for Allen V1 circuit""" from brainbuilder.utils.sonata import convert_allen_v1 - convert_allen_v1.compute_synapse_locations( - nodes_file, node_types_file, edges_file, edge_types_file, output_dir, morphology_dir - ) + # nodes_file = "network/v1_nodes.h5" + # node_types_file = "network/v1_node_types.csv" + # morphology_dir = "/Users/weji/workdir/JIRA/allen_v1/bmtk/examples/bio_components/morphologies" + edges = ["network/v1_v1_edges.h5", "network/lgn_v1_edges.h5", "network/bkg_v1_edges.h5"] + edges_types = ["network/v1_v1_edge_types.csv", "network/lgn_v1_edge_types.csv", "network/bkg_v1_edge_types.csv"] + for edges_file, edge_types_file in zip(edges, edges_types): + print(f"Compute synapse locations for edges: {edges_file}") + convert_allen_v1.compute_synapse_locations( + nodes_file, node_types_file, edges_file, edge_types_file, output_dir, morphology_dir + ) @app.command() diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index 8006852..37ef5f1 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -27,6 +27,7 @@ def sonata_to_dataframe(sonata_file, file_type="nodes"): data[key].extend(group[key][()]) cells_df = pd.DataFrame(data) + # # Create DataFrame with NaN for missing values, but very slow # cells_df = pd.DataFrame.from_dict(data, orient='index').transpose() type_id_key = "node_type_id" if file_type == "nodes" else "edge_type_id" cells_df[type_id_key] = population[type_id_key][()] @@ -401,14 +402,15 @@ def find_section_locations(edges_df, nodes_df, morph_dir): target_sections = target_sections.strip("[]") target_sections = [re.sub(r"[\"'\s]", "", x) for x in target_sections.split(",")] sec_ids, seg_xs = choose_synapse_locations( - nsyns, distance_range, target_sections, str(morpho_file) + nsyns, distance_range, target_sections, str(morpho_file), rng_seed=gid ) all_sec_ids.append(sec_ids) all_seg_xs.append(seg_xs) return np.concatenate(all_sec_ids), np.concatenate(all_seg_xs) -morphology_cache = {} +morphology_cache = {} # cache for the same morphology file and target range +prng_cache = {} # one rng per gid with the seed = gid, as bmtk does def choose_synapse_locations(nsyns, distance_range, target_sections, morph_file, rng_seed=None): @@ -427,7 +429,13 @@ def choose_synapse_locations(nsyns, distance_range, target_sections, morph_file, morphology_cache[cache_key] = (tar_seg_ix, tar_seg_prob, morph_reader) # print(f"tar_seg_ix={tar_seg_ix} tar_seg_prob={tar_seg_prob}") - secs_ix = morph_reader._prng.choice(tar_seg_ix, nsyns, p=tar_seg_prob) + if rng_seed in prng_cache: + prng = prng_cache[rng_seed] + else: + prng = np.random.RandomState(rng_seed) + prng_cache[rng_seed] = prng + + secs_ix = prng.choice(tar_seg_ix, nsyns, p=tar_seg_prob) sec_ids = morph_reader.seg_props.sec_id[secs_ix] seg_xs = morph_reader.seg_props.x[secs_ix] assert max(sec_ids) < morph_reader.n_sections, ( From 1a56ea2ebb67ae4f103dbe0144620b0bf933b902 Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Thu, 18 Dec 2025 16:47:26 +0100 Subject: [PATCH 27/28] refactor --- brainbuilder/app/sonata.py | 30 +++++++++++-------- brainbuilder/utils/sonata/convert_allen_v1.py | 18 +++++------ 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/brainbuilder/app/sonata.py b/brainbuilder/app/sonata.py index 215cd49..9030ca7 100644 --- a/brainbuilder/app/sonata.py +++ b/brainbuilder/app/sonata.py @@ -541,20 +541,21 @@ def convert_allen_projection_edges( @click.option("-o", "--output-dir", help="Directory of output SONATA files", required=True) @click.option("--nodes-file", help="Path to the target nodes file", required=True) @click.option("--node-types-file", help="Path to the target node type file", required=True) +@click.option( + "--edges-files", + nargs=2, + multiple=True, + help="Path to v1, lgn, bkg edges and the corresponding edges type files", +) @click.option("--morphology-dir", help="Directory of morphology file", required=True) def precompute_allen_synapse_locations( - output_dir, nodes_file, node_types_file, morphology_dir + output_dir, nodes_file, node_types_file, morphology_dir, edges_files ): """Precompute synapse locations for Allen V1 circuit""" from brainbuilder.utils.sonata import convert_allen_v1 - # nodes_file = "network/v1_nodes.h5" - # node_types_file = "network/v1_node_types.csv" - # morphology_dir = "/Users/weji/workdir/JIRA/allen_v1/bmtk/examples/bio_components/morphologies" - edges = ["network/v1_v1_edges.h5", "network/lgn_v1_edges.h5", "network/bkg_v1_edges.h5"] - edges_types = ["network/v1_v1_edge_types.csv", "network/lgn_v1_edge_types.csv", "network/bkg_v1_edge_types.csv"] - for edges_file, edge_types_file in zip(edges, edges_types): - print(f"Compute synapse locations for edges: {edges_file}") + for edges_file, edge_types_file in edges_files: + print(f"Compute synapse locations for edges: {edges_file} {edge_types_file}") convert_allen_v1.compute_synapse_locations( nodes_file, node_types_file, edges_file, edge_types_file, output_dir, morphology_dir ) @@ -563,7 +564,9 @@ def precompute_allen_synapse_locations( @app.command() @click.option("-o", "--output", help="Directory of output SONATA files", required=True) @click.option("--nodes-file", help="Path to nodes file", required=True) -@click.option("--attributes-file", help="Path to the csv of additional attribute , for syn weights and locations", +@click.option( + "--attributes-file", + help="Path to the csv of additional attribute , for syn weights and locations", default=None, show_default=True, ) @@ -574,6 +577,7 @@ def add_nodes_attributes( ): from voxcell import CellCollection import pandas as pd + cells = CellCollection.load(nodes_file) nodes_df = cells.as_dataframe(index_offset=0) attributes_df = pd.read_csv(attributes_file, sep=r",").sort_values(by="node_id") @@ -583,13 +587,15 @@ def add_nodes_attributes( nodes_df[name] = attributes_df[name].to_numpy() # row-to-row, not by index nodes_df.rename( - columns={"threshold_current": "@dynamics:threshold_current", - "holding_current": "@dynamics:holding_current"}, + columns={ + "threshold_current": "@dynamics:threshold_current", + "holding_current": "@dynamics:holding_current", + }, inplace=True, ) print(nodes_df) updated_cells = CellCollection.from_dataframe(nodes_df, index_offset=0) updated_cells.population_name = cells.population_name Path(output).mkdir(parents=True, exist_ok=True) - filename=Path(nodes_file).name + filename = Path(nodes_file).name updated_cells.save(f"{output}/{filename}") diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index 37ef5f1..b57cf6d 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -409,24 +409,24 @@ def find_section_locations(edges_df, nodes_df, morph_dir): return np.concatenate(all_sec_ids), np.concatenate(all_seg_xs) -morphology_cache = {} # cache for the same morphology file and target range -prng_cache = {} # one rng per gid with the seed = gid, as bmtk does +morphology_cache = {} # cache for the same morphology file and target range +prng_cache = {} # one rng per gid with the seed = gid, as bmtk does def choose_synapse_locations(nsyns, distance_range, target_sections, morph_file, rng_seed=None): from bmtk.builder.bionet.swc_reader import SWCReader - cache_key = (morph_file, tuple(target_sections), tuple(distance_range)) + cache_key = morph_file if cache_key in morphology_cache: - tar_seg_ix, tar_seg_prob, morph_reader = morphology_cache[cache_key] + morph_reader = morphology_cache[cache_key] else: morph_reader = SWCReader(morph_file, rng_seed) morph_reader.set_segment_dl(20) + morphology_cache[cache_key] = morph_reader # morph_reader.fix_axon() // NO replace axons to preserve the original indices, align with OBI - tar_seg_ix, tar_seg_prob = morph_reader.find_sections( - section_names=target_sections, distance_range=distance_range - ) - morphology_cache[cache_key] = (tar_seg_ix, tar_seg_prob, morph_reader) + tar_seg_ix, tar_seg_prob = morph_reader.find_sections( + section_names=target_sections, distance_range=distance_range + ) # print(f"tar_seg_ix={tar_seg_ix} tar_seg_prob={tar_seg_prob}") if rng_seed in prng_cache: @@ -434,7 +434,7 @@ def choose_synapse_locations(nsyns, distance_range, target_sections, morph_file, else: prng = np.random.RandomState(rng_seed) prng_cache[rng_seed] = prng - + secs_ix = prng.choice(tar_seg_ix, nsyns, p=tar_seg_prob) sec_ids = morph_reader.seg_props.sec_id[secs_ix] seg_xs = morph_reader.seg_props.x[secs_ix] From ddf8df3956ec449f584dc8dc4925a2a8727dc6dc Mon Sep 17 00:00:00 2001 From: Weina Ji Date: Tue, 6 Jan 2026 14:06:46 +0100 Subject: [PATCH 28/28] rename to tau1, tau2 and erev --- brainbuilder/app/sonata.py | 12 +++++++++++- brainbuilder/utils/sonata/convert_allen_v1.py | 11 +++++------ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/brainbuilder/app/sonata.py b/brainbuilder/app/sonata.py index 9030ca7..1a0671f 100644 --- a/brainbuilder/app/sonata.py +++ b/brainbuilder/app/sonata.py @@ -508,6 +508,16 @@ def convert_allen_projection_edges( point_edges = edges_df[(edges_df["target_node_id"].isin(point_gids))].reset_index(drop=True) point_id_map = dict(zip(point_gids, range(len(point_gids)))) point_edges["target_node_id"] = point_edges["target_node_id"].map(point_id_map) + point_edges.drop( + [ + "tau1", + "tau2", + "erev" + ], + axis=1, + inplace=True, + errors="ignore", + ) if not Path(output).exists(): Path(output).mkdir(parents=True, exist_ok=True) @@ -566,7 +576,7 @@ def precompute_allen_synapse_locations( @click.option("--nodes-file", help="Path to nodes file", required=True) @click.option( "--attributes-file", - help="Path to the csv of additional attribute , for syn weights and locations", + help="Path to the csv of additional attribute, for threshold_current and holding current ", default=None, show_default=True, ) diff --git a/brainbuilder/utils/sonata/convert_allen_v1.py b/brainbuilder/utils/sonata/convert_allen_v1.py index b57cf6d..c966a8c 100644 --- a/brainbuilder/utils/sonata/convert_allen_v1.py +++ b/brainbuilder/utils/sonata/convert_allen_v1.py @@ -107,7 +107,7 @@ def load_allen_edges(edges_file, edge_types_file): def prepare_synapses(edges_df, nodes_df, precomputed_edges_file, syn_parameter_dir): edges_df = add_synapse_parameters(edges_df, syn_parameter_dir) - add_dummy_values(edges_df, ["depression_time", "n_rrp_vesicles", "syn_type_id"], -1) + # add_dummy_values(edges_df, ["depression_time", "n_rrp_vesicles", "syn_type_id"], -1) if "weight_function" in edges_df.columns and "weight_sigma" in edges_df.columns: adjust_synapse_weights(edges_df, nodes_df) if precomputed_edges_file: @@ -157,15 +157,14 @@ def add_precomputed_synapse_locations(edges_df, nodes_df, precomputed_edges_file def add_synapse_parameters(edges_df, sym_parameter_dir): - # We rename tau1, tau2 and erev with the BBP synapse parameter names in the output file, so that we can run with our simulator directly - syn_params_map = {"dynamics_params": [], "facilitation_time": [], "decay_time": [], "u_syn": []} + syn_params_map = {"dynamics_params": [], "tau1": [], "tau2": [], "erev": []} for json_file in edges_df["dynamics_params"].unique(): params = utils.load_json(Path(sym_parameter_dir) / json_file) if params["level_of_detail"] == "exp2syn": syn_params_map["dynamics_params"].append(json_file) - syn_params_map["facilitation_time"].append(params["tau1"]) - syn_params_map["decay_time"].append(params["tau2"]) - syn_params_map["u_syn"].append(params["erev"]) + syn_params_map["tau1"].append(params["tau1"]) + syn_params_map["tau2"].append(params["tau2"]) + syn_params_map["erev"].append(params["erev"]) # create a dataframe from syn_params_map syn_params_df = pd.DataFrame(syn_params_map) return edges_df.merge(syn_params_df, on="dynamics_params", how="left")