From d44e413ef7275062541e0e3c130abb5a97dc79b9 Mon Sep 17 00:00:00 2001
From: Zhuokun Ding
Date: Fri, 15 Apr 2022 20:05:41 +0000
Subject: [PATCH 1/5] Add convenient way to override default setting

---
 .../NeuroDataPipelineManagement.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/neuro_data/pipeline_management/NeuroDataPipelineManagement.py b/neuro_data/pipeline_management/NeuroDataPipelineManagement.py
index 7b3a1cb..c53c075 100644
--- a/neuro_data/pipeline_management/NeuroDataPipelineManagement.py
+++ b/neuro_data/pipeline_management/NeuroDataPipelineManagement.py
@@ -11,7 +11,8 @@
 PREPROC_ID = 0
 
 class NeuroDataPipelineManagement():
-    def __init__(self):
+    def __init__(self, preproc_id=PREPROC_ID):
+        self.preproc_id = preproc_id
         pass
 
     @staticmethod
@@ -48,8 +49,7 @@ def manually_insert_layer_for_scan(target_scan, layer):
             neuron_unit_key['layer'] = layer
             pipeline_anatomy.LayerMembership().insert1(neuron_unit_key, allow_direct_insert=True)
 
-    @staticmethod
-    def process_static_scans(target_scans):
+    def process_static_scans(self, target_scans):
         """
         Function that checks every table that needs to be populated and provides an option to manually populate
         AreaMembership and LayerMembership, assuming that all the neurons can be labeled with the same Area and Layer
@@ -159,11 +159,11 @@ def process_static_scans(target_scans):
 
             # Populate Frame
             print("[NeuroData.Static Populate]: Populating Frame:")
-            Frame.populate(dict(preproc_id = PREPROC_ID), ConditionTier & target_scan)
+            Frame.populate(dict(preproc_id = self.preproc_id), ConditionTier & target_scan)
 
             # Populate InputResponse
             print("[NeuroData.Static Populate]: Populating InputResponse:")
-            InputResponse().populate(target_scan_done_key, dict(preproc_id = PREPROC_ID))
+            InputResponse().populate(target_scan_done_key, dict(preproc_id = self.preproc_id))
 
             # Populate Eye
             print("[NeuroData.Static Populate]: Populating Eye:")
@@ -175,7 +175,7 @@ def process_static_scans(target_scans):
 
             # Insert Scan into StaticMultiDatasetGroupAssignment with whatever is the next highest group_id
             print("[NeuroData.Static Populate]: Inserting Scan into StaticMultiDatasetGroupAssignment with next largest group_id:")
-            target_input_response_key = (InputResponse & target_scan & dict(preproc_id=PREPROC_ID)).fetch1('KEY')
+            target_input_response_key = (InputResponse & target_scan & dict(preproc_id=self.preproc_id)).fetch1('KEY')
             if StaticMultiDatasetGroupAssignment & target_input_response_key:
                 print("[NeuroData.Static Populate]: Scan is already in StaticMultiDatasetGroupAssignment, skipping")
             else:
@@ -188,7 +188,7 @@ def process_static_scans(target_scans):
             StaticMultiDataset().fill()
 
             print('[NeuroData.Static Populate]: Generating HDF5 File')
-            InputResponse().get_filename(dict(**target_scan, preproc_id = PREPROC_ID))
+            InputResponse().get_filename(dict(**target_scan, preproc_id = self.preproc_id))
 
             print('[PROCESSING COMPLETED FOR SCAN: ' + str(target_scan) + ']\n')
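A minimal sketch of how the new argument might be used (import path per the file in the diff; the scan key and preproc value below are hypothetical, not part of the patch):

    from neuro_data.pipeline_management.NeuroDataPipelineManagement import NeuroDataPipelineManagement

    # Override the module-level default (PREPROC_ID = 0) for this run only.
    manager = NeuroDataPipelineManagement(preproc_id=1)

    # process_static_scans is now an instance method, so every populate and
    # fetch inside it uses self.preproc_id instead of the global constant.
    manager.process_static_scans([dict(animal_id=12345, session=1, scan_idx=1)])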
From 36d9b342d80f94f1922d1dba787ac3bd663bf0c4 Mon Sep 17 00:00:00 2001
From: Zhuokun Ding
Date: Fri, 15 Apr 2022 20:07:03 +0000
Subject: [PATCH 2/5] add StimulusType data config which does not subsample
 area or layer

---
 neuro_data/static_images/configs.py | 56 ++++++++++++++++++++---------
 1 file changed, 40 insertions(+), 16 deletions(-)

diff --git a/neuro_data/static_images/configs.py b/neuro_data/static_images/configs.py
index 9e168d0..f0f4bd2 100644
--- a/neuro_data/static_images/configs.py
+++ b/neuro_data/static_images/configs.py
@@ -198,7 +198,9 @@ def get_loaders(self, datasets, tier, batch_size, stimulus_types, Sampler):
         return loaders
 
     def load_data(self, key, tier=None, batch_size=1, key_order=None,
-                  exclude_from_normalization=None, stimulus_types=None, Sampler=None):
+                  stimulus_types=None, Sampler=None):
+        stimulus_types = key.pop('stimulus_type')
+        exclude = key.pop('exclude').split(',')
         log.info('Loading {} dataset with tier={}'.format(
             self._stimulus_type, tier))
         datasets = StaticMultiDataset().fetch_data(key, key_order=key_order)
@@ -211,7 +213,7 @@ def load_data(self, key, tier=None, batch_size=1, key_order=None,
         log.info('Using statistics source ' + key['stats_source'])
 
         datasets = self.add_transforms(
-            key, datasets, exclude=exclude_from_normalization)
+            key, datasets, exclude=exclude)
 
         loaders = self.get_loaders(
             datasets, tier, batch_size, stimulus_types, Sampler)
@@ -221,10 +223,7 @@ class AreaLayerRawMixin(StimulusTypeMixin):
     def load_data(self, key, tier=None, batch_size=1, key_order=None,
                   stimulus_types=None, Sampler=None, **kwargs):
         log.info('Ignoring input arguments: "' + '", "'.join(kwargs.keys()) + '"' + 'when creating datasets')
-        exclude = key.pop('exclude').split(',')
-        stimulus_types = key.pop('stimulus_type')
         datasets, loaders = super().load_data(key, tier, batch_size, key_order,
-                                              exclude_from_normalization=exclude,
                                               stimulus_types=stimulus_types,
                                               Sampler=Sampler)
@@ -401,6 +400,31 @@ def load_data(self, key, cuda=False, oracle=False, **kwargs):
 
         return datasets, loaders
 
+    class StimulusType(dj.Part, StimulusTypeMixin):
+        definition = """ # stimulus type
+        -> master
+        ---
+        stats_source        : varchar(50)   # normalization source
+        stimulus_type       : varchar(50)   # type of stimulus
+        exclude             : varchar(512)  # what inputs to exclude from normalization
+        normalize           : bool          # whether to use a normalizer or not
+        normalize_per_image : bool          # whether to normalize each input separately
+        """
+
+        def describe(self, key):
+            return "Stimulus type {stimulus_type}. normalize={normalize} on {stats_source} (except '{exclude}')".format(
+                **key)
+
+        @property
+        def content(self):
+            for p in product(['all'],
+                             ['stimulus.Frame'],
+                             [''],
+                             [True],
+                             [False],
+                             ):
+                yield dict(zip(self.heading.secondary_attributes, p))
+
     class CorrectedAreaLayer(dj.Part, AreaLayerRawMixin):
         definition = """
         -> master
@@ -427,7 +451,7 @@ def content(self):
                          [True, False],
                          ['L4', 'L2/3', 'L6'],
                          ['V1', 'LM']):
-            yield dict(zip(self.heading.dependent_attributes, p))
+            yield dict(zip(self.heading.secondary_attributes, p))
 
     class ModeledAreaLayer(dj.Part, AreaLayerModelMixin):
         definition = """
         -> master
@@ -441,7 +465,7 @@ def content(self):
             for p in [
                 (0,)
             ]:
-            yield dict(zip(self.heading.dependent_attributes, p))
+            yield dict(zip(self.heading.secondary_attributes, p))
 
     class MultipleAreasOneLayer(dj.Part, AreaLayerRawMixin):
         definition = """
         -> master
@@ -468,7 +492,7 @@ def content(self):
                          [True, False],
                          ['L4', 'L2/3'],
                          ['all-unknown', 'all']):
-            yield dict(zip(self.heading.dependent_attributes, p))
+            yield dict(zip(self.heading.secondary_attributes, p))
 
     class MultipleAreasMultipleLayers(dj.Part, AreaLayerRawMixin):
         definition = """
         -> master
@@ -495,7 +519,7 @@ def content(self):
                          [True, False],
                          ['all-unset', 'all'],
                          ['all-unknown', 'all']):
-            yield dict(zip(self.heading.dependent_attributes, p))
+            yield dict(zip(self.heading.secondary_attributes, p))
 
 ############ Below are data configs that were using the buggy normalizer #################
     class AreaLayer(dj.Part, BackwardCompatibilityMixin, AreaLayerRawMixin):
@@ -522,7 +546,7 @@ def content(self):
                          [True],
                          ['L4', 'L2/3'],
                          ['V1', 'LM']):
-            yield dict(zip(self.heading.dependent_attributes, p))
+            yield dict(zip(self.heading.secondary_attributes, p))
 
     class AreaLayerPercentOracle(dj.Part, BackwardCompatibilityMixin, AreaLayerRawMixin):
         definition = """
@@ -563,7 +587,7 @@ def content(self):
                          ['V1'],
                          [25],
                          [75]):
-            yield dict(zip(self.heading.dependent_attributes, p))
+            yield dict(zip(self.heading.secondary_attributes, p))
         for p in product(['all'],
                          ['stimulus.Frame', '~stimulus.Frame'],
                          ['images,responses'],
                          [True],
                          [False],
                          ['L2/3'],
                          ['V1'],
                          [75],
                          [100]):
-            yield dict(zip(self.heading.dependent_attributes, p))
+            yield dict(zip(self.heading.secondary_attributes, p))
         for p in product(['all'],
                          ['stimulus.Frame', '~stimulus.Frame'],
                          ['images,responses'],
                          [True],
                          [False],
                          ['L2/3'],
                          ['V1'],
                          [0],
                          [100]):
-            yield dict(zip(self.heading.dependent_attributes, p))
+            yield dict(zip(self.heading.secondary_attributes, p))
 
     def load_data(self, key, tier=None, batch_size=1, key_order=None,
                   stimulus_types=None, Sampler=None):
         from .stats import Oracle
@@ -647,7 +671,7 @@ def content(self):
                          [0.2],
                          ['L2/3'],
                          ['V1']):
-            yield dict(zip(self.heading.dependent_attributes, p))
+            yield dict(zip(self.heading.secondary_attributes, p))
 
     def load_data(self, key, **kwargs):
         return super().load_data(key, balanced=False, **kwargs)
@@ -682,7 +706,7 @@ def content(self):
                          [0.2],
                          ['L2/3'],
                          ['V1']):
-            yield dict(zip(self.heading.dependent_attributes, p))
+            yield dict(zip(self.heading.secondary_attributes, p))
 
     def load_data(self, key, **kwargs):
         return super().load_data(key, balanced=True, **kwargs)
@@ -713,7 +737,7 @@ def content(self):
                          ['L2/3'],
                          ['V1'],
                          [-3]):
-            yield dict(zip(self.heading.dependent_attributes, p))
+            yield dict(zip(self.heading.secondary_attributes, p))
 
     def load_data(self, key, tier=None, batch_size=1, Sampler=None,
                   t_first=False, cuda=False):
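For reference, a sketch of the single config row that the new StimulusType.content generator yields, following the secondary-attribute order of its definition (attribute names copied from the patch):

    from itertools import product

    attrs = ('stats_source', 'stimulus_type', 'exclude', 'normalize', 'normalize_per_image')
    rows = [dict(zip(attrs, p))
            for p in product(['all'], ['stimulus.Frame'], [''], [True], [False])]
    # rows == [{'stats_source': 'all', 'stimulus_type': 'stimulus.Frame',
    #           'exclude': '', 'normalize': True, 'normalize_per_image': False}]

Because the key carries no brain_area or layer attributes, load_data keeps every unit instead of subsampling by area or layer, which is the point of this config.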
From d6f76901b06c7b8d07d650234094bf648d332462 Mon Sep 17 00:00:00 2001
From: Zhuokun Ding
Date: Fri, 15 Apr 2022 20:07:28 +0000
Subject: [PATCH 3/5] allow processing segmentation compartments other than
 soma

---
 neuro_data/static_images/data_schemas.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/neuro_data/static_images/data_schemas.py b/neuro_data/static_images/data_schemas.py
index b20fd3d..1d1f2b6 100644
--- a/neuro_data/static_images/data_schemas.py
+++ b/neuro_data/static_images/data_schemas.py
@@ -96,7 +96,11 @@ def make(self, key):
         self.insert(StaticScanCandidate & key, ignore_extra_fields=True)
         pipe = (fuse.ScanDone() & key).fetch1('pipe')
         pipe = dj.create_virtual_module(pipe, 'pipeline_' + pipe)
-        units = (fuse.ScanDone * pipe.ScanSet.Unit * pipe.MaskClassification.Type & key
+        # Check the segmentation compartment; if it is not soma, skip the MaskClassification restriction
+        if (shared.MaskType.proj(compartment='type') & (pipe.SegmentationTask & key)).fetch1('compartment') != 'soma':
+            units = (fuse.ScanDone * pipe.ScanSet.Unit & key & dict(pipe_version=1))
+        else:
+            units = (fuse.ScanDone * pipe.ScanSet.Unit * pipe.MaskClassification.Type & key
                  & dict(pipe_version=1, type='soma'))
         assert len(units) > 0, 'No units found!'
         self.Unit().insert(units,
@@ -528,10 +532,16 @@ def load_traces_and_frametimes(self, key):
         ndepth = len(dj.U('z') & (pipe.ScanInfo.Field() & k))
         frame_times = (stimulus.Sync() & key).fetch1('frame_times').squeeze()[::ndepth]
 
-        soma = pipe.MaskClassification.Type() & dict(type='soma')
+        # If mask_type is 'all', apply no MaskClassification restriction;
+        # otherwise restrict traces to the configured compartment
+        mask_type = (Preprocessing & key).fetch1('mask_type')
+        if mask_type == 'all':
+            compartment_restrict = {}
+        else:
+            assert mask_type in shared.MaskType, f'mask_type {mask_type} not found in shared.MaskType'
+            compartment_restrict = pipe.MaskClassification.Type() & dict(type=mask_type)
         spikes = (dj.U('field', 'channel') * pipe.Activity.Trace() * StaticScan.Unit() \
-                  * pipe.ScanSet.UnitInfo() & soma & key)
+                  * pipe.ScanSet.UnitInfo() & compartment_restrict & key)
         traces, ms_delay, trace_keys = spikes.fetch('trace', 'ms_delay', dj.key,
                                                     order_by='animal_id, session, scan_idx, unit_id')
         delay = np.fromiter(ms_delay / 1000, dtype=np.float)

From f59d3f950a43658b8bc44626d0819f19cce73e9a Mon Sep 17 00:00:00 2001
From: Zhuokun Ding
Date: Fri, 15 Apr 2022 21:02:35 +0000
Subject: [PATCH 4/5] Catch unexpected keyword arguments

---
 neuro_data/static_images/configs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/neuro_data/static_images/configs.py b/neuro_data/static_images/configs.py
index f0f4bd2..0783294 100644
--- a/neuro_data/static_images/configs.py
+++ b/neuro_data/static_images/configs.py
@@ -198,7 +198,7 @@ def get_loaders(self, datasets, tier, batch_size, stimulus_types, Sampler):
         return loaders
 
     def load_data(self, key, tier=None, batch_size=1, key_order=None,
-                  stimulus_types=None, Sampler=None):
+                  stimulus_types=None, Sampler=None, **kwargs):
         stimulus_types = key.pop('stimulus_type')
         exclude = key.pop('exclude').split(',')
         log.info('Loading {} dataset with tier={}'.format(
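The practical effect of the added **kwargs, sketched below (the config instance and the extra 'oracle' argument are hypothetical): a caller that passes an argument StimulusTypeMixin.load_data does not recognize no longer triggers a TypeError; the extra is absorbed and ignored:

    datasets, loaders = data_config.load_data(key, tier='train', batch_size=32,
                                              oracle=False)  # extra kwarg now tolerated

Unlike AreaLayerRawMixin.load_data, which logs the names it ignores, this version discards them silently.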
From 47cdf247a120bef915d0afa26f86f6d76a86c69c Mon Sep 17 00:00:00 2001
From: Zhuokun Ding
Date: Fri, 15 Apr 2022 21:25:20 +0000
Subject: [PATCH 5/5] reuse virtual module to avoid deadlocks

---
 neuro_data/static_images/dataset_config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/neuro_data/static_images/dataset_config.py b/neuro_data/static_images/dataset_config.py
index 32d6b26..e4bb78e 100644
--- a/neuro_data/static_images/dataset_config.py
+++ b/neuro_data/static_images/dataset_config.py
@@ -17,11 +17,11 @@
     StaticMultiDataset,
     StaticScan,
     schema,
+    stimulus,
+    base,
 )
 from .ds_pipe import DvScanInfo
 
-stimulus = dj.create_virtual_module("stimulus", "pipeline_stimulus")
-base = dj.create_virtual_module("base", "neurostatic_base")
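The pattern behind the last patch, sketched under the assumption that the import block shown in context is the `from .data_schemas import (...)` statement of dataset_config.py:

    # data_schemas.py -- create each virtual module exactly once, at import time
    import datajoint as dj
    stimulus = dj.create_virtual_module("stimulus", "pipeline_stimulus")
    base = dj.create_virtual_module("base", "neurostatic_base")

    # dataset_config.py -- import the shared instances instead of re-creating them
    from .data_schemas import stimulus, base

Per the commit message, creating a second virtual module for the same schema from concurrent processes was producing deadlocks; importing the instance that data_schemas already builds means the schema is reflected against the database only once.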