diff --git a/brainio-test/MANIFEST.in b/brainio-test/MANIFEST.in deleted file mode 100644 index 5fdedf1..0000000 --- a/brainio-test/MANIFEST.in +++ /dev/null @@ -1,2 +0,0 @@ -include brainio_test/lookup.csv -include brainio_test/lookup2.csv diff --git a/brainio-test/brainio_test/__init__.py b/brainio-test/brainio_test/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/brainio-test/brainio_test/entrypoint.py b/brainio-test/brainio_test/entrypoint.py deleted file mode 100644 index 402690b..0000000 --- a/brainio-test/brainio_test/entrypoint.py +++ /dev/null @@ -1,24 +0,0 @@ -import logging -from pathlib import Path - -from brainio.catalogs import Catalog - -_logger = logging.getLogger(__name__) - -# Note that setup.py is where the entrypoint's published name is set - -def brainio_test(): - path = Path(__file__).parent / "lookup.csv" - _logger.debug(f"Loading catalog from {path}") - print(f"Loading catalog from {path}") # print because logging usually isn't set up at this point during import - catalog = Catalog.from_files("brainio_test", path) # setup.py is where the entrypoint's published name is set - return catalog - - -def brainio_test2(): - path = Path(__file__).parent / "lookup2.csv" - _logger.debug(f"Loading catalog from {path}") - print(f"Loading catalog from {path}") # print because logging usually isn't set up at this point during import - catalog = Catalog.from_files("brainio_test2", path) # setup.py is where the entrypoint's published name is set - return catalog - diff --git a/brainio-test/brainio_test/lookup.csv b/brainio-test/brainio_test/lookup.csv deleted file mode 100644 index 32b8301..0000000 --- a/brainio-test/brainio_test/lookup.csv +++ /dev/null @@ -1,13 +0,0 @@ -identifier,lookup_type,class,location_type,location,sha1,stimulus_set_identifier -dicarlo.hvm,stimulus_set,StimulusSet,S3,https://brainio.dicarlo.s3.amazonaws.com/image_dicarlo_hvm.csv,a56f55205904d5fb5ead4d0dc7bfad5cc4083b94, -dicarlo.hvm,stimulus_set,,S3,https://brainio.dicarlo.s3.amazonaws.com/image_dicarlo_hvm.zip,6fd5080deccfb061699909ffcb86a26209516811, -tolias.Cadena2017,stimulus_set,StimulusSet,S3,https://brainio.contrib.s3.amazonaws.com/image_tolias_Cadena2017.csv,f55b174cc4540e5612cfba5e695324328064b051, -tolias.Cadena2017,stimulus_set,,S3,https://brainio.contrib.s3.amazonaws.com/image_tolias_Cadena2017.zip,88cc2ce3ef5e197ffd1477144a2e6a68d424ef6c, -dicarlo.MajajHong2015,assembly,NeuronRecordingAssembly,S3,https://brainio.dicarlo.s3.amazonaws.com/assy_dicarlo_MajajHong2015.nc,bf8f8d01010d727e3db3f85a9bd5f95f9456b7ec,dicarlo.hvm -tolias.Cadena2017,assembly,NeuronRecordingAssembly,S3,https://brainio.contrib.s3.amazonaws.com/assy_tolias_Cadena2017.nc,69bcaaa9370dceb0027beaa06235ef418c3d7063,tolias.Cadena2017 -dicarlo.BashivanKar2019.naturalistic,stimulus_set,StimulusSet,S3,https://brainio.dicarlo.s3.amazonaws.com/image_dicarlo_BashivanKar2019_naturalistic.csv,48ef84282552b8796142ffe7d0d2c632f8ef061a, -dicarlo.BashivanKar2019.naturalistic,stimulus_set,,S3,https://brainio.dicarlo.s3.amazonaws.com/image_dicarlo_BashivanKar2019_naturalistic.zip,d7b71b431cf23d435395205f1e38036a9e10acca, -dicarlo.BashivanKar2019.naturalistic,assembly,NeuronRecordingAssembly,S3,https://brainio.dicarlo.s3.amazonaws.com/assy_dicarlo_BashivanKar2019_naturalistic.nc,1ec2f32ef800f0c6e15879d883be1d55b51b8b67,dicarlo.BashivanKar2019.naturalistic 
-dicarlo.BashivanKar2019.synthetic,stimulus_set,StimulusSet,S3,https://brainio.dicarlo.s3.amazonaws.com/image_dicarlo_BashivanKar2019_synthetic.csv,81da195e9b2a128b228fc4867e23ae6b21bd7abd, -dicarlo.BashivanKar2019.synthetic,stimulus_set,,S3,https://brainio.dicarlo.s3.amazonaws.com/image_dicarlo_BashivanKar2019_synthetic.zip,e2de33f25c5c19bcfb400055c1db399d553487e5, -dicarlo.BashivanKar2019.synthetic,assembly,NeuronRecordingAssembly,S3,https://brainio.dicarlo.s3.amazonaws.com/assy_dicarlo_BashivanKar2019_synthetic.nc,f687c8d26f8943dc379dbcbe94d3feb148400c6b,dicarlo.BashivanKar2019.synthetic diff --git a/brainio-test/brainio_test/lookup2.csv b/brainio-test/brainio_test/lookup2.csv deleted file mode 100644 index fdcfdca..0000000 --- a/brainio-test/brainio_test/lookup2.csv +++ /dev/null @@ -1,10 +0,0 @@ -identifier,lookup_type,class,location_type,location,sha1,stimulus_set_identifier -dicarlo.hvm,stimulus_set,StimulusSet,S3,https://brainio.dicarlo.s3.amazonaws.com/image_dicarlo_hvm.csv,a56f55205904d5fb5ead4d0dc7bfad5cc4083b94, -dicarlo.hvm,stimulus_set,,S3,https://brainio.dicarlo.s3.amazonaws.com/image_dicarlo_hvm.zip,6fd5080deccfb061699909ffcb86a26209516811, -dicarlo.hvm-public,stimulus_set,StimulusSet,S3,https://brainio.dicarlo.s3.amazonaws.com/image_dicarlo_hvm-public.csv,5ca7a3da00d8e9c694a9cd725df5ba0ad6d735af, -dicarlo.hvm-public,stimulus_set,,S3,https://brainio.dicarlo.s3.amazonaws.com/image_dicarlo_hvm-public.zip,8aa44e038d7b551efa8077467622f9d48d72e473, -dicarlo.hvm-private,stimulus_set,StimulusSet,S3,https://brainio.dicarlo.s3.amazonaws.com/image_dicarlo_hvm-private.csv,6ff4981722fa05feb73a2bd26bbbba8b50dc29a6, -dicarlo.hvm-private,stimulus_set,,S3,https://brainio.dicarlo.s3.amazonaws.com/image_dicarlo_hvm-private.zip,d7b1ca1876dad87e15b0242b4c82c0203ff3cbd3, -dicarlo.MajajHong2015,assembly,NeuronRecordingAssembly,S3,https://brainio.dicarlo.s3.amazonaws.com/assy_dicarlo_MajajHong2015.nc,bf8f8d01010d727e3db3f85a9bd5f95f9456b7ec,dicarlo.hvm -dicarlo.MajajHong2015.public,assembly,NeuronRecordingAssembly,S3,https://brainio.dicarlo.s3.amazonaws.com/assy_dicarlo_MajajHong2015_public.nc,13d28ca0ce88ee550b54db3004374ae19096e9b9,dicarlo.hvm-public -dicarlo.MajajHong2015.private,assembly,NeuronRecordingAssembly,S3,https://brainio.dicarlo.s3.amazonaws.com/assy_dicarlo_MajajHong2015_private.nc,7a40d16148d6f82939155f0bd976d310857fb156,dicarlo.hvm-private diff --git a/brainio-test/setup.py b/brainio-test/setup.py deleted file mode 100644 index 8ac1232..0000000 --- a/brainio-test/setup.py +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from setuptools import setup, find_packages - -setup( - name='brainio-test', - version='0.1.0', - description="Lookup data for testing BrainIO", - packages=find_packages(), - include_package_data=True, - entry_points={ - 'brainio_lookups': [ - 'brainio_test = brainio_test.entrypoint:brainio_test', - 'brainio_test2 = brainio_test.entrypoint:brainio_test2', - ], - }, -) diff --git a/brainio/__init__.py b/brainio/__init__.py index 21e9ff9..725d3da 100644 --- a/brainio/__init__.py +++ b/brainio/__init__.py @@ -1,3 +1,7 @@ -from .fetch import get_assembly, get_stimulus_set -from .lookup import get_catalog, list_stimulus_sets, list_assemblies, list_catalogs +# Import functions that depend on external dependencies (boto3) only when available +try: + from .fetch import get_assembly, get_stimulus_set +except ImportError: + # External dependencies not available - functions will raise NotImplementedError anyway + pass diff --git 
a/brainio/catalogs.py b/brainio/catalogs.py deleted file mode 100644 index 9738b0d..0000000 --- a/brainio/catalogs.py +++ /dev/null @@ -1,49 +0,0 @@ -from pathlib import Path - -import pandas as pd -from pandas import DataFrame - - -SOURCE_CATALOG = "source_catalog" - - -class Catalog(DataFrame): - # http://pandas.pydata.org/pandas-docs/stable/development/extending.html#subclassing-pandas-data-structures - _metadata = pd.DataFrame._metadata + ["identifier", "url", "get_loader_class", "from_files"] - - @property - def _constructor(self): - return Catalog - - @classmethod - def get_loader_class(cls): - return CatalogLoader - - @classmethod - def from_files(cls, identifier, csv_path, url=None): - loader_class = cls.get_loader_class() - loader = loader_class( - cls=cls, - identifier=identifier, - csv_path=csv_path, - url=url - ) - return loader.load() - - -class CatalogLoader: - def __init__(self, cls, identifier, csv_path, url=None): - self.cls = cls - self.identifier = identifier - self.csv_path = Path(csv_path) - self.url = url - - def load(self): - catalog = pd.read_csv(self.csv_path) - catalog = self.cls(catalog) - catalog.identifier = self.identifier - catalog.attrs['source_path'] = self.csv_path - catalog.url = self.url - return catalog - - diff --git a/brainio/fetch.py b/brainio/fetch.py index c71b441..3696cbc 100644 --- a/brainio/fetch.py +++ b/brainio/fetch.py @@ -13,7 +13,7 @@ import brainio.assemblies as assemblies import brainio.stimuli as stimuli -from brainio.lookup import lookup_assembly, lookup_stimulus_set, sha1_hash +from brainio.lookup import sha1_hash from brainio.stimuli import StimulusSetLoader BRAINIO_HOME = 'BRAINIO_HOME' @@ -157,42 +157,31 @@ def resolve_stimulus_set_class(class_name): def get_assembly(identifier): - assembly_lookup = lookup_assembly(identifier) - file_path = fetch_file(location_type=assembly_lookup['location_type'], - location=assembly_lookup['location'], sha1=assembly_lookup['sha1']) - stimulus_set = get_stimulus_set(assembly_lookup['stimulus_set_identifier']) - cls = resolve_assembly_class(assembly_lookup['class']) - loader = cls.get_loader_class()( - cls=cls, - file_path=file_path, - stimulus_set_identifier=assembly_lookup['stimulus_set_identifier'], - stimulus_set=stimulus_set, + """ + DEPRECATED: The catalog-based assembly loading has been removed. + + Use direct loading methods instead: + - For S3: Use brainscore_vision.data_helpers.s3.load_assembly_from_s3() + - For files: Use DataAssembly.from_files() + """ + raise NotImplementedError( + "get_assembly() has been deprecated. The catalog system has been removed. " + "Use direct loading methods like load_assembly_from_s3() or DataAssembly.from_files() instead." 
) - assembly = loader.load() - assembly.attrs['identifier'] = identifier - return assembly def get_stimulus_set(identifier): - csv_lookup, zip_lookup = lookup_stimulus_set(identifier) - csv_path = fetch_file(location_type=csv_lookup['location_type'], location=csv_lookup['location'], - sha1=csv_lookup['sha1']) - zip_path = fetch_file(location_type=zip_lookup['location_type'], location=zip_lookup['location'], - sha1=zip_lookup['sha1']) - stimuli_directory = unzip(zip_path) - loader = StimulusSetLoader( - csv_path=csv_path, - stimuli_directory=stimuli_directory, - cls=resolve_stimulus_set_class(csv_lookup['class']) + """ + DEPRECATED: The catalog-based stimulus set loading has been removed. + + Use direct loading methods instead: + - For S3: Use brainscore_vision.data_helpers.s3.load_stimulus_set_from_s3() + - For files: Use StimulusSet.from_files() + """ + raise NotImplementedError( + "get_stimulus_set() has been deprecated. The catalog system has been removed. " + "Use direct loading methods like load_stimulus_set_from_s3() or StimulusSet.from_files() instead." ) - stimulus_set = loader.load() - stimulus_set.identifier = identifier - # ensure perfect overlap - stimuli_paths = [Path(stimuli_directory) / local_path for local_path in os.listdir(stimuli_directory) - if not local_path.endswith('.zip') and not local_path.endswith('.csv')] - assert set(stimulus_set.stimulus_paths.values()) == set(stimuli_paths), \ - "Inconsistency: unzipped stimuli paths do not match csv paths" - return stimulus_set def fullname(obj): diff --git a/brainio/lookup.py b/brainio/lookup.py index 82db528..b6e3b30 100644 --- a/brainio/lookup.py +++ b/brainio/lookup.py @@ -1,177 +1,11 @@ import hashlib import logging -import entrypoints -import numpy as np -import pandas as pd - -from brainio.catalogs import Catalog, SOURCE_CATALOG - -ENTRYPOINT = "brainio_lookups" -TYPE_ASSEMBLY = 'assembly' -TYPE_STIMULUS_SET = 'stimulus_set' -_catalogs = {} - _logger = logging.getLogger(__name__) -def list_catalogs(): - return sorted(list(entrypoints.get_group_named(ENTRYPOINT).keys())) - - -def _load_catalog(identifier, entry_point): - catalog = entry_point.load()() - assert isinstance(catalog, Catalog) - assert catalog.identifier == identifier - return catalog - - -def _load_installed_catalogs(): - installed_catalogs = entrypoints.get_group_named(ENTRYPOINT) - _logger.debug(f"Loading catalog from entrypoints") - print(f"Loading catalog from entrypoints") - for k, v in installed_catalogs.items(): - catalog = _load_catalog(k, v) - _catalogs[k] = catalog - return _catalogs - - -def get_catalog(identifier): - catalogs = get_catalogs() - return catalogs[identifier] - - -def get_catalogs(): - if not _catalogs: - _load_installed_catalogs() - return _catalogs - - -def combined_catalog(): - source_catalogs = get_catalogs() - target_catalogs = {} - for identifier, source_catalog in source_catalogs.items(): - target_catalog = source_catalog.copy() - target_catalog[SOURCE_CATALOG] = identifier - target_catalogs[identifier] = target_catalog - concat_catalogs = pd.concat(target_catalogs.values(), ignore_index=True) - return concat_catalogs - - -def list_stimulus_sets(): - combined = combined_catalog() - stimuli_rows = combined[combined['lookup_type'] == TYPE_STIMULUS_SET] - return sorted(list(set(stimuli_rows['identifier']))) - - -def list_assemblies(): - combined = combined_catalog() - assembly_rows = combined[combined['lookup_type'] == TYPE_ASSEMBLY] - return sorted(list(set(assembly_rows['identifier']))) - - -def 
lookup_stimulus_set(identifier): - combined = combined_catalog() - lookup = combined[(combined['identifier'] == identifier) & (combined['lookup_type'] == TYPE_STIMULUS_SET)] - if len(lookup) == 0: - raise StimulusSetLookupError(f"Stimulus set {identifier} not found") - csv_lookup = _lookup_stimulus_set_filtered(lookup, filter_func=_is_csv_lookup, label="CSV") - zip_lookup = _lookup_stimulus_set_filtered(lookup, filter_func=_is_zip_lookup, label="ZIP") - return csv_lookup, zip_lookup - - -def _lookup_stimulus_set_filtered(lookup, filter_func, label): - cols = [n for n in lookup.columns if n != SOURCE_CATALOG] - # filter for csv vs. zip - # if there are any groups of rows where every field except source is the same, - # we only want one from each group - filtered_rows = lookup[lookup.apply(filter_func, axis=1)].drop_duplicates(subset=cols) - identifier = lookup.iloc[0]['identifier'] - if len(filtered_rows) == 0: - raise StimulusSetLookupError(f"{label} for stimulus set {identifier} not found") - if len(filtered_rows) > 1: # there were multiple rows but not all identical - raise RuntimeError( - f"Internal data inconsistency: Found more than 2 lookup rows for stimulus_set {label} for identifier {identifier}") - assert len(filtered_rows) == 1 - return filtered_rows.squeeze() - - -def lookup_assembly(identifier): - combined = combined_catalog() - lookup = combined[(combined['identifier'] == identifier) & (combined['lookup_type'] == TYPE_ASSEMBLY)] - if len(lookup) == 0: - raise AssemblyLookupError(f"Data assembly {identifier} not found") - cols = [n for n in lookup.columns if n != SOURCE_CATALOG] - # if there are any groups of rows where every field except source is the same, - # we only want one from each group - de_dupe = lookup.drop_duplicates(subset=cols) - if len(de_dupe) > 1: # there were multiple rows but not all identical - raise RuntimeError(f"Internal data inconsistency: Found multiple lookup rows for identifier {identifier}") - assert len(de_dupe) == 1 - return de_dupe.squeeze() - - -class StimulusSetLookupError(KeyError): - pass - - -class AssemblyLookupError(KeyError): - pass - - -def append(catalog_identifier, object_identifier, cls, lookup_type, - bucket_name, sha1, s3_key, stimulus_set_identifier=None): - catalogs = get_catalogs() - catalog = catalogs[catalog_identifier] - catalog_path = catalog.attrs['source_path'] - _logger.debug(f"Adding {lookup_type} {object_identifier} to catalog {catalog_identifier}") - object_lookup = { - 'identifier': object_identifier, - 'lookup_type': lookup_type, - 'class': cls, - 'location_type': "S3", - 'location': f"https://{bucket_name}.s3.amazonaws.com/{s3_key}", - 'sha1': sha1, - 'stimulus_set_identifier': stimulus_set_identifier, - } - # check duplicates - assert object_lookup['lookup_type'] in [TYPE_ASSEMBLY, TYPE_STIMULUS_SET] - duplicates = catalog[(catalog['identifier'] == object_lookup['identifier']) & - (catalog['lookup_type'] == object_lookup['lookup_type'])] - if len(duplicates) > 0: - if object_lookup['lookup_type'] == TYPE_ASSEMBLY: - raise ValueError(f"Trying to add duplicate identifier {object_lookup['identifier']}, " - f"existing \n{duplicates.to_string()}") - elif object_lookup['lookup_type'] == TYPE_STIMULUS_SET: - if len(duplicates) == 1 and duplicates.squeeze()['identifier'] == object_lookup['identifier'] and ( - (_is_csv_lookup(duplicates.squeeze()) and _is_zip_lookup(object_lookup)) or - (_is_zip_lookup(duplicates.squeeze()) and _is_csv_lookup(object_lookup))): - pass # all good, we're just adding the second part of a stimulus 
set - else: - raise ValueError( - f"Trying to add duplicate identifier {object_lookup['identifier']}, existing {duplicates}") - # append and save - add_lookup = pd.DataFrame({key: [value] for key, value in object_lookup.items()}) - catalog = pd.concat((catalog, add_lookup)) - catalog.attrs['source_path'] = catalog_path # explicitly set since concat does not always preserve - catalog.to_csv(catalog_path, index=False) - _catalogs[catalog_identifier] = catalog - return catalog - - -def _is_csv_lookup(data_row): - return data_row['lookup_type'] == TYPE_STIMULUS_SET \ - and data_row['location'].endswith('.csv') \ - and data_row['class'] not in [None, np.nan] - - -def _is_zip_lookup(data_row): - return data_row['lookup_type'] == TYPE_STIMULUS_SET \ - and data_row['location'].endswith('.zip') \ - and data_row['class'] in [None, np.nan] - - def sha1_hash(path, buffer_size=64 * 2 ** 10): + """Calculate SHA1 hash of a file.""" _logger.debug(f'BEGIN sha1_hash on {path}') sha1 = hashlib.sha1() with open(path, "rb") as f: @@ -180,4 +14,4 @@ def sha1_hash(path, buffer_size=64 * 2 ** 10): sha1.update(buffer) buffer = f.read(buffer_size) _logger.debug(f'END sha1_hash on {path}') - return sha1.hexdigest() + return sha1.hexdigest() \ No newline at end of file diff --git a/brainio/packaging.py b/brainio/packaging.py index f82574f..7be4041 100644 --- a/brainio/packaging.py +++ b/brainio/packaging.py @@ -14,9 +14,9 @@ from tqdm import tqdm from xarray import DataArray -from brainio import lookup, list_stimulus_sets, fetch +from brainio import fetch from brainio.fetch import resolve_assembly_class -from brainio.lookup import TYPE_ASSEMBLY, TYPE_STIMULUS_SET, sha1_hash +from brainio.lookup import sha1_hash _logger = logging.getLogger(__name__) @@ -197,21 +197,7 @@ def package_stimulus_set(catalog_name, proto_stimulus_set, stimulus_set_identifi zip_object_properties = upload_to_s3(str(target_zip_path), bucket_name, target_s3_key=zip_file_name) # link to csv and zip from same identifier. The csv however is the only one of the two rows with a class. 
- if catalog_name is not None: - lookup.append( - catalog_identifier=catalog_name, - object_identifier=stimulus_set_identifier, cls='StimulusSet', - lookup_type=TYPE_STIMULUS_SET, - bucket_name=bucket_name, sha1=csv_sha1, s3_key=csv_file_name, - stimulus_set_identifier=None - ) - lookup.append( - catalog_identifier=catalog_name, - object_identifier=stimulus_set_identifier, cls=None, - lookup_type=TYPE_STIMULUS_SET, - bucket_name=bucket_name, sha1=stimulus_zip_sha1, s3_key=zip_file_name, - stimulus_set_identifier=None - ) + # Catalog functionality has been removed - no longer updating catalogs csv_version_id = csv_object_properties['VersionId'] if 'VersionId' in csv_object_properties else None zip_version_id = zip_object_properties['VersionId'] if 'VersionId' in zip_object_properties else None _logger.debug(f"stimulus set {stimulus_set_identifier} packaged:\n bucket={bucket_name}, csv_sha1={csv_sha1}," @@ -299,14 +285,7 @@ def package_data_assembly(catalog_identifier, proto_data_assembly, assembly_iden netcdf_kf_sha1 = write_netcdf(ex, target_netcdf_path, append=True, group=k) object_properties = upload_to_s3(target_netcdf_path, bucket_name, s3_key) - if catalog_identifier is not None: - lookup.append( - catalog_identifier=catalog_identifier, - object_identifier=assembly_identifier, stimulus_set_identifier=stimulus_set_identifier, - lookup_type=TYPE_ASSEMBLY, - bucket_name=bucket_name, sha1=netcdf_kf_sha1, - s3_key=s3_key, cls=assembly_class_name, - ) + # Catalog functionality has been removed - no longer updating catalogs version_id = object_properties['VersionId'] if 'VersionId' in object_properties else None _logger.debug(f"assembly {assembly_identifier} packaged:\n, version_id={version_id}, sha1={netcdf_kf_sha1}, " f"bucket_name={bucket_name}, cls={assembly_class_name}") diff --git a/pyproject.toml b/pyproject.toml index 25de1bc..23d7d97 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,6 @@ dependencies = [ "boto3", "tqdm", "Pillow", - "entrypoints", "numpy", "pandas", "xarray<2022.6", # groupby bug was introduced in index refactor: https://github.com/pydata/xarray/issues/6836 @@ -44,4 +43,4 @@ tests = [ [tool.setuptools.packages.find] where = ["."] -exclude = ["tests", "brainio-test"] +exclude = ["tests"] diff --git a/tests/test_lookup.py b/tests/test_lookup.py deleted file mode 100644 index 4d86f2d..0000000 --- a/tests/test_lookup.py +++ /dev/null @@ -1,106 +0,0 @@ -import os -from pathlib import Path - -import pytest -from pytest import approx -import numpy as np -import pandas as pd -import xarray as xr -from xarray import DataArray -from PIL import Image - -import brainio -from brainio import assemblies -from brainio import fetch -from brainio.assemblies import DataAssembly, get_levels, gather_indexes, is_fastpath - - -@pytest.mark.parametrize('assembly', ( - 'dicarlo.MajajHong2015', - 'dicarlo.MajajHong2015.private', - 'dicarlo.MajajHong2015.public', - 'tolias.Cadena2017', - 'dicarlo.BashivanKar2019.naturalistic', - 'dicarlo.BashivanKar2019.synthetic', -)) -def test_list_assembly(assembly): - l = brainio.list_assemblies() - assert assembly in l - - -@pytest.mark.parametrize('stimulus_set', ( - 'dicarlo.hvm', - 'dicarlo.hvm-public', - 'dicarlo.hvm-private', - 'tolias.Cadena2017', - 'dicarlo.BashivanKar2019.naturalistic', - 'dicarlo.BashivanKar2019.synthetic' -)) -def test_list_stimulus_set(stimulus_set): - l = brainio.list_stimulus_sets() - assert stimulus_set in l - - -def test_lookup_stim(): - stim_csv, stim_zip = brainio.lookup.lookup_stimulus_set("dicarlo.hvm") - 
assert stim_csv['identifier'] == "dicarlo.hvm" - assert stim_csv['location_type'] == "S3" - hvm_s3_csv_url = "https://brainio.dicarlo.s3.amazonaws.com/image_dicarlo_hvm.csv" - assert stim_csv['location'] == hvm_s3_csv_url - assert stim_zip['identifier'] == "dicarlo.hvm" - assert stim_zip['location_type'] == "S3" - hvm_s3_zip_url = "https://brainio.dicarlo.s3.amazonaws.com/image_dicarlo_hvm.zip" - assert stim_zip['location'] == hvm_s3_zip_url - - -def test_lookup_assy(): - assy = brainio.lookup.lookup_assembly("dicarlo.MajajHong2015.public") - assert assy['identifier'] == "dicarlo.MajajHong2015.public" - assert assy['location_type'] == "S3" - hvm_s3_url = "https://brainio.dicarlo.s3.amazonaws.com/assy_dicarlo_MajajHong2015_public.nc" - assert assy['location'] == hvm_s3_url - - -def test_lookup_bad_name(): - with pytest.raises(brainio.lookup.AssemblyLookupError): - brainio.lookup.lookup_assembly("BadName") - - -def test_catalogs(): - cats = brainio.lookup.list_catalogs() - assert len(cats) == 2 - assert "brainio_test" in cats - assert "brainio_test2" in cats - dfs = brainio.lookup._load_installed_catalogs() - assert str(dfs["brainio_test"].attrs['source_path']).endswith(".csv") - assert str(dfs["brainio_test2"].attrs['source_path']).endswith(".csv") - assert len(dfs["brainio_test"]) == 12 - assert len(dfs["brainio_test2"]) == 9 - concat = brainio.lookup.combined_catalog() - assert len(concat) == len(dfs["brainio_test"]) + len(dfs["brainio_test2"]) - - -def test_duplicates(): - all_lookups = brainio.lookup.combined_catalog() - match_stim = all_lookups['lookup_type'] == brainio.lookup.TYPE_STIMULUS_SET - match_csv = all_lookups.apply(brainio.lookup._is_csv_lookup, axis=1) - match_zip = all_lookups.apply(brainio.lookup._is_zip_lookup, axis=1) - match_assy = all_lookups['lookup_type'] == brainio.lookup.TYPE_ASSEMBLY - - match_hvm = all_lookups['identifier'] == "dicarlo.hvm" - assert np.count_nonzero(match_hvm) == 4 - assert len(all_lookups[match_hvm & match_stim & match_csv]) == 2 - assert len(all_lookups[match_hvm & match_stim & match_zip]) == 2 - match_mh15 = all_lookups['identifier'] == "dicarlo.MajajHong2015" - match_mh15_pub = all_lookups['identifier'] == "dicarlo.MajajHong2015.public" - match_mh15_pvt = all_lookups['identifier'] == "dicarlo.MajajHong2015.private" - assert len(all_lookups[match_mh15 & match_assy]) == 2 - assert len(all_lookups[match_mh15_pub & match_assy]) == 1 - assert len(all_lookups[match_mh15_pvt & match_assy]) == 1 - match_c17 = all_lookups['identifier'] == "tolias.Cadena2017" - assert len(all_lookups[match_c17 & match_stim & match_csv]) == 1 - assert len(all_lookups[match_c17 & match_stim & match_zip]) == 1 - assert len(all_lookups[match_c17 & match_assy]) == 1 - - - diff --git a/tests/test_packaging.py b/tests/test_packaging.py index cd78a24..890d3ff 100644 --- a/tests/test_packaging.py +++ b/tests/test_packaging.py @@ -11,7 +11,7 @@ from brainio.assemblies import DataAssembly, get_levels from brainio.stimuli import StimulusSet -from brainio.packaging import write_netcdf, check_stimulus_numbers, check_stimulus_naming_convention, TYPE_ASSEMBLY, \ +from brainio.packaging import write_netcdf, check_stimulus_numbers, check_stimulus_naming_convention, \ package_stimulus_set, package_data_assembly, get_user_info, upload_to_s3 import brainio.lookup as lookup from tests.conftest import make_stimulus_set_df, make_spk_assembly, make_meta_assembly, BUCKET_NAME @@ -79,11 +79,13 @@ def test_stimulus_naming_convention(): check_stimulus_naming_convention(name) 
+@pytest.mark.skip(reason="Catalog functionality has been removed") def test_list_catalogs(test_catalog_identifier): catalog_names = lookup.list_catalogs() assert test_catalog_identifier in catalog_names +@pytest.mark.skip(reason="Catalog functionality has been removed") def test_append(test_catalog_identifier, test_write_netcdf_path, restore_this_file): assy = DataAssembly( data=[[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15], [16, 17, 18]], @@ -96,13 +98,13 @@ def test_append(test_catalog_identifier, test_write_netcdf_path, restore_this_fi ) identifier = "test.append" netcdf_sha1 = write_netcdf(assy, str(test_write_netcdf_path)) - catalog = lookup.get_catalogs()[test_catalog_identifier] - print(catalog.attrs['source_path']) - restore_this_file(catalog.attrs['source_path']) - catalog = lookup.append(test_catalog_identifier, identifier, "DataAssembly", TYPE_ASSEMBLY, "brainio-temp", - netcdf_sha1, "assy_test_append.nc", "dicarlo.hvm") - assert identifier in list(catalog["identifier"]) - assert identifier in lookup.list_assemblies() + # catalog = lookup.get_catalogs()[test_catalog_identifier] + # print(catalog.attrs['source_path']) + # restore_this_file(catalog.attrs['source_path']) + # catalog = lookup.append(test_catalog_identifier, identifier, "DataAssembly", TYPE_ASSEMBLY, "brainio-temp", + # netcdf_sha1, "assy_test_append.nc", "dicarlo.hvm") + # assert identifier in list(catalog["identifier"]) + # assert identifier in lookup.list_assemblies() @pytest.mark.private_access @@ -114,22 +116,20 @@ def test_package_stimulus_set(test_stimulus_set_identifier, test_catalog_identif del stimulus_set["filename"] identifier = test_stimulus_set_identifier restore_catalog(test_catalog_identifier) - package_stimulus_set(test_catalog_identifier, stimulus_set, identifier, bucket_name=BUCKET_NAME) - assert identifier in lookup.list_stimulus_sets() - gotten = brainio.get_stimulus_set(identifier) - assert gotten is not None - assert gotten.shape == (10, 3) - catalog = lookup.get_catalog(test_catalog_identifier) - assert 'lookup_source' not in catalog - assert 'source_catalog' not in catalog + result = package_stimulus_set(test_catalog_identifier, stimulus_set, identifier, bucket_name=BUCKET_NAME) + # Catalog functionality has been removed - just verify packaging returns result + assert result['identifier'] == identifier + assert 'csv_sha1' in result + assert 'zip_sha1' in result + # Note: catalog lookup functionality (list_stimulus_sets, get_stimulus_set) has been deprecated @pytest.mark.private_access def test_package_data_assembly(test_stimulus_set_identifier, test_catalog_identifier, brainio_home, restore_this_file, restore_catalog): - catalog = lookup.get_catalog(test_catalog_identifier) - assert 'lookup_source' not in catalog - assert 'source_catalog' not in catalog + # catalog = lookup.get_catalog(test_catalog_identifier) + # assert 'lookup_source' not in catalog + # assert 'source_catalog' not in catalog stimulus_set = StimulusSet(make_stimulus_set_df()) stimulus_set.stimulus_paths = {row["stimulus_id"]: Path(__file__).parent / f'images/{row["filename"]}' for _, row in stimulus_set.iterrows()} @@ -150,15 +150,12 @@ def test_package_data_assembly(test_stimulus_set_identifier, test_catalog_identi dims=['presentation', 'neuroid', 'time_bin'] ) identifier = "test.package_assembly" - package_data_assembly( + result = package_data_assembly( test_catalog_identifier, assy, identifier, test_stimulus_set_identifier, "DataAssembly", "brainio-temp") - assert identifier in lookup.list_assemblies() - 
gotten = brainio.get_assembly(identifier) - assert gotten is not None - assert gotten.shape == (6, 3, 1) - catalog = lookup.get_catalog(test_catalog_identifier) - assert 'lookup_source' not in catalog - assert 'source_catalog' not in catalog + # Catalog functionality has been removed - just verify packaging returns result + assert result['identifier'] == identifier + assert 'sha1' in result + # Note: catalog lookup functionality (list_assemblies, get_assembly) has been deprecated @pytest.mark.private_access @@ -176,13 +173,12 @@ def test_package_extras(test_stimulus_set_identifier, test_catalog_identifier, b assy_extra = make_meta_assembly() assy_extra.name = "test" extras = {assy_extra.name: assy_extra} - package_data_assembly(test_catalog_identifier, assy, identifier, test_stimulus_set_identifier, + result = package_data_assembly(test_catalog_identifier, assy, identifier, test_stimulus_set_identifier, "SpikeTimesAssembly", "brainio-temp", extras) - assert identifier in lookup.list_assemblies() - gotten = brainio.get_assembly(identifier) - assert gotten is not None - assert gotten.attrs["test"] is not None - assert gotten.attrs["test"].shape == (40,) + # Catalog functionality has been removed - just verify packaging returns result + assert result['identifier'] == identifier + assert 'sha1' in result + # Note: catalog lookup functionality (list_assemblies, get_assembly) has been deprecated def test_compression(test_write_netcdf_path): diff --git a/tests/test_transform.py b/tests/test_transform.py index 8885248..d183c71 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -1,7 +1,6 @@ import numpy as np import pytest import xarray as xr -from brainio import get_assembly from brainio.assemblies import NeuroidAssembly from brainio.transform import subset, index_efficient
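The deprecation docstrings above name replacement entry points (brainscore_vision.data_helpers.s3.load_assembly_from_s3 / load_stimulus_set_from_s3, DataAssembly.from_files / StimulusSet.from_files) but do not show a call. A minimal migration sketch follows, assuming those helpers are importable as named; only the module path and function names come from the docstrings in this diff, the keyword arguments are illustrative assumptions rather than a confirmed signature, and the identifier, bucket, and SHA1 values are copied from the deleted lookup2.csv rows.

    import brainio

    # The catalog-backed accessors still exist in brainio.fetch but now only raise:
    try:
        assembly = brainio.get_assembly("dicarlo.MajajHong2015.public")
    except NotImplementedError as error:
        print(error)  # message points to load_assembly_from_s3() / DataAssembly.from_files()

    # Direct S3 loading as suggested by the new docstrings (argument names are assumptions):
    from brainscore_vision.data_helpers.s3 import load_assembly_from_s3

    assembly = load_assembly_from_s3(
        identifier="dicarlo.MajajHong2015.public",
        bucket="brainio.dicarlo",                         # bucket from the old lookup location URL
        sha1="13d28ca0ce88ee550b54db3004374ae19096e9b9",  # SHA1 from the removed lookup2.csv row
        version_id=None,                                  # assumed optional
    )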