2 changes: 0 additions & 2 deletions brainio-test/MANIFEST.in

This file was deleted.

Empty file.
24 changes: 0 additions & 24 deletions brainio-test/brainio_test/entrypoint.py

This file was deleted.

13 changes: 0 additions & 13 deletions brainio-test/brainio_test/lookup.csv

This file was deleted.

10 changes: 0 additions & 10 deletions brainio-test/brainio_test/lookup2.csv

This file was deleted.

18 changes: 0 additions & 18 deletions brainio-test/setup.py

This file was deleted.

8 changes: 6 additions & 2 deletions brainio/__init__.py
@@ -1,3 +1,7 @@
from .fetch import get_assembly, get_stimulus_set
from .lookup import get_catalog, list_stimulus_sets, list_assemblies, list_catalogs
# Import functions that depend on external dependencies (boto3) only when available
try:
    from .fetch import get_assembly, get_stimulus_set
except ImportError:
    # External dependencies not available - functions will raise NotImplementedError anyway
    pass
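In effect, importing brainio no longer fails when boto3 and the rest of the optional S3 stack are missing; the guarded helpers are simply not exported. A minimal sketch of that behavior, assuming an environment without the optional dependencies:

```python
# Sketch: brainio installed without its optional S3 dependencies (assumption for illustration).
import brainio

# The package imports cleanly; the guarded names are absent rather than the import failing.
print(hasattr(brainio, "get_assembly"))  # expected: False when the optional import was skipped
```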

Binary file added brainio/__pycache__/__init__.cpython-311.pyc
Binary file not shown.
Binary file added brainio/__pycache__/assemblies.cpython-311.pyc
Binary file not shown.
Binary file added brainio/__pycache__/fetch.cpython-311.pyc
Binary file not shown.
49 changes: 0 additions & 49 deletions brainio/catalogs.py

This file was deleted.

53 changes: 21 additions & 32 deletions brainio/fetch.py
@@ -13,7 +13,7 @@

import brainio.assemblies as assemblies
import brainio.stimuli as stimuli
from brainio.lookup import lookup_assembly, lookup_stimulus_set, sha1_hash
from brainio.lookup import sha1_hash
from brainio.stimuli import StimulusSetLoader

BRAINIO_HOME = 'BRAINIO_HOME'
Expand Down Expand Up @@ -157,42 +157,31 @@ def resolve_stimulus_set_class(class_name):


def get_assembly(identifier):
    assembly_lookup = lookup_assembly(identifier)
    file_path = fetch_file(location_type=assembly_lookup['location_type'],
                           location=assembly_lookup['location'], sha1=assembly_lookup['sha1'])
    stimulus_set = get_stimulus_set(assembly_lookup['stimulus_set_identifier'])
    cls = resolve_assembly_class(assembly_lookup['class'])
    loader = cls.get_loader_class()(
        cls=cls,
        file_path=file_path,
        stimulus_set_identifier=assembly_lookup['stimulus_set_identifier'],
        stimulus_set=stimulus_set,
    """
    DEPRECATED: The catalog-based assembly loading has been removed.

    Use direct loading methods instead:
    - For S3: Use brainscore_vision.data_helpers.s3.load_assembly_from_s3()
    - For files: Use DataAssembly.from_files()
    """
    raise NotImplementedError(
        "get_assembly() has been deprecated. The catalog system has been removed. "
        "Use direct loading methods like load_assembly_from_s3() or DataAssembly.from_files() instead."
    )
    assembly = loader.load()
    assembly.attrs['identifier'] = identifier
    return assembly


def get_stimulus_set(identifier):
    csv_lookup, zip_lookup = lookup_stimulus_set(identifier)
    csv_path = fetch_file(location_type=csv_lookup['location_type'], location=csv_lookup['location'],
                          sha1=csv_lookup['sha1'])
    zip_path = fetch_file(location_type=zip_lookup['location_type'], location=zip_lookup['location'],
                          sha1=zip_lookup['sha1'])
    stimuli_directory = unzip(zip_path)
    loader = StimulusSetLoader(
        csv_path=csv_path,
        stimuli_directory=stimuli_directory,
        cls=resolve_stimulus_set_class(csv_lookup['class'])
    """
    DEPRECATED: The catalog-based stimulus set loading has been removed.

    Use direct loading methods instead:
    - For S3: Use brainscore_vision.data_helpers.s3.load_stimulus_set_from_s3()
    - For files: Use StimulusSet.from_files()
    """
    raise NotImplementedError(
        "get_stimulus_set() has been deprecated. The catalog system has been removed. "
        "Use direct loading methods like load_stimulus_set_from_s3() or StimulusSet.from_files() instead."
    )
    stimulus_set = loader.load()
    stimulus_set.identifier = identifier
    # ensure perfect overlap
    stimuli_paths = [Path(stimuli_directory) / local_path for local_path in os.listdir(stimuli_directory)
                     if not local_path.endswith('.zip') and not local_path.endswith('.csv')]
    assert set(stimulus_set.stimulus_paths.values()) == set(stimuli_paths), \
        "Inconsistency: unzipped stimuli paths do not match csv paths"
    return stimulus_set
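Both entry points keep their names so existing imports still resolve, but any call now fails fast with a pointer to the replacements. A minimal sketch of what a caller sees after this change (the identifier below is hypothetical, and the replacement loaders' signatures live in brainscore_vision rather than in this diff):

```python
# Sketch of the post-change behavior; "example.Assembly2024" is a made-up identifier.
from brainio.fetch import get_assembly

try:
    get_assembly("example.Assembly2024")
except NotImplementedError as error:
    # The message directs callers to load_assembly_from_s3() / DataAssembly.from_files();
    # those helpers are not defined in this diff.
    print(error)
```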


def fullname(obj):
170 changes: 2 additions & 168 deletions brainio/lookup.py
@@ -1,177 +1,11 @@
import hashlib
import logging

import entrypoints
import numpy as np
import pandas as pd

from brainio.catalogs import Catalog, SOURCE_CATALOG

ENTRYPOINT = "brainio_lookups"
TYPE_ASSEMBLY = 'assembly'
TYPE_STIMULUS_SET = 'stimulus_set'
_catalogs = {}

_logger = logging.getLogger(__name__)


def list_catalogs():
    return sorted(list(entrypoints.get_group_named(ENTRYPOINT).keys()))


def _load_catalog(identifier, entry_point):
    catalog = entry_point.load()()
    assert isinstance(catalog, Catalog)
    assert catalog.identifier == identifier
    return catalog


def _load_installed_catalogs():
    installed_catalogs = entrypoints.get_group_named(ENTRYPOINT)
    _logger.debug(f"Loading catalog from entrypoints")
    print(f"Loading catalog from entrypoints")
    for k, v in installed_catalogs.items():
        catalog = _load_catalog(k, v)
        _catalogs[k] = catalog
    return _catalogs


def get_catalog(identifier):
    catalogs = get_catalogs()
    return catalogs[identifier]


def get_catalogs():
    if not _catalogs:
        _load_installed_catalogs()
    return _catalogs


def combined_catalog():
    source_catalogs = get_catalogs()
    target_catalogs = {}
    for identifier, source_catalog in source_catalogs.items():
        target_catalog = source_catalog.copy()
        target_catalog[SOURCE_CATALOG] = identifier
        target_catalogs[identifier] = target_catalog
    concat_catalogs = pd.concat(target_catalogs.values(), ignore_index=True)
    return concat_catalogs


def list_stimulus_sets():
    combined = combined_catalog()
    stimuli_rows = combined[combined['lookup_type'] == TYPE_STIMULUS_SET]
    return sorted(list(set(stimuli_rows['identifier'])))


def list_assemblies():
    combined = combined_catalog()
    assembly_rows = combined[combined['lookup_type'] == TYPE_ASSEMBLY]
    return sorted(list(set(assembly_rows['identifier'])))


def lookup_stimulus_set(identifier):
    combined = combined_catalog()
    lookup = combined[(combined['identifier'] == identifier) & (combined['lookup_type'] == TYPE_STIMULUS_SET)]
    if len(lookup) == 0:
        raise StimulusSetLookupError(f"Stimulus set {identifier} not found")
    csv_lookup = _lookup_stimulus_set_filtered(lookup, filter_func=_is_csv_lookup, label="CSV")
    zip_lookup = _lookup_stimulus_set_filtered(lookup, filter_func=_is_zip_lookup, label="ZIP")
    return csv_lookup, zip_lookup


def _lookup_stimulus_set_filtered(lookup, filter_func, label):
    cols = [n for n in lookup.columns if n != SOURCE_CATALOG]
    # filter for csv vs. zip
    # if there are any groups of rows where every field except source is the same,
    # we only want one from each group
    filtered_rows = lookup[lookup.apply(filter_func, axis=1)].drop_duplicates(subset=cols)
    identifier = lookup.iloc[0]['identifier']
    if len(filtered_rows) == 0:
        raise StimulusSetLookupError(f"{label} for stimulus set {identifier} not found")
    if len(filtered_rows) > 1:  # there were multiple rows but not all identical
        raise RuntimeError(
            f"Internal data inconsistency: Found more than 2 lookup rows for stimulus_set {label} for identifier {identifier}")
    assert len(filtered_rows) == 1
    return filtered_rows.squeeze()


def lookup_assembly(identifier):
    combined = combined_catalog()
    lookup = combined[(combined['identifier'] == identifier) & (combined['lookup_type'] == TYPE_ASSEMBLY)]
    if len(lookup) == 0:
        raise AssemblyLookupError(f"Data assembly {identifier} not found")
    cols = [n for n in lookup.columns if n != SOURCE_CATALOG]
    # if there are any groups of rows where every field except source is the same,
    # we only want one from each group
    de_dupe = lookup.drop_duplicates(subset=cols)
    if len(de_dupe) > 1:  # there were multiple rows but not all identical
        raise RuntimeError(f"Internal data inconsistency: Found multiple lookup rows for identifier {identifier}")
    assert len(de_dupe) == 1
    return de_dupe.squeeze()


class StimulusSetLookupError(KeyError):
    pass


class AssemblyLookupError(KeyError):
    pass


def append(catalog_identifier, object_identifier, cls, lookup_type,
           bucket_name, sha1, s3_key, stimulus_set_identifier=None):
    catalogs = get_catalogs()
    catalog = catalogs[catalog_identifier]
    catalog_path = catalog.attrs['source_path']
    _logger.debug(f"Adding {lookup_type} {object_identifier} to catalog {catalog_identifier}")
    object_lookup = {
        'identifier': object_identifier,
        'lookup_type': lookup_type,
        'class': cls,
        'location_type': "S3",
        'location': f"https://{bucket_name}.s3.amazonaws.com/{s3_key}",
        'sha1': sha1,
        'stimulus_set_identifier': stimulus_set_identifier,
    }
    # check duplicates
    assert object_lookup['lookup_type'] in [TYPE_ASSEMBLY, TYPE_STIMULUS_SET]
    duplicates = catalog[(catalog['identifier'] == object_lookup['identifier']) &
                         (catalog['lookup_type'] == object_lookup['lookup_type'])]
    if len(duplicates) > 0:
        if object_lookup['lookup_type'] == TYPE_ASSEMBLY:
            raise ValueError(f"Trying to add duplicate identifier {object_lookup['identifier']}, "
                             f"existing \n{duplicates.to_string()}")
        elif object_lookup['lookup_type'] == TYPE_STIMULUS_SET:
            if len(duplicates) == 1 and duplicates.squeeze()['identifier'] == object_lookup['identifier'] and (
                    (_is_csv_lookup(duplicates.squeeze()) and _is_zip_lookup(object_lookup)) or
                    (_is_zip_lookup(duplicates.squeeze()) and _is_csv_lookup(object_lookup))):
                pass  # all good, we're just adding the second part of a stimulus set
            else:
                raise ValueError(
                    f"Trying to add duplicate identifier {object_lookup['identifier']}, existing {duplicates}")
    # append and save
    add_lookup = pd.DataFrame({key: [value] for key, value in object_lookup.items()})
    catalog = pd.concat((catalog, add_lookup))
    catalog.attrs['source_path'] = catalog_path  # explicitly set since concat does not always preserve
    catalog.to_csv(catalog_path, index=False)
    _catalogs[catalog_identifier] = catalog
    return catalog


def _is_csv_lookup(data_row):
    return data_row['lookup_type'] == TYPE_STIMULUS_SET \
        and data_row['location'].endswith('.csv') \
        and data_row['class'] not in [None, np.nan]


def _is_zip_lookup(data_row):
    return data_row['lookup_type'] == TYPE_STIMULUS_SET \
        and data_row['location'].endswith('.zip') \
        and data_row['class'] in [None, np.nan]


def sha1_hash(path, buffer_size=64 * 2 ** 10):
    """Calculate SHA1 hash of a file."""
    _logger.debug(f'BEGIN sha1_hash on {path}')
    sha1 = hashlib.sha1()
    with open(path, "rb") as f:
@@ -180,4 +14,4 @@ def sha1_hash(path, buffer_size=64 * 2 ** 10):
            sha1.update(buffer)
            buffer = f.read(buffer_size)
    _logger.debug(f'END sha1_hash on {path}')
    return sha1.hexdigest()
    return sha1.hexdigest()
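Part of sha1_hash is collapsed behind the fold above. For reference, a self-contained sketch of how the retained helper plausibly reads in full after this PR; the initial read and the `while` loop header are assumptions inferred from the visible chunked-update lines.

```python
import hashlib
import logging

_logger = logging.getLogger(__name__)


def sha1_hash(path, buffer_size=64 * 2 ** 10):
    """Calculate SHA1 hash of a file by reading it in buffer_size chunks."""
    _logger.debug(f'BEGIN sha1_hash on {path}')
    sha1 = hashlib.sha1()
    with open(path, "rb") as f:
        buffer = f.read(buffer_size)   # assumed: initial read hidden behind the fold
        while len(buffer) > 0:         # assumed loop header; only its body is visible in the diff
            sha1.update(buffer)
            buffer = f.read(buffer_size)
    _logger.debug(f'END sha1_hash on {path}')
    return sha1.hexdigest()
```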