From 93a197c319ce2e27302d9c3b9ec4752ddf148e7c Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Mon, 25 Apr 2022 09:48:54 -0400
Subject: [PATCH 01/18] Create DatasetSearcher class.

---
 nimare/dataset.py | 1011 +++++++++++++++++++++++----------------------
 1 file changed, 511 insertions(+), 500 deletions(-)

diff --git a/nimare/dataset.py b/nimare/dataset.py
index 4ba6893df..b98c20bba 100755
--- a/nimare/dataset.py
+++ b/nimare/dataset.py
@@ -26,639 +26,650 @@
 LGR = logging.getLogger(__name__)
 
-class Dataset(NiMAREBase):
-    """Storage container for a coordinate- and/or image-based meta-analytic dataset/database.
+class DatasetSearcher(NiMAREBase):
+    """A tool for searching Datasets."""
 
-    .. versionchanged:: 0.0.9
+    def get(self, dataset, dict_, drop_invalid=True):
+        """Retrieve files and/or metadata from a Dataset.
 
-        * [ENH] Add merge method to Dataset class
+        Parameters
+        ----------
+        dict_ : :obj:`dict`
+            Dictionary specifying images or metadata to collect.
+            Keys should be variables to be used as keys for results dictionary.
+            Values should be tuples with two values:
+            type (e.g., 'image' or 'metadata') and specific field corresponding
+            to column of type-specific DataFrame (e.g., 'z' or 'sample_sizes').
+        drop_invalid : :obj:`bool`, optional
+            Whether to automatically ignore any studies without the required data or not.
+            Default is True.
 
-    .. versionchanged:: 0.0.8
+        Returns
+        -------
+        results : :obj:`dict`
+            A dictionary of lists of requested data. Keys correspond to the keys in ``dict_``.
 
-    * [FIX] Set ``nimare.dataset.Dataset.basepath`` in :func:`update_path` using absolute path.
+        Examples
+        --------
+        >>> dset.get({'z_maps': ('image', 'z'), 'sample_sizes': ('metadata', 'sample_sizes')})
+        >>> dset.get({'coordinates': ('coordinates', None)})
+        """
+        results = {}
+        results["id"] = dataset.ids
+        keep_idx = np.arange(len(dataset.ids), dtype=int)
+        for k, vals in dict_.items():
+            if vals[0] == "image":
+                temp = dataset.get_images(imtype=vals[1])
+            elif vals[0] == "metadata":
+                temp = dataset.get_metadata(field=vals[1])
+            elif vals[0] == "coordinates":
+                # Break DataFrame down into a list of study-specific DataFrames
+                temp = [
+                    dataset.coordinates.loc[dataset.coordinates["id"] == id_]
+                    for id_ in dataset.ids
+                ]
+                # Replace empty DataFrames with Nones
+                temp = [t if t.size else None for t in temp]
+            elif vals[0] == "annotations":
+                # Break DataFrame down into a list of study-specific DataFrames
+                temp = [
+                    dataset.annotations.loc[dataset.annotations["id"] == id_]
+                    for id_ in dataset.ids
+                ]
+                # Replace empty DataFrames with Nones
+                temp = [t if t.size else None for t in temp]
+            else:
+                raise ValueError(f"Input '{vals[0]}' not understood.")
 
-    Parameters
-    ----------
-    source : :obj:`str` or :obj:`dict`
-        JSON file containing dictionary with database information or the dict()
-        object
+            results[k] = temp
+            temp_keep_idx = np.where([t is not None for t in temp])[0]
+            keep_idx = np.intersect1d(keep_idx, temp_keep_idx)
 
-    target : :obj:`str`, optional
-        Desired coordinate space for coordinates. Names follow NIDM convention.
-        Default is 'mni152_2mm' (MNI space with 2x2x2 voxels).
-        This parameter has no impact on images.
+        # reduce
+        if drop_invalid and (len(keep_idx) != len(dataset.ids)):
+            LGR.info(f"Retaining {len(keep_idx)}/{len(dataset.ids)} studies")
+        elif len(keep_idx) != len(dataset.ids):
+            raise Exception(
+                f"Only {len(keep_idx)}/{len(dataset.ids)} in Dataset contain the necessary data. 
" + "If you want to analyze the subset of studies with required data, " + "set `drop_invalid` to True." + ) - mask : :obj:`str`, :class:`~nibabel.nifti1.Nifti1Image`, \ - :class:`~nilearn.input_data.NiftiMasker` or similar, or None, optional - Mask(er) to use. If None, uses the target space image, with all - non-zero voxels included in the mask. + for k in results: + results[k] = [results[k][i] for i in keep_idx] + if dict_.get(k, [None])[0] in ("coordinates", "annotations"): + results[k] = pd.concat(results[k]) - Attributes - ---------- - space : :obj:`str` - Standard space. Same as ``target`` parameter. + return results - Notes - ----- - Images loaded into a Dataset are assumed to be in the same space. - If images have different resolutions or affines from the Dataset's masker, - then they will be resampled automatically, at the point where they're used, - by :obj:`Dataset.masker`. - """ + def _generic_column_getter(self, dataset, attr, ids=None, column=None, ignore_columns=None): + """Extract information from DataFrame-based attributes. - _id_cols = ["id", "study_id", "contrast_id"] + Parameters + ---------- + attr : :obj:`str` + The name of the DataFrame-format Dataset attribute to search. + ids : :obj:`list` or None, optional + A list of study IDs within which to extract values. + If None, extract values for all studies in the Dataset. + Default is None. + column : :obj:`str` or None, optional + The column from which to extract values. + If None, a list of all columns with valid values will be returned. + Must be a column within Dataset.[attr]. + ignore_columns : :obj:`list` or None, optional + A list of columns to ignore. Only used if ``column`` is None. - def __init__(self, source, target="mni152_2mm", mask=None): - if isinstance(source, str): - with open(source, "r") as f_obj: - data = json.load(f_obj) - elif isinstance(source, dict): - data = source + Returns + ------- + result : :obj:`list` or :obj:`str` + A list of values or a string, depending on if ids is a list (or None) or a string. 
+ """ + if ignore_columns is None: + ignore_columns = dataset._id_cols else: - raise Exception("`source` needs to be a file path or a dictionary") + ignore_columns += dataset._id_cols - # Datasets are organized by study, then experiment - # To generate unique IDs, we combine study ID with experiment ID - # build list of ids - id_columns = ["id", "study_id", "contrast_id"] - all_ids = [] - for pid in data.keys(): - for expid in data[pid]["contrasts"].keys(): - id_ = f"{pid}-{expid}" - all_ids.append([id_, pid, expid]) - id_df = pd.DataFrame(columns=id_columns, data=all_ids) - id_df = id_df.set_index("id", drop=False) - self._ids = id_df.index.values + df = getattr(dataset, attr) + return_first = False - # Set up Masker - if mask is None: - mask = get_template(target, mask="brain") - self.masker = mask - self.space = target + if isinstance(ids, str) and column is not None: + return_first = True + ids = _listify(ids) - self.annotations = _dict_to_df(id_df, data, key="labels") - self.coordinates = _dict_to_coordinates(data, masker=self.masker, space=self.space) - self.images = _dict_to_df(id_df, data, key="images") - self.metadata = _dict_to_df(id_df, data, key="metadata") - self.texts = _dict_to_df(id_df, data, key="text") - self.basepath = None + available_types = [c for c in df.columns if c not in dataset._id_cols] + if (column is not None) and (column not in available_types): + raise ValueError( + f"{column} not found in {attr}.\nAvailable types: {', '.join(available_types)}" + ) - def __repr__(self): - """Show basic Dataset representation. + if column is not None: + if ids is not None: + result = df[column].loc[df["id"].isin(ids)].tolist() + else: + result = df[column].tolist() + else: + if ids is not None: + result = {v: df[v].loc[df["id"].isin(ids)].tolist() for v in available_types} + result = {k: v for k, v in result.items() if any(v)} + else: + result = {v: df[v].tolist() for v in available_types} + result = list(result.keys()) - It's basically the same as the NiMAREBase representation, but with the number of - experiments in the Dataset represented as well. + if return_first: + return result[0] + else: + return result + + def get_labels(self, dataset, ids=None): + """Extract list of labels for which studies in Dataset have annotations. + + Parameters + ---------- + ids : :obj:`list`, optional + A list of IDs in the Dataset for which to find labels. Default is + None, in which case all labels are returned. + + Returns + ------- + labels : :obj:`list` + List of labels for which there are annotations in the Dataset. 
""" - # Get default parameter values for the object - signature = inspect.signature(self.__init__) - defaults = { - k: v.default - for k, v in signature.parameters.items() - if v.default is not inspect.Parameter.empty - } + if not isinstance(ids, list) and ids is not None: + ids = _listify(ids) - # Eliminate any sub-parameters (e.g., parameters for a MetaEstimator's KernelTransformer), - # as well as default values - params = self.get_params() - params = {k: v for k, v in params.items() if "__" not in k} - # Parameter "target" is stored as attribute "space" - # and we want to show it regardless of whether it's the default or not - params["space"] = self.space - params.pop("target") - params = {k: v for k, v in params.items() if defaults.get(k) != v} + result = [c for c in dataset.annotations.columns if c not in dataset._id_cols] + if ids is not None: + temp_annotations = dataset.annotations.loc[dataset.annotations["id"].isin(ids)] + res = temp_annotations[result].any(axis=0) + result = res.loc[res].index.tolist() - # Convert to strings - param_strs = [] - for k, v in params.items(): - if isinstance(v, str): - # Wrap string values in single quotes - param_str = f"{k}='{v}'" - else: - # Keep everything else as-is based on its own repr - param_str = f"{k}={v}" - param_strs.append(param_str) + return result - params_str = ", ".join(param_strs) - params_str = f"{len(self.ids)} experiments{', ' if params_str else ''}{params_str}" - rep = f"{self.__class__.__name__}({params_str})" - return rep + def get_texts(self, dataset, ids=None, text_type=None): + """Extract list of texts of a given type for selected IDs. - @property - def ids(self): - """numpy.ndarray: 1D array of identifiers in Dataset. + Parameters + ---------- + ids : :obj:`list`, optional + A list of IDs in the Dataset for which to find texts. Default is + None, in which case all texts of requested type are returned. + text_type : :obj:`str`, optional + Type of text to extract. Corresponds to column name in + Dataset.texts DataFrame. Default is None. - The associated setter for this property is private, as ``Dataset.ids`` is immutable. + Returns + ------- + texts : :obj:`list` + List of texts of requested type for selected IDs. """ - return self.__ids + result = self._generic_column_getter(dataset, "texts", ids=ids, column=text_type) + return result - @ids.setter - def _ids(self, ids): - ids = np.sort(np.asarray(ids)) - assert isinstance(ids, np.ndarray) and ids.ndim == 1 - self.__ids = ids + def get_metadata(self, dataset, ids=None, field=None): + """Get metadata from Dataset. - @property - def masker(self): - """:class:`nilearn.input_data.NiftiMasker` or similar: Masker object. + Parameters + ---------- + ids : :obj:`list`, optional + A list of IDs in the Dataset for which to find metadata. Default is + None, in which case all metadata of requested type are returned. + field : :obj:`str`, optional + Metadata field to extract. Corresponds to column name in + Dataset.metadata DataFrame. Default is None. - Defines the space and location of the area of interest (e.g., 'brain'). + Returns + ------- + metadata : :obj:`list` + List of values of requested type for selected IDs. 
""" - return self.__masker + result = dataset._generic_column_getter(dataset, "metadata", ids=ids, column=field) + return result - @masker.setter - def masker(self, mask): - mask = get_masker(mask) - if hasattr(self, "masker") and not np.array_equal( - self.masker.mask_img.affine, mask.mask_img.affine - ): - # This message does not have an associated effect, - # since matrix indices are calculated as necessary - LGR.warning("New masker does not match old masker. Space is assumed to be the same.") - - self.__masker = mask + def get_images(self, dataset, ids=None, imtype=None): + """Get images of a certain type for a subset of studies in the dataset. - @property - def annotations(self): - """:class:`pandas.DataFrame`: Labels describing studies in the dataset. + Parameters + ---------- + ids : :obj:`list`, optional + A list of IDs in the Dataset for which to find images. Default is + None, in which case all images of requested type are returned. + imtype : :obj:`str`, optional + Type of image to extract. Corresponds to column name in + Dataset.images DataFrame. Default is None. - Each study/experiment has its own row. - Columns correspond to individual labels (e.g., 'emotion'), and may - be prefixed with a feature group including two underscores - (e.g., 'Neurosynth_TFIDF__emotion'). + Returns + ------- + images : :obj:`list` + List of images of requested type for selected IDs. """ - return self.__annotations - - @annotations.setter - def annotations(self, df): - _validate_df(df) - self.__annotations = df.sort_values(by="id") + ignore_columns = ["space"] + ignore_columns += [c for c in dataset.images.columns if c.endswith("__relative")] + result = self._generic_column_getter( + dataset, + "images", + ids=ids, + column=imtype, + ignore_columns=ignore_columns, + ) + return result - @property - def coordinates(self): - """:class:`pandas.DataFrame`: Coordinates in the dataset. + def get_studies_by_label(self, dataset, labels=None, label_threshold=0.001): + """Extract list of studies with a given label. .. versionchanged:: 0.0.10 - The coordinates attribute no longer includes the associated matrix indices - (columns 'i', 'j', and 'k'). These columns are calculated as needed. - - Each study has one row for each peak. - Columns include ['x', 'y', 'z'] (peak locations in mm) and 'space' (Dataset's space). - """ - return self.__coordinates - - @coordinates.setter - def coordinates(self, df): - _validate_df(df) - self.__coordinates = df.sort_values(by="id") - - @property - def images(self): - """:class:`pandas.DataFrame`: Images in the dataset. - - Each image type has its own column (e.g., 'z') with absolute paths to - files and each study has its own row. - Additionally, relative paths to image files are stored in columns with - the suffix '__relative' (e.g., 'z__relative'). + Fix bug in which all IDs were returned when a label wasn't present in the Dataset. - Warnings - -------- - Images are assumed to be in the same space, although they may have - different resolutions and affines. Images will be resampled as needed - at the point where they are used, via :obj:`Dataset.masker`. - """ - return self.__images + .. versionchanged:: 0.0.9 - @images.setter - def images(self, df): - _validate_df(df) - self.__images = _validate_images_df(df).sort_values(by="id") + Default value for label_threshold changed to 0.001. - @property - def metadata(self): - """:class:`pandas.DataFrame`: Metadata describing studies in the dataset. 
+ Parameters + ---------- + labels : :obj:`list`, optional + List of labels to use to search Dataset. If a contrast has all of + the labels above the threshold, it will be returned. + Default is None. + label_threshold : :obj:`float`, optional + Default is 0.5. - Each metadata field has its own column (e.g., 'sample_sizes') and each study - has its own row. + Returns + ------- + found_ids : :obj:`list` + A list of IDs from the Dataset found by the search criteria. """ - return self.__metadata - - @metadata.setter - def metadata(self, df): - _validate_df(df) - self.__metadata = df.sort_values(by="id") + if isinstance(labels, str): + labels = [labels] + elif not isinstance(labels, list): + raise ValueError(f"Argument 'labels' cannot be {type(labels)}") - @property - def texts(self): - """:class:`pandas.DataFrame`: Texts in the dataset. + missing_labels = [label for label in labels if label not in dataset.annotations.columns] + if missing_labels: + raise ValueError(f"Missing label(s): {', '.join(missing_labels)}") - Each text type has its own column (e.g., 'abstract') and each study - has its own row. - """ - return self.__texts + temp_annotations = dataset.annotations[dataset._id_cols + labels] + found_rows = (temp_annotations[labels] >= label_threshold).all(axis=1) + if any(found_rows): + found_ids = temp_annotations.loc[found_rows, "id"].tolist() + else: + found_ids = [] - @texts.setter - def texts(self, df): - _validate_df(df) - self.__texts = df.sort_values(by="id") + return found_ids - def slice(self, ids): - """Create a new dataset with only requested IDs. + def get_studies_by_mask(self, dataset, mask): + """Extract list of studies with at least one coordinate in mask. Parameters ---------- - ids : array_like - List of study IDs to include in new dataset + mask : img_like + Mask across which to search for coordinates. Returns ------- - new_dset : :obj:`~nimare.dataset.Dataset` - Reduced Dataset containing only requested studies. + found_ids : :obj:`list` + A list of IDs from the Dataset with at least one focus in the mask. """ - new_dset = copy.deepcopy(self) - new_dset._ids = ids - for attribute in ("annotations", "coordinates", "images", "metadata", "texts"): - df = getattr(new_dset, attribute) - df = df.loc[df["id"].isin(ids)] - setattr(new_dset, attribute, df) + from scipy.spatial.distance import cdist - return new_dset + mask = load_niimg(mask) - def merge(self, right): - """Merge two Datasets. + dset_mask = dataset.masker.mask_img + if not np.array_equal(dset_mask.affine, mask.affine): + LGR.warning("Mask affine does not match Dataset affine. Assuming same space.") - .. versionadded:: 0.0.9 + dset_ijk = mm2vox(dataset.coordinates[["x", "y", "z"]].values, mask.affine) + mask_ijk = np.vstack(np.where(mask.get_fdata())).T + distances = cdist(mask_ijk, dset_ijk) + distances = np.any(distances == 0, axis=0) + found_ids = list(dataset.coordinates.loc[distances, "id"].unique()) + return found_ids + + def get_studies_by_coordinate(self, dataset, xyz, r=20): + """Extract list of studies with at least one focus within radius of requested coordinates. Parameters ---------- - right : :obj:`~nimare.dataset.Dataset` - Dataset to merge with. + xyz : (X x 3) array_like + List of coordinates against which to find studies. + r : :obj:`float`, optional + Radius (in mm) within which to find studies. Default is 20mm. Returns ------- - :obj:`~nimare.dataset.Dataset` - A Dataset of the two merged Datasets. 
+ found_ids : :obj:`list` + A list of IDs from the Dataset with at least one focus within + radius r of requested coordinates. """ - assert isinstance(right, Dataset) - shared_ids = np.intersect1d(self.ids, right.ids) - if shared_ids.size: - raise Exception("Duplicate IDs detected in both datasets.") + from scipy.spatial.distance import cdist - all_ids = np.concatenate((self.ids, right.ids)) - new_dset = copy.deepcopy(self) - new_dset._ids = all_ids + xyz = np.array(xyz) + assert xyz.shape[1] == 3 and xyz.ndim == 2 + distances = cdist(xyz, dataset.coordinates[["x", "y", "z"]].values) + distances = np.any(distances <= r, axis=0) + found_ids = list(dataset.coordinates.loc[distances, "id"].unique()) + return found_ids - for attribute in ("annotations", "coordinates", "images", "metadata", "texts"): - df1 = getattr(self, attribute) - df2 = getattr(right, attribute) - new_df = df1.append(df2, ignore_index=True, sort=False) - new_df.sort_values(by="id", inplace=True) - new_df.reset_index(drop=True, inplace=True) - new_df = new_df.where(~new_df.isna(), None) - setattr(new_dset, attribute, new_df) - new_dset.coordinates = _transform_coordinates_to_space( - new_dset.coordinates, - self.masker, - self.space, - ) +class Dataset(NiMAREBase): + """Storage container for a coordinate- and/or image-based meta-analytic dataset/database. - return new_dset + .. versionchanged:: 0.0.9 - def update_path(self, new_path): - """Update paths to images. + * [ENH] Add merge method to Dataset class - Prepends new path to the relative path for files in Dataset.images. + .. versionchanged:: 0.0.8 - Parameters - ---------- - new_path : :obj:`str` - Path to prepend to relative paths of files in Dataset.images. - """ - self.basepath = op.abspath(new_path) - df = self.images - relative_path_cols = [c for c in df if c.endswith("__relative")] - for col in relative_path_cols: - abs_col = col.replace("__relative", "") - if abs_col in df.columns: - LGR.info(f"Overwriting images column {abs_col}") - df[abs_col] = df[col].apply(_try_prepend, prefix=self.basepath) - self.images = df + * [FIX] Set ``nimare.dataset.Dataset.basepath`` in :func:`update_path` using absolute path. - def copy(self): - """Create a copy of the Dataset.""" - return copy.deepcopy(self) + Parameters + ---------- + source : :obj:`str` or :obj:`dict` + JSON file containing dictionary with database information or the dict() + object - def get(self, dict_, drop_invalid=True): - """Retrieve files and/or metadata from the current Dataset. + target : :obj:`str`, optional + Desired coordinate space for coordinates. Names follow NIDM convention. + Default is 'mni152_2mm' (MNI space with 2x2x2 voxels). + This parameter has no impact on images. - Parameters - ---------- - dict_ : :obj:`dict` - Dictionary specifying images or metadata to collect. - Keys should be variables to be used as keys for results dictionary. - Values should be tuples with two values: - type (e.g., 'image' or 'metadata') and specific field corresponding - to column of type-specific DataFrame (e.g., 'z' or 'sample_sizes'). - drop_invalid : :obj:`bool`, optional - Whether to automatically ignore any studies without the required data or not. - Default is False. + mask : :obj:`str`, :class:`~nibabel.nifti1.Nifti1Image`, \ + :class:`~nilearn.input_data.NiftiMasker` or similar, or None, optional + Mask(er) to use. If None, uses the target space image, with all + non-zero voxels included in the mask. - Returns - ------- - results : :obj:`dict` - A dictionary of lists of requested data. 
Keys correspond to the keys in ``dict_``. + Attributes + ---------- + space : :obj:`str` + Standard space. Same as ``target`` parameter. - Examples - -------- - >>> dset.get({'z_maps': ('image', 'z'), 'sample_sizes': ('metadata', 'sample_sizes')}) - >>> dset.get({'coordinates': ('coordinates', None)}) - """ - results = {} - results["id"] = self.ids - keep_idx = np.arange(len(self.ids), dtype=int) - for k, vals in dict_.items(): - if vals[0] == "image": - temp = self.get_images(imtype=vals[1]) - elif vals[0] == "metadata": - temp = self.get_metadata(field=vals[1]) - elif vals[0] == "coordinates": - # Break DataFrame down into a list of study-specific DataFrames - temp = [self.coordinates.loc[self.coordinates["id"] == id_] for id_ in self.ids] - # Replace empty DataFrames with Nones - temp = [t if t.size else None for t in temp] - elif vals[0] == "annotations": - # Break DataFrame down into a list of study-specific DataFrames - temp = [self.annotations.loc[self.annotations["id"] == id_] for id_ in self.ids] - # Replace empty DataFrames with Nones - temp = [t if t.size else None for t in temp] - else: - raise ValueError(f"Input '{vals[0]}' not understood.") + Notes + ----- + Images loaded into a Dataset are assumed to be in the same space. + If images have different resolutions or affines from the Dataset's masker, + then they will be resampled automatically, at the point where they're used, + by :obj:`Dataset.masker`. + """ - results[k] = temp - temp_keep_idx = np.where([t is not None for t in temp])[0] - keep_idx = np.intersect1d(keep_idx, temp_keep_idx) + _id_cols = ["id", "study_id", "contrast_id"] - # reduce - if drop_invalid and (len(keep_idx) != len(self.ids)): - LGR.info(f"Retaining {len(keep_idx)}/{len(self.ids)} studies") - elif len(keep_idx) != len(self.ids): - raise Exception( - f"Only {len(keep_idx)}/{len(self.ids)} in Dataset contain the necessary data. " - "If you want to analyze the subset of studies with required data, " - "set `drop_invalid` to True." - ) + def __init__(self, source, target="mni152_2mm", mask=None): + if isinstance(source, str): + with open(source, "r") as f_obj: + data = json.load(f_obj) + elif isinstance(source, dict): + data = source + else: + raise Exception("`source` needs to be a file path or a dictionary") - for k in results: - results[k] = [results[k][i] for i in keep_idx] - if dict_.get(k, [None])[0] in ("coordinates", "annotations"): - results[k] = pd.concat(results[k]) + # Datasets are organized by study, then experiment + # To generate unique IDs, we combine study ID with experiment ID + # build list of ids + id_columns = ["id", "study_id", "contrast_id"] + all_ids = [] + for pid in data.keys(): + for expid in data[pid]["contrasts"].keys(): + id_ = f"{pid}-{expid}" + all_ids.append([id_, pid, expid]) + id_df = pd.DataFrame(columns=id_columns, data=all_ids) + id_df = id_df.set_index("id", drop=False) + self._ids = id_df.index.values - return results + # Set up Masker + if mask is None: + mask = get_template(target, mask="brain") + self.masker = mask + self.space = target - def _generic_column_getter(self, attr, ids=None, column=None, ignore_columns=None): - """Extract information from DataFrame-based attributes. 
+ self.annotations = _dict_to_df(id_df, data, key="labels") + self.coordinates = _dict_to_coordinates(data, masker=self.masker, space=self.space) + self.images = _dict_to_df(id_df, data, key="images") + self.metadata = _dict_to_df(id_df, data, key="metadata") + self.texts = _dict_to_df(id_df, data, key="text") + self.basepath = None - Parameters - ---------- - attr : :obj:`str` - The name of the DataFrame-format Dataset attribute to search. - ids : :obj:`list` or None, optional - A list of study IDs within which to extract values. - If None, extract values for all studies in the Dataset. - Default is None. - column : :obj:`str` or None, optional - The column from which to extract values. - If None, a list of all columns with valid values will be returned. - Must be a column within Dataset.[attr]. - ignore_columns : :obj:`list` or None, optional - A list of columns to ignore. Only used if ``column`` is None. + def __repr__(self): + """Show basic Dataset representation. - Returns - ------- - result : :obj:`list` or :obj:`str` - A list of values or a string, depending on if ids is a list (or None) or a string. + It's basically the same as the NiMAREBase representation, but with the number of + experiments in the Dataset represented as well. """ - if ignore_columns is None: - ignore_columns = self._id_cols - else: - ignore_columns += self._id_cols + # Get default parameter values for the object + signature = inspect.signature(self.__init__) + defaults = { + k: v.default + for k, v in signature.parameters.items() + if v.default is not inspect.Parameter.empty + } - df = getattr(self, attr) - return_first = False + # Eliminate any sub-parameters (e.g., parameters for a MetaEstimator's KernelTransformer), + # as well as default values + params = self.get_params() + params = {k: v for k, v in params.items() if "__" not in k} + # Parameter "target" is stored as attribute "space" + # and we want to show it regardless of whether it's the default or not + params["space"] = self.space + params.pop("target") + params = {k: v for k, v in params.items() if defaults.get(k) != v} - if isinstance(ids, str) and column is not None: - return_first = True - ids = _listify(ids) + # Convert to strings + param_strs = [] + for k, v in params.items(): + if isinstance(v, str): + # Wrap string values in single quotes + param_str = f"{k}='{v}'" + else: + # Keep everything else as-is based on its own repr + param_str = f"{k}={v}" + param_strs.append(param_str) - available_types = [c for c in df.columns if c not in self._id_cols] - if (column is not None) and (column not in available_types): - raise ValueError( - f"{column} not found in {attr}.\nAvailable types: {', '.join(available_types)}" - ) + params_str = ", ".join(param_strs) + params_str = f"{len(self.ids)} experiments{', ' if params_str else ''}{params_str}" + rep = f"{self.__class__.__name__}({params_str})" + return rep - if column is not None: - if ids is not None: - result = df[column].loc[df["id"].isin(ids)].tolist() - else: - result = df[column].tolist() - else: - if ids is not None: - result = {v: df[v].loc[df["id"].isin(ids)].tolist() for v in available_types} - result = {k: v for k, v in result.items() if any(v)} - else: - result = {v: df[v].tolist() for v in available_types} - result = list(result.keys()) + @property + def ids(self): + """numpy.ndarray: 1D array of identifiers in Dataset. - if return_first: - return result[0] - else: - return result + The associated setter for this property is private, as ``Dataset.ids`` is immutable. 
+ """ + return self.__ids + + @ids.setter + def _ids(self, ids): + ids = np.sort(np.asarray(ids)) + assert isinstance(ids, np.ndarray) and ids.ndim == 1 + self.__ids = ids + + @property + def masker(self): + """:class:`nilearn.input_data.NiftiMasker` or similar: Masker object. + + Defines the space and location of the area of interest (e.g., 'brain'). + """ + return self.__masker - def get_labels(self, ids=None): - """Extract list of labels for which studies in Dataset have annotations. + @masker.setter + def masker(self, mask): + mask = get_masker(mask) + if hasattr(self, "masker") and not np.array_equal( + self.masker.mask_img.affine, mask.mask_img.affine + ): + # This message does not have an associated effect, + # since matrix indices are calculated as necessary + LGR.warning("New masker does not match old masker. Space is assumed to be the same.") - Parameters - ---------- - ids : :obj:`list`, optional - A list of IDs in the Dataset for which to find labels. Default is - None, in which case all labels are returned. + self.__masker = mask - Returns - ------- - labels : :obj:`list` - List of labels for which there are annotations in the Dataset. + @property + def annotations(self): + """:class:`pandas.DataFrame`: Labels describing studies in the dataset. + + Each study/experiment has its own row. + Columns correspond to individual labels (e.g., 'emotion'), and may + be prefixed with a feature group including two underscores + (e.g., 'Neurosynth_TFIDF__emotion'). """ - if not isinstance(ids, list) and ids is not None: - ids = _listify(ids) + return self.__annotations - result = [c for c in self.annotations.columns if c not in self._id_cols] - if ids is not None: - temp_annotations = self.annotations.loc[self.annotations["id"].isin(ids)] - res = temp_annotations[result].any(axis=0) - result = res.loc[res].index.tolist() + @annotations.setter + def annotations(self, df): + _validate_df(df) + self.__annotations = df.sort_values(by="id") - return result + @property + def coordinates(self): + """:class:`pandas.DataFrame`: Coordinates in the dataset. - def get_texts(self, ids=None, text_type=None): - """Extract list of texts of a given type for selected IDs. + .. versionchanged:: 0.0.10 - Parameters - ---------- - ids : :obj:`list`, optional - A list of IDs in the Dataset for which to find texts. Default is - None, in which case all texts of requested type are returned. - text_type : :obj:`str`, optional - Type of text to extract. Corresponds to column name in - Dataset.texts DataFrame. Default is None. + The coordinates attribute no longer includes the associated matrix indices + (columns 'i', 'j', and 'k'). These columns are calculated as needed. - Returns - ------- - texts : :obj:`list` - List of texts of requested type for selected IDs. + Each study has one row for each peak. + Columns include ['x', 'y', 'z'] (peak locations in mm) and 'space' (Dataset's space). """ - result = self._generic_column_getter("texts", ids=ids, column=text_type) - return result + return self.__coordinates - def get_metadata(self, ids=None, field=None): - """Get metadata from Dataset. + @coordinates.setter + def coordinates(self, df): + _validate_df(df) + self.__coordinates = df.sort_values(by="id") - Parameters - ---------- - ids : :obj:`list`, optional - A list of IDs in the Dataset for which to find metadata. Default is - None, in which case all metadata of requested type are returned. - field : :obj:`str`, optional - Metadata field to extract. Corresponds to column name in - Dataset.metadata DataFrame. 
Default is None. + @property + def images(self): + """:class:`pandas.DataFrame`: Images in the dataset. - Returns - ------- - metadata : :obj:`list` - List of values of requested type for selected IDs. + Each image type has its own column (e.g., 'z') with absolute paths to + files and each study has its own row. + Additionally, relative paths to image files are stored in columns with + the suffix '__relative' (e.g., 'z__relative'). + + Warnings + -------- + Images are assumed to be in the same space, although they may have + different resolutions and affines. Images will be resampled as needed + at the point where they are used, via :obj:`Dataset.masker`. """ - result = self._generic_column_getter("metadata", ids=ids, column=field) - return result + return self.__images - def get_images(self, ids=None, imtype=None): - """Get images of a certain type for a subset of studies in the dataset. + @images.setter + def images(self, df): + _validate_df(df) + self.__images = _validate_images_df(df).sort_values(by="id") - Parameters - ---------- - ids : :obj:`list`, optional - A list of IDs in the Dataset for which to find images. Default is - None, in which case all images of requested type are returned. - imtype : :obj:`str`, optional - Type of image to extract. Corresponds to column name in - Dataset.images DataFrame. Default is None. + @property + def metadata(self): + """:class:`pandas.DataFrame`: Metadata describing studies in the dataset. - Returns - ------- - images : :obj:`list` - List of images of requested type for selected IDs. + Each metadata field has its own column (e.g., 'sample_sizes') and each study + has its own row. """ - ignore_columns = ["space"] - ignore_columns += [c for c in self.images.columns if c.endswith("__relative")] - result = self._generic_column_getter( - "images", - ids=ids, - column=imtype, - ignore_columns=ignore_columns, - ) - return result + return self.__metadata - def get_studies_by_label(self, labels=None, label_threshold=0.001): - """Extract list of studies with a given label. + @metadata.setter + def metadata(self, df): + _validate_df(df) + self.__metadata = df.sort_values(by="id") - .. versionchanged:: 0.0.10 + @property + def texts(self): + """:class:`pandas.DataFrame`: Texts in the dataset. - Fix bug in which all IDs were returned when a label wasn't present in the Dataset. + Each text type has its own column (e.g., 'abstract') and each study + has its own row. + """ + return self.__texts - .. versionchanged:: 0.0.9 + @texts.setter + def texts(self, df): + _validate_df(df) + self.__texts = df.sort_values(by="id") - Default value for label_threshold changed to 0.001. + def slice(self, ids): + """Create a new dataset with only requested IDs. Parameters ---------- - labels : :obj:`list`, optional - List of labels to use to search Dataset. If a contrast has all of - the labels above the threshold, it will be returned. - Default is None. - label_threshold : :obj:`float`, optional - Default is 0.5. + ids : array_like + List of study IDs to include in new dataset Returns ------- - found_ids : :obj:`list` - A list of IDs from the Dataset found by the search criteria. + new_dset : :obj:`~nimare.dataset.Dataset` + Reduced Dataset containing only requested studies. 
""" - if isinstance(labels, str): - labels = [labels] - elif not isinstance(labels, list): - raise ValueError(f"Argument 'labels' cannot be {type(labels)}") - - missing_labels = [label for label in labels if label not in self.annotations.columns] - if missing_labels: - raise ValueError(f"Missing label(s): {', '.join(missing_labels)}") + new_dset = copy.deepcopy(self) + new_dset._ids = ids + for attribute in ("annotations", "coordinates", "images", "metadata", "texts"): + df = getattr(new_dset, attribute) + df = df.loc[df["id"].isin(ids)] + setattr(new_dset, attribute, df) - temp_annotations = self.annotations[self._id_cols + labels] - found_rows = (temp_annotations[labels] >= label_threshold).all(axis=1) - if any(found_rows): - found_ids = temp_annotations.loc[found_rows, "id"].tolist() - else: - found_ids = [] + return new_dset - return found_ids + def merge(self, right): + """Merge two Datasets. - def get_studies_by_mask(self, mask): - """Extract list of studies with at least one coordinate in mask. + .. versionadded:: 0.0.9 Parameters ---------- - mask : img_like - Mask across which to search for coordinates. + right : :obj:`~nimare.dataset.Dataset` + Dataset to merge with. Returns ------- - found_ids : :obj:`list` - A list of IDs from the Dataset with at least one focus in the mask. + :obj:`~nimare.dataset.Dataset` + A Dataset of the two merged Datasets. """ - from scipy.spatial.distance import cdist + assert isinstance(right, Dataset) + shared_ids = np.intersect1d(self.ids, right.ids) + if shared_ids.size: + raise Exception("Duplicate IDs detected in both datasets.") - mask = load_niimg(mask) + all_ids = np.concatenate((self.ids, right.ids)) + new_dset = copy.deepcopy(self) + new_dset._ids = all_ids - dset_mask = self.masker.mask_img - if not np.array_equal(dset_mask.affine, mask.affine): - LGR.warning("Mask affine does not match Dataset affine. Assuming same space.") + for attribute in ("annotations", "coordinates", "images", "metadata", "texts"): + df1 = getattr(self, attribute) + df2 = getattr(right, attribute) + new_df = df1.append(df2, ignore_index=True, sort=False) + new_df.sort_values(by="id", inplace=True) + new_df.reset_index(drop=True, inplace=True) + new_df = new_df.where(~new_df.isna(), None) + setattr(new_dset, attribute, new_df) - dset_ijk = mm2vox(self.coordinates[["x", "y", "z"]].values, mask.affine) - mask_ijk = np.vstack(np.where(mask.get_fdata())).T - distances = cdist(mask_ijk, dset_ijk) - distances = np.any(distances == 0, axis=0) - found_ids = list(self.coordinates.loc[distances, "id"].unique()) - return found_ids + new_dset.coordinates = _transform_coordinates_to_space( + new_dset.coordinates, + self.masker, + self.space, + ) - def get_studies_by_coordinate(self, xyz, r=20): - """Extract list of studies with at least one focus within radius of requested coordinates. + return new_dset + + def update_path(self, new_path): + """Update paths to images. + + Prepends new path to the relative path for files in Dataset.images. Parameters ---------- - xyz : (X x 3) array_like - List of coordinates against which to find studies. - r : :obj:`float`, optional - Radius (in mm) within which to find studies. Default is 20mm. - - Returns - ------- - found_ids : :obj:`list` - A list of IDs from the Dataset with at least one focus within - radius r of requested coordinates. + new_path : :obj:`str` + Path to prepend to relative paths of files in Dataset.images. 
""" - from scipy.spatial.distance import cdist + self.basepath = op.abspath(new_path) + df = self.images + relative_path_cols = [c for c in df if c.endswith("__relative")] + for col in relative_path_cols: + abs_col = col.replace("__relative", "") + if abs_col in df.columns: + LGR.info(f"Overwriting images column {abs_col}") + df[abs_col] = df[col].apply(_try_prepend, prefix=self.basepath) + self.images = df - xyz = np.array(xyz) - assert xyz.shape[1] == 3 and xyz.ndim == 2 - distances = cdist(xyz, self.coordinates[["x", "y", "z"]].values) - distances = np.any(distances <= r, axis=0) - found_ids = list(self.coordinates.loc[distances, "id"].unique()) - return found_ids + def copy(self): + """Create a copy of the Dataset.""" + return copy.deepcopy(self) From 1a9c1a6f28c8744429f61156991266283e05dd32 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Mon, 25 Apr 2022 11:07:29 -0400 Subject: [PATCH 02/18] Incorporate new searcher into Estimator. --- nimare/base.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/nimare/base.py b/nimare/base.py index 63ab94c80..1bcb88959 100644 --- a/nimare/base.py +++ b/nimare/base.py @@ -13,8 +13,9 @@ from nilearn._utils.niimg_conversions import _check_same_fov from nilearn.image import concat_imgs, resample_to_img -from .results import MetaResult -from .utils import get_masker, mm2vox +from nimare.dataset import DatasetSearcher +from nimare.results import MetaResult +from nimare.utils import get_masker, mm2vox LGR = logging.getLogger(__name__) @@ -259,7 +260,8 @@ def _validate_input(self, dataset, drop_invalid=True): ) if self._required_inputs: - data = dataset.get(self._required_inputs, drop_invalid=drop_invalid) + searcher = DatasetSearcher() + data = searcher.get(dataset, self._required_inputs, drop_invalid=drop_invalid) # Do not overwrite existing inputs_ attribute. # This is necessary for PairwiseCBMAEstimator, which validates two sets of coordinates # in the same object. @@ -484,7 +486,8 @@ def _validate_input(self, dataset, drop_invalid=True): ) if self._required_inputs: - data = dataset.get(self._required_inputs, drop_invalid=drop_invalid) + searcher = DatasetSearcher() + data = searcher.get(dataset, self._required_inputs, drop_invalid=drop_invalid) # Do not overwrite existing inputs_ attribute. # This is necessary for PairwiseCBMAEstimator, which validates two sets of coordinates # in the same object. From afca5f82721a15237ef8f32f2c3dec45586cd7f3 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Mon, 25 Apr 2022 11:07:36 -0400 Subject: [PATCH 03/18] Update docstring example. --- nimare/dataset.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/nimare/dataset.py b/nimare/dataset.py index b98c20bba..07921c31e 100755 --- a/nimare/dataset.py +++ b/nimare/dataset.py @@ -51,8 +51,11 @@ def get(self, dataset, dict_, drop_invalid=True): Examples -------- - >>> dset.get({'z_maps': ('image', 'z'), 'sample_sizes': ('metadata', 'sample_sizes')}) - >>> dset.get({'coordinates': ('coordinates', None)}) + >>> searcher = DatasetSearcher() + >>> searcher.get( + >>> dset, {'z_maps': ('image', 'z'), 'sample_sizes': ('metadata', 'sample_sizes')} + >>> ) + >>> searcher.get(dset, {'coordinates': ('coordinates', None)}) """ results = {} results["id"] = dataset.ids From 32bd9dbd1e6706081bcef7eede4b4eaef3070627 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Mon, 25 Apr 2022 11:07:50 -0400 Subject: [PATCH 04/18] Update get_labels. 
--- nimare/tests/test_decode_continuous.py | 7 +++++-- nimare/tests/test_decode_discrete.py | 10 +++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/nimare/tests/test_decode_continuous.py b/nimare/tests/test_decode_continuous.py index c4a31738d..ee70239aa 100644 --- a/nimare/tests/test_decode_continuous.py +++ b/nimare/tests/test_decode_continuous.py @@ -5,6 +5,7 @@ import pandas as pd import pytest +from nimare.dataset import DatasetSearcher from nimare.decode import continuous from nimare.meta import kernel, mkda @@ -12,7 +13,8 @@ def test_CorrelationDecoder_smoke(testdata_laird): """Smoke test for continuous.CorrelationDecoder.""" testdata_laird = testdata_laird.copy() - features = testdata_laird.get_labels(ids=testdata_laird.ids[0])[:5] + searcher = DatasetSearcher() + features = searcher.get_labels(testdata_laird, ids=testdata_laird.ids[0])[:5] decoder = continuous.CorrelationDecoder(features=features) decoder.fit(testdata_laird) @@ -29,7 +31,8 @@ def test_CorrelationDistributionDecoder_smoke(testdata_laird, tmp_path_factory): tmpdir = tmp_path_factory.mktemp("test_CorrelationDistributionDecoder") testdata_laird = testdata_laird.copy() - features = testdata_laird.get_labels(ids=testdata_laird.ids[0])[:5] + searcher = DatasetSearcher() + features = searcher.get_labels(testdata_laird, ids=testdata_laird.ids[0])[:5] decoder = continuous.CorrelationDistributionDecoder(features=features) diff --git a/nimare/tests/test_decode_discrete.py b/nimare/tests/test_decode_discrete.py index 17b65c771..d607dbddc 100644 --- a/nimare/tests/test_decode_discrete.py +++ b/nimare/tests/test_decode_discrete.py @@ -5,6 +5,7 @@ import pandas as pd import pytest +from nimare.dataset import DatasetSearcher from nimare.decode import discrete @@ -38,8 +39,9 @@ def test_brainmap_decode(testdata_laird): def test_NeurosynthDecoder(testdata_laird): """Smoke test for discrete.NeurosynthDecoder.""" + searcher = DatasetSearcher() ids = testdata_laird.ids[:5] - labels = testdata_laird.get_labels(ids=testdata_laird.ids) + labels = searcher.get_labels(testdata_laird, ids=testdata_laird.ids) decoder = discrete.NeurosynthDecoder(features=labels) decoder.fit(testdata_laird) decoded_df = decoder.transform(ids=ids) @@ -65,8 +67,9 @@ def test_NeurosynthDecoder_featuregroup_failure(testdata_laird): def test_BrainMapDecoder(testdata_laird): """Smoke test for discrete.BrainMapDecoder.""" + searcher = DatasetSearcher() ids = testdata_laird.ids[:5] - labels = testdata_laird.get_labels(ids=testdata_laird.ids) + labels = searcher.get_labels(testdata_laird, ids=testdata_laird.ids) decoder = discrete.BrainMapDecoder(features=labels) decoder.fit(testdata_laird) decoded_df = decoder.transform(ids=ids) @@ -83,7 +86,8 @@ def test_BrainMapDecoder_failure(testdata_laird): def test_ROIAssociationDecoder(testdata_laird, roi_img): """Smoke test for discrete.ROIAssociationDecoder.""" - labels = testdata_laird.get_labels(ids=testdata_laird.ids) + searcher = DatasetSearcher() + labels = searcher.get_labels(testdata_laird, ids=testdata_laird.ids) decoder = discrete.ROIAssociationDecoder(masker=roi_img, features=labels) decoder.fit(testdata_laird) decoded_df = decoder.transform() From 0f11583b6e77ad99cc58c03a7a816eaf5a050fba Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Mon, 25 Apr 2022 11:10:59 -0400 Subject: [PATCH 05/18] Update get_metadata. 
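
Callers that summed sample sizes off the Dataset now go through the searcher.
A rough sketch of the updated pattern in the ALE workflow, assuming ``dset``
is a Dataset with a 'sample_sizes' metadata field:

    import numpy as np

    from nimare.dataset import DatasetSearcher

    searcher = DatasetSearcher()
    sample_sizes = searcher.get_metadata(dset, field="sample_sizes")
    n_subs = np.sum(sample_sizes)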
--- nimare/dataset.py | 4 ++-- nimare/utils.py | 10 ++++++---- nimare/workflows/ale.py | 8 +++++--- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/nimare/dataset.py b/nimare/dataset.py index 07921c31e..fad67adb4 100755 --- a/nimare/dataset.py +++ b/nimare/dataset.py @@ -62,9 +62,9 @@ def get(self, dataset, dict_, drop_invalid=True): keep_idx = np.arange(len(dataset.ids), dtype=int) for k, vals in dict_.items(): if vals[0] == "image": - temp = dataset.get_images(imtype=vals[1]) + temp = self.get_images(dataset, imtype=vals[1]) elif vals[0] == "metadata": - temp = dataset.get_metadata(field=vals[1]) + temp = self.get_metadata(dataset, field=vals[1]) elif vals[0] == "coordinates": # Break DataFrame down into a list of study-specific DataFrames temp = [ diff --git a/nimare/utils.py b/nimare/utils.py index c5ae8d4e9..c04c609e7 100755 --- a/nimare/utils.py +++ b/nimare/utils.py @@ -15,8 +15,9 @@ import pandas as pd from nilearn.input_data import NiftiMasker -from . import references -from .due import due +from nimare import references +from nimare.dataset import DatasetSearcher +from nimare.due import due LGR = logging.getLogger(__name__) @@ -841,10 +842,11 @@ def _add_metadata_to_dataframe( Updated DataFrame with ``target_column`` added. """ dataframe = dataframe.copy() + searcher = DatasetSearcher() - if metadata_field in dataset.get_metadata(): + if metadata_field in searcher.get_metadata(dataset): # Collect metadata from Dataset - metadata = dataset.get_metadata(field=metadata_field, ids=dataset.ids) + metadata = searcher.get_metadata(dataset, field=metadata_field, ids=dataset.ids) metadata = [[m] for m in metadata] # Create a DataFrame with the metadata metadata = pd.DataFrame( diff --git a/nimare/workflows/ale.py b/nimare/workflows/ale.py index acc572d0a..54d56d7fe 100644 --- a/nimare/workflows/ale.py +++ b/nimare/workflows/ale.py @@ -7,6 +7,7 @@ import numpy as np from nimare.correct import FWECorrector +from nimare.dataset import DatasetSearcher from nimare.diagnostics import FocusCounter from nimare.io import convert_sleuth_to_dataset from nimare.meta import ALE, ALESubtraction @@ -26,6 +27,7 @@ def ale_sleuth_workflow( ): """Perform ALE meta-analysis from Sleuth text file.""" LGR.info("Loading coordinates...") + searcher = DatasetSearcher() if fwhm: fwhm_str = f"of {fwhm} mm" @@ -34,7 +36,7 @@ def ale_sleuth_workflow( if not sleuth_file2: dset = convert_sleuth_to_dataset(sleuth_file, target="ale_2mm") - n_subs = dset.get_metadata(field="sample_sizes") + n_subs = searcher.get_metadata(dset, field="sample_sizes") n_subs = np.sum(n_subs) boilerplate = """ @@ -115,9 +117,9 @@ def ale_sleuth_workflow( else: dset1 = convert_sleuth_to_dataset(sleuth_file, target="ale_2mm") dset2 = convert_sleuth_to_dataset(sleuth_file2, target="ale_2mm") - n_subs1 = dset1.get_metadata(field="sample_sizes") + n_subs1 = searcher.get_metadata(dset1, field="sample_sizes") n_subs1 = np.sum(n_subs1) - n_subs2 = dset2.get_metadata(field="sample_sizes") + n_subs2 = searcher.get_metadata(dset2, field="sample_sizes") n_subs2 = np.sum(n_subs2) boilerplate = """ From 6c8ef33e751038238e284515a668f6bbbc3ca0a9 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Mon, 25 Apr 2022 11:15:36 -0400 Subject: [PATCH 06/18] Update get_images. 
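
Image lookups in the examples, kernel transformers, and workflow tests follow
the same pattern. A short sketch, assuming ``dset`` is an image-based Dataset:

    from nimare.dataset import DatasetSearcher

    searcher = DatasetSearcher()
    # One entry per study; studies without a z map get None.
    z_images = searcher.get_images(dset, imtype="z")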
--- examples/01_datasets/01_plot_dataset_io.py | 15 ++++++++------ nimare/base.py | 4 +++- nimare/meta/kernel.py | 23 ++++++++++++++++------ nimare/tests/test_workflows.py | 7 +++++-- 4 files changed, 34 insertions(+), 15 deletions(-) diff --git a/examples/01_datasets/01_plot_dataset_io.py b/examples/01_datasets/01_plot_dataset_io.py index f83a214d2..9dcb9c962 100644 --- a/examples/01_datasets/01_plot_dataset_io.py +++ b/examples/01_datasets/01_plot_dataset_io.py @@ -14,7 +14,7 @@ # ----------------------------------------------------------------------------- import os -from nimare.dataset import Dataset +from nimare.dataset import Dataset, DatasetSearcher from nimare.extract import download_nidm_pain from nimare.transforms import ImageTransformer from nimare.utils import get_resource_path @@ -127,8 +127,11 @@ dset.images[["id", "varcope"]].head() ############################################################################### -# Datasets support many search methods +# The DatasetSearcher class can search Datasets # ----------------------------------------------------------------------------- +searcher = DatasetSearcher() + +############################################################################### # There are ``get_[X]`` and ``get_studies_by_[X]`` methods for a range of # possible search criteria. # The ``get_[X]`` methods allow you to search for specific metadata, while the @@ -139,7 +142,7 @@ # by default, and for every requested study if the ``ids`` argument is provided. # If a study does not have the data requested, the returned list will have # ``None`` for that study. -z_images = dset.get_images(imtype="z") +z_images = searcher.get_images(dset, imtype="z") z_images = [str(z) for z in z_images] print("\n".join(z_images)) @@ -148,16 +151,16 @@ # ````````````````````````````````````````````````````````````````````````````` z_transformer = ImageTransformer(target="z") dset = z_transformer.transform(dset) -z_images = dset.get_images(imtype="z") +z_images = searcher.get_images(dset, imtype="z") z_images = [str(z) for z in z_images] print("\n".join(z_images)) ############################################################################### -# Datasets can also search for studies matching criteria +# DatasetSearchers can also search for studies matching criteria # ----------------------------------------------------------------------------- # ``get_studies_by_[X]`` methods return a list of study identifiers matching # the criteria, such as reporting a peak coordinate near a search coordinate. 
-sel_studies = dset.get_studies_by_coordinate(xyz=[[0, 0, 0]], r=20) +sel_studies = searcher.get_studies_by_coordinate(dset, xyz=[[0, 0, 0]], r=20) print("\n".join(sel_studies)) ############################################################################### diff --git a/nimare/base.py b/nimare/base.py index 1bcb88959..e4f5e2da8 100644 --- a/nimare/base.py +++ b/nimare/base.py @@ -361,6 +361,7 @@ def __init__(self, *args, **kwargs): def _preprocess_input(self, dataset): """Preprocess inputs to the Estimator from the Dataset as needed.""" masker = self.masker or dataset.masker + searcher = DatasetSearcher() mask_img = masker.mask_img or masker.labels_img if isinstance(mask_img, str): @@ -420,7 +421,8 @@ def _preprocess_input(self, dataset): if hasattr(self, "kernel_transformer"): self.kernel_transformer._infer_names(affine=md5(mask_img.affine).hexdigest()) if self.kernel_transformer.image_type in dataset.images.columns: - files = dataset.get_images( + files = searcher.get_images( + dataset, ids=self.inputs_["id"], imtype=self.kernel_transformer.image_type, ) diff --git a/nimare/meta/kernel.py b/nimare/meta/kernel.py index 31d73e8c8..862da96e1 100644 --- a/nimare/meta/kernel.py +++ b/nimare/meta/kernel.py @@ -16,17 +16,23 @@ import pandas as pd from nilearn import image -from .. import references -from ..base import Transformer -from ..due import due -from ..utils import ( +from nimare import references +from nimare.base import Transformer +from nimare.dataset import DatasetSearcher +from nimare.due import due +from nimare.meta.utils import ( + compute_ale_ma, + compute_kda_ma, + compute_p2m_ma, + get_ale_kernel, +) +from nimare.utils import ( _add_metadata_to_dataframe, _safe_transform, mm2vox, use_memmap, vox2mm, ) -from .utils import compute_ale_ma, compute_kda_ma, compute_p2m_ma, get_ale_kernel LGR = logging.getLogger(__name__) @@ -132,6 +138,7 @@ def transform(self, dataset, masker=None, return_type="image"): # but has different affine, from original IJK. coordinates[["i", "j", "k"]] = mm2vox(coordinates[["x", "y", "z"]], mask.affine) else: + searcher = DatasetSearcher() masker = dataset.masker if not masker else masker mask = masker.mask_img coordinates = dataset.coordinates.copy() @@ -143,7 +150,11 @@ def transform(self, dataset, masker=None, return_type="image"): # Use coordinates to get IDs instead of Dataset.ids bc of possible # mismatch between full Dataset and contrasts with coordinates. if self.image_type in dataset.images.columns: - files = dataset.get_images(ids=coordinates["id"].unique(), imtype=self.image_type) + files = searcher.get_images( + dataset, + ids=coordinates["id"].unique(), + imtype=self.image_type, + ) if all(f is not None for f in files): LGR.debug("Files already exist. 
Using them.") if return_type == "array": diff --git a/nimare/tests/test_workflows.py b/nimare/tests/test_workflows.py index 526ec90ac..0fffb4c52 100644 --- a/nimare/tests/test_workflows.py +++ b/nimare/tests/test_workflows.py @@ -2,6 +2,7 @@ import os.path as op from nimare import cli, workflows +from nimare.dataset import DatasetSearcher from nimare.tests.utils import get_test_data_path @@ -127,7 +128,8 @@ def test_conperm_workflow_function_smoke(testdata_ibma, tmp_path_factory): """Run smoke test of the contrast permutation workflow as a function.""" tmpdir = tmp_path_factory.mktemp("test_conperm_workflow_function_smoke") dset = testdata_ibma - files = dset.get_images(imtype="beta") + searcher = DatasetSearcher() + files = searcher.get_images(dset, imtype="beta") mask_image = op.join(get_test_data_path(), "test_pain_dataset", "mask.nii.gz") prefix = "test" @@ -142,7 +144,8 @@ def test_conperm_workflow_cli_smoke(testdata_ibma, tmp_path_factory): """Run smoke test of the contrast permutation workflow as a CLI.""" tmpdir = tmp_path_factory.mktemp("test_conperm_workflow_cli_smoke") dset = testdata_ibma - files = dset.get_images(imtype="beta") + searcher = DatasetSearcher() + files = searcher.get_images(dset, imtype="beta") mask_image = op.join(get_test_data_path(), "test_pain_dataset", "mask.nii.gz") prefix = "test" From a4646854a1ab495f254a0d06300cf8c609b62390 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Mon, 25 Apr 2022 11:18:59 -0400 Subject: [PATCH 07/18] Update get_studies_by_mask. --- examples/02_meta-analyses/07_macm.py | 5 ++-- .../04_decoding/01_plot_discrete_decoders.py | 5 ++-- nimare/decode/continuous.py | 26 +++++++++++-------- nimare/workflows/macm.py | 5 ++-- 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/examples/02_meta-analyses/07_macm.py b/examples/02_meta-analyses/07_macm.py index ee331d363..39f9ac032 100644 --- a/examples/02_meta-analyses/07_macm.py +++ b/examples/02_meta-analyses/07_macm.py @@ -17,7 +17,7 @@ from nilearn import datasets, image, plotting from nimare.correct import FWECorrector -from nimare.dataset import Dataset +from nimare.dataset import Dataset, DatasetSearcher from nimare.meta.cbma.ale import SCALE from nimare.meta.cbma.mkda import MKDAChi2 @@ -44,7 +44,8 @@ ############################################################################### # Select studies with a reported coordinate in the ROI # ----------------------------------------------------------------------------- -roi_ids = dset.get_studies_by_mask(roi_img) +searcher = DatasetSearcher() +roi_ids = searcher.get_studies_by_mask(dset, roi_img) dset_sel = dset.slice(roi_ids) print(f"{len(roi_ids)}/{len(dset.ids)} studies report at least one coordinate in the ROI") diff --git a/examples/04_decoding/01_plot_discrete_decoders.py b/examples/04_decoding/01_plot_discrete_decoders.py index 123eec0d3..890d09a9e 100644 --- a/examples/04_decoding/01_plot_discrete_decoders.py +++ b/examples/04_decoding/01_plot_discrete_decoders.py @@ -17,7 +17,7 @@ import numpy as np from nilearn.plotting import plot_roi -from nimare.dataset import Dataset +from nimare.dataset import Dataset, DatasetSearcher from nimare.decode import discrete from nimare.utils import get_resource_path @@ -40,7 +40,8 @@ plot_roi(mask_img, draw_cross=False) # Get studies with voxels in the mask -ids = dset.get_studies_by_mask(mask_img) +searcher = DatasetSearcher() +ids = searcher.get_studies_by_mask(dset, mask_img) ############################################################################### # diff --git 
diff --git a/nimare/decode/continuous.py b/nimare/decode/continuous.py
index 457acf528..393d66c1a 100755
--- a/nimare/decode/continuous.py
+++ b/nimare/decode/continuous.py
@@ -8,14 +8,15 @@
 from nilearn.masking import apply_mask
 from tqdm.auto import tqdm
 
-from .. import references
-from ..base import Decoder
-from ..due import due
-from ..meta.cbma.base import CBMAEstimator
-from ..meta.cbma.mkda import MKDAChi2
-from ..stats import pearson
-from ..utils import _check_type, _safe_transform
-from .utils import weight_priors
+from nimare import references
+from nimare.base import Decoder
+from nimare.dataset import DatasetSearcher
+from nimare.decode.utils import weight_priors
+from nimare.due import due
+from nimare.meta.cbma.base import CBMAEstimator
+from nimare.meta.cbma.mkda import MKDAChi2
+from nimare.stats import pearson
+from nimare.utils import _check_type, _safe_transform
 
 LGR = logging.getLogger(__name__)
 
@@ -182,10 +183,12 @@ def _fit(self, dataset):
             Masked meta-analytic maps
         """
         self.masker = dataset.masker
+        searcher = DatasetSearcher()
 
         n_features = len(self.features_)
         for i_feature, feature in enumerate(tqdm(self.features_, total=n_features)):
-            feature_ids = dataset.get_studies_by_label(
+            feature_ids = searcher.get_studies_by_label(
+                dataset,
                 labels=[feature],
                 label_threshold=self.frequency_threshold,
             )
@@ -292,11 +295,12 @@ def _fit(self, dataset):
             Masked meta-analytic maps
         """
         self.masker = dataset.masker
+        searcher = DatasetSearcher()
 
         images_ = {}
         for feature in self.features_:
-            feature_ids = dataset.get_studies_by_label(
-                labels=[feature], label_threshold=self.frequency_threshold
+            feature_ids = searcher.get_studies_by_label(
+                dataset, labels=[feature], label_threshold=self.frequency_threshold
             )
             selected_ids = sorted(list(set(feature_ids).intersection(self.inputs_["id"])))
             selected_id_idx = [
diff --git a/nimare/workflows/macm.py b/nimare/workflows/macm.py
index 6f368c037..1c9fb2213 100644
--- a/nimare/workflows/macm.py
+++ b/nimare/workflows/macm.py
@@ -5,7 +5,7 @@
 from shutil import copyfile
 
 from ..correct import FWECorrector
-from ..dataset import Dataset
+from ..dataset import Dataset, DatasetSearcher
 from ..meta import ALE
 
 LGR = logging.getLogger(__name__)
@@ -17,7 +17,8 @@ def macm_workflow(
     """Perform MACM with ALE algorithm."""
     LGR.info("Loading coordinates...")
     dset = Dataset(dataset_file)
-    sel_ids = dset.get_studies_by_mask(mask_file)
+    searcher = DatasetSearcher()
+    sel_ids = searcher.get_studies_by_mask(dset, mask_file)
     sel_dset = dset.slice(sel_ids)
 
     # override sample size
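
The decoders updated above select studies by annotation label in the same dataset-first style. A rough sketch for a single label (the label name is hypothetical, and the threshold value is an assumption; in practice the decoder passes its own frequency_threshold):

    searcher = DatasetSearcher()
    feature_ids = searcher.get_studies_by_label(
        dset,
        labels=["Neurosynth_TFIDF__pain"],  # hypothetical annotation label
        label_threshold=0.001,  # assumed cutoff for counting a study as "labeled"
    )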
From 396a43a1e01b99346f13c6ad361aceb04b77d99e Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Mon, 25 Apr 2022 12:03:58 -0400
Subject: [PATCH 08/18] Add test. Still need to deal with circular imports.

---
 nimare/base.py               |  7 ++++++-
 nimare/tests/test_dataset.py | 34 +++++++++++++++++++---------------
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/nimare/base.py b/nimare/base.py
index e4f5e2da8..59f01c231 100644
--- a/nimare/base.py
+++ b/nimare/base.py
@@ -13,7 +13,6 @@
 from nilearn._utils.niimg_conversions import _check_same_fov
 from nilearn.image import concat_imgs, resample_to_img
 
-from nimare.dataset import DatasetSearcher
 from nimare.results import MetaResult
 from nimare.utils import get_masker, mm2vox
 
@@ -254,6 +253,8 @@ class Estimator(NiMAREBase):
     def _validate_input(self, dataset, drop_invalid=True):
         """Search for, and validate, required inputs as necessary."""
+        from nimare.dataset import DatasetSearcher
+
         if not hasattr(dataset, "slice"):
             raise ValueError(
                 f"Argument 'dataset' must be a valid Dataset object, not a {type(dataset)}."
@@ -360,6 +361,8 @@ def __init__(self, *args, **kwargs):
     def _preprocess_input(self, dataset):
         """Preprocess inputs to the Estimator from the Dataset as needed."""
+        from nimare.dataset import DatasetSearcher
+
         masker = self.masker or dataset.masker
         searcher = DatasetSearcher()
 
@@ -482,6 +485,8 @@ class Decoder(NiMAREBase):
     def _validate_input(self, dataset, drop_invalid=True):
         """Search for, and validate, required inputs as necessary."""
+        from nimare.dataset import DatasetSearcher
+
         if not hasattr(dataset, "slice"):
             raise ValueError(
                 f"Argument 'dataset' must be a valid Dataset object, not a {type(dataset)}."
diff --git a/nimare/tests/test_dataset.py b/nimare/tests/test_dataset.py
index 65bc26238..cbb5864d8 100644
--- a/nimare/tests/test_dataset.py
+++ b/nimare/tests/test_dataset.py
@@ -10,6 +10,25 @@
 from nimare.tests.utils import get_test_data_path
 
 
+def test_DatasetSearcher(testdata_laird):
+    dset = testdata_laird.copy()
+    searcher = dataset.DatasetSearcher
+    METHODS = [searcher.get_images, searcher.get_labels, searcher.get_metadata, searcher.get_texts]
+    for method in METHODS:
+        assert isinstance(method(dset), list)
+        assert isinstance(method(dset, ids=dset.ids[:5]), list)
+        assert isinstance(method(dset, ids=dset.ids[0]), list)
+
+    assert isinstance(searcher.get_images(dset, imtype="beta"), list)
+    assert isinstance(searcher.get_metadata(dset, field="sample_sizes"), list)
+    assert isinstance(searcher.get_studies_by_label(dset, "cogat_cognitive_control"), list)
+    assert isinstance(searcher.get_studies_by_coordinate(dset, np.array([[20, 20, 20]])), list)
+
+    # If label is not available, raise ValueError
+    with pytest.raises(ValueError):
+        searcher.get_studies_by_label(dset, "dog")
+
+
 def test_dataset_smoke():
     """Smoke test for nimare.dataset.Dataset initialization and get methods."""
     db_file = op.join(get_test_data_path(), "neurosynth_dset.json")
@@ -19,21 +38,6 @@ def test_dataset_smoke():
     # Test that Dataset.masker is portable
     assert not nib.is_proxy(dset.masker.mask_img_.dataobj)
 
-    methods = [dset.get_images, dset.get_labels, dset.get_metadata, dset.get_texts]
-    for method in methods:
-        assert isinstance(method(), list)
-        assert isinstance(method(ids=dset.ids[:5]), list)
-        assert isinstance(method(ids=dset.ids[0]), list)
-
-    assert isinstance(dset.get_images(imtype="beta"), list)
-    assert isinstance(dset.get_metadata(field="sample_sizes"), list)
-    assert isinstance(dset.get_studies_by_label("cogat_cognitive_control"), list)
-    assert isinstance(dset.get_studies_by_coordinate(np.array([[20, 20, 20]])), list)
-
-    # If label is not available, raise ValueError
-    with pytest.raises(ValueError):
-        dset.get_studies_by_label("dog")
-
     mask_data = np.zeros(dset.masker.mask_img.shape, int)
     mask_data[40, 40, 40] = 1
     mask_img = nib.Nifti1Image(mask_data, dset.masker.mask_img.affine)
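
The circular import noted in the commit message arises because nimare.dataset already imports from nimare.base (DatasetSearcher inherits from NiMAREBase), so nimare.base cannot import nimare.dataset at module level. The patch defers the import to call time, a standard way to break such cycles in Python; a sketch of the pattern:

    def _preprocess_input(self, dataset):
        """Preprocess inputs to the Estimator from the Dataset as needed."""
        # Deferred import: breaks the nimare.base <-> nimare.dataset cycle,
        # at the cost of a (cached) import lookup on each call.
        from nimare.dataset import DatasetSearcher

        searcher = DatasetSearcher()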
     mask_data = np.zeros(dset.masker.mask_img.shape, int)
     mask_data[40, 40, 40] = 1
     mask_img = nib.Nifti1Image(mask_data, dset.masker.mask_img.affine)

From c81785d5f2d49c1ccaf7fc0e89a5370b63e586e4 Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Thu, 28 Apr 2022 12:41:19 -0400
Subject: [PATCH 09/18] Fix name.

---
 nimare/dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nimare/dataset.py b/nimare/dataset.py
index 2240caa17..cbb0855be 100755
--- a/nimare/dataset.py
+++ b/nimare/dataset.py
@@ -26,7 +26,7 @@
 LGR = logging.getLogger(__name__)
 
 
-class DatasetSeacher(NiMAREBase):
+class DatasetSearcher(NiMAREBase):
     """A tool for searching Datasets."""
 
     def get(self, dataset, dict_, drop_invalid=True):

From 09bf9f3a0015e4563eb7d3c81cbff513545d0bfc Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Thu, 28 Apr 2022 12:48:50 -0400
Subject: [PATCH 10/18] Move some functions around.

---
 nimare/annotate/cogat.py |  2 +-
 nimare/extract/utils.py  | 30 ++++++++++++-
 nimare/meta/cbma/base.py | 15 +++----
 nimare/meta/kernel.py    |  9 +---
 nimare/meta/utils.py     | 71 +++++++++++++++++++++++++++++
 nimare/utils.py          | 97 ----------------------------------------
 6 files changed, 109 insertions(+), 115 deletions(-)

diff --git a/nimare/annotate/cogat.py b/nimare/annotate/cogat.py
index a6264598a..6c6365a99 100755
--- a/nimare/annotate/cogat.py
+++ b/nimare/annotate/cogat.py
@@ -9,7 +9,7 @@
 from nimare.annotate import utils
 from nimare.due import due
 from nimare.extract import download_cognitive_atlas
-from nimare.utils import _uk_to_us
+from nimare.extract.utils import _uk_to_us
 
 LGR = logging.getLogger(__name__)
 
diff --git a/nimare/extract/utils.py b/nimare/extract/utils.py
index 710dccafe..5341fa6be 100644
--- a/nimare/extract/utils.py
+++ b/nimare/extract/utils.py
@@ -4,13 +4,14 @@
 import logging
 import os
 import os.path as op
+import re
 
 import numpy as np
 import pandas as pd
 import requests
 from fuzzywuzzy import fuzz
 
-from nimare.utils import _uk_to_us
+from nimare.utils import get_resource_path
 
 LGR = logging.getLogger(__name__)
 
@@ -292,3 +293,30 @@ def _expand_df(df):
     df["ratio"] = df[["alias", "name"]].apply(_get_ratio, axis=1)
     df = df.sort_values(by=["length", "ratio"], ascending=[False, False])
     return df
+
+
+def _uk_to_us(text):
+    """Convert UK spellings to US based on a converter.
+
+    .. versionadded:: 0.0.2
+
+    Parameters
+    ----------
+    text : :obj:`str`
+
+    Returns
+    -------
+    text : :obj:`str`
+
+    Notes
+    -----
+    The english_spellings.csv file is from http://www.tysto.com/uk-us-spelling-list.html.
+    """
+    SPELL_DF = pd.read_csv(op.join(get_resource_path(), "english_spellings.csv"), index_col="UK")
+    SPELL_DICT = SPELL_DF["US"].to_dict()
+
+    if isinstance(text, str):
+        # Convert British to American English
+        pattern = re.compile(r"\b(" + "|".join(SPELL_DICT.keys()) + r")\b")
+        text = pattern.sub(lambda x: SPELL_DICT[x.group()], text)
+    return text
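
_uk_to_us is a private helper, but its behavior is easy to demonstrate. A quick sketch (the exact output depends on the pairs in the bundled english_spellings.csv, so the result shown is an assumption):

    from nimare.extract.utils import _uk_to_us

    text = _uk_to_us("The colour of behaviour")
    print(text)  # expected: "The color of behavior"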
+ """ + SPELL_DF = pd.read_csv(op.join(get_resource_path(), "english_spellings.csv"), index_col="UK") + SPELL_DICT = SPELL_DF["US"].to_dict() + + if isinstance(text, str): + # Convert British to American English + pattern = re.compile(r"\b(" + "|".join(SPELL_DICT.keys()) + r")\b") + text = pattern.sub(lambda x: SPELL_DICT[x.group()], text) + return text diff --git a/nimare/meta/cbma/base.py b/nimare/meta/cbma/base.py index 7b8665949..0d4b5077f 100644 --- a/nimare/meta/cbma/base.py +++ b/nimare/meta/cbma/base.py @@ -10,18 +10,15 @@ from nimare.base import MetaEstimator from nimare.meta.kernel import KernelTransformer -from nimare.meta.utils import _calculate_cluster_measures, _get_last_bin +from nimare.meta.utils import ( + _add_metadata_to_dataframe, + _calculate_cluster_measures, + _get_last_bin, +) from nimare.results import MetaResult from nimare.stats import null_to_p, nullhist_to_p from nimare.transforms import p_to_z -from nimare.utils import ( - _add_metadata_to_dataframe, - _check_type, - _safe_transform, - tqdm_joblib, - use_memmap, - vox2mm, -) +from nimare.utils import _check_type, _safe_transform, tqdm_joblib, use_memmap, vox2mm LGR = logging.getLogger(__name__) diff --git a/nimare/meta/kernel.py b/nimare/meta/kernel.py index 862da96e1..57e49d948 100644 --- a/nimare/meta/kernel.py +++ b/nimare/meta/kernel.py @@ -21,18 +21,13 @@ from nimare.dataset import DatasetSearcher from nimare.due import due from nimare.meta.utils import ( + _add_metadata_to_dataframe, compute_ale_ma, compute_kda_ma, compute_p2m_ma, get_ale_kernel, ) -from nimare.utils import ( - _add_metadata_to_dataframe, - _safe_transform, - mm2vox, - use_memmap, - vox2mm, -) +from nimare.utils import _safe_transform, mm2vox, use_memmap, vox2mm LGR = logging.getLogger(__name__) diff --git a/nimare/meta/utils.py b/nimare/meta/utils.py index 38f54283a..d04fbf16c 100755 --- a/nimare/meta/utils.py +++ b/nimare/meta/utils.py @@ -5,9 +5,11 @@ import nibabel as nib import numpy as np import numpy.linalg as npl +import pandas as pd from scipy import ndimage from nimare import references +from nimare.dataset import DatasetSearcher from nimare.due import due from nimare.extract import download_peaks2maps_model from nimare.utils import _determine_chunk_size @@ -16,6 +18,75 @@ LGR = logging.getLogger(__name__) +def _add_metadata_to_dataframe( + dataset, + dataframe, + metadata_field, + target_column, + filter_func=np.mean, +): + """Add metadata from a Dataset to a DataFrame. + + .. versionadded:: 0.0.8 + + This is particularly useful for kernel transformers or estimators where a given metadata field + is necessary (e.g., ALEKernel with "sample_size"), but we want to just use the coordinates + DataFrame instead of passing the full Dataset. + + Parameters + ---------- + dataset : :obj:`~nimare.dataset.Dataset` + Dataset containing study IDs and metadata to feed into dataframe. + dataframe : :obj:`pandas.DataFrame` + DataFrame containing study IDs, into which Dataset metadata will be merged. + metadata_field : :obj:`str` + Metadata field in ``dataset``. + target_column : :obj:`str` + Name of the column that will be added to ``dataframe``, containing information from the + Dataset. + filter_func : :obj:`function`, optional + Function to apply to the metadata so that it fits as a column in a DataFrame. + Default is ``numpy.mean``. + + Returns + ------- + dataframe : :obj:`pandas.DataFrame` + Updated DataFrame with ``target_column`` added. 
+ """ + dataframe = dataframe.copy() + searcher = DatasetSearcher() + + if metadata_field in searcher.get_metadata(dataset): + # Collect metadata from Dataset + metadata = searcher.get_metadata(dataset, field=metadata_field, ids=dataset.ids) + metadata = [[m] for m in metadata] + # Create a DataFrame with the metadata + metadata = pd.DataFrame( + index=dataset.ids, + data=metadata, + columns=[metadata_field], + ) + # Reduce the metadata (if in list/array format) to single values + metadata[target_column] = metadata[metadata_field].apply(filter_func) + # Merge metadata df into coordinates df + dataframe = dataframe.merge( + right=metadata, + left_on="id", + right_index=True, + sort=False, + validate="many_to_one", + suffixes=(False, False), + how="left", + ) + else: + LGR.warning( + f"Metadata field '{metadata_field}' not found. " + "Set a constant value for this field as an argument, if possible." + ) + + return dataframe + + def model_fn(features, labels, mode, params): """Run model function used internally by peaks2maps. diff --git a/nimare/utils.py b/nimare/utils.py index c04c609e7..cdf3149eb 100755 --- a/nimare/utils.py +++ b/nimare/utils.py @@ -16,7 +16,6 @@ from nilearn.input_data import NiftiMasker from nimare import references -from nimare.dataset import DatasetSearcher from nimare.due import due LGR = logging.getLogger(__name__) @@ -625,33 +624,6 @@ def _find_stem(arr): return res -def _uk_to_us(text): - """Convert UK spellings to US based on a converter. - - .. versionadded:: 0.0.2 - - Parameters - ---------- - text : :obj:`str` - - Returns - ------- - text : :obj:`str` - - Notes - ----- - The english_spellings.csv file is from http://www.tysto.com/uk-us-spelling-list.html. - """ - SPELL_DF = pd.read_csv(op.join(get_resource_path(), "english_spellings.csv"), index_col="UK") - SPELL_DICT = SPELL_DF["US"].to_dict() - - if isinstance(text, str): - # Convert British to American English - pattern = re.compile(r"\b(" + "|".join(SPELL_DICT.keys()) + r")\b") - text = pattern.sub(lambda x: SPELL_DICT[x.group()], text) - return text - - def use_memmap(logger, n_files=1): """Memory-map array to a file, and perform cleanup after. @@ -806,75 +778,6 @@ def _safe_transform(imgs, masker, memory_limit="1gb", dtype="auto", memfile=None return masked_data -def _add_metadata_to_dataframe( - dataset, - dataframe, - metadata_field, - target_column, - filter_func=np.mean, -): - """Add metadata from a Dataset to a DataFrame. - - .. versionadded:: 0.0.8 - - This is particularly useful for kernel transformers or estimators where a given metadata field - is necessary (e.g., ALEKernel with "sample_size"), but we want to just use the coordinates - DataFrame instead of passing the full Dataset. - - Parameters - ---------- - dataset : :obj:`~nimare.dataset.Dataset` - Dataset containing study IDs and metadata to feed into dataframe. - dataframe : :obj:`pandas.DataFrame` - DataFrame containing study IDs, into which Dataset metadata will be merged. - metadata_field : :obj:`str` - Metadata field in ``dataset``. - target_column : :obj:`str` - Name of the column that will be added to ``dataframe``, containing information from the - Dataset. - filter_func : :obj:`function`, optional - Function to apply to the metadata so that it fits as a column in a DataFrame. - Default is ``numpy.mean``. - - Returns - ------- - dataframe : :obj:`pandas.DataFrame` - Updated DataFrame with ``target_column`` added. 
- """ - dataframe = dataframe.copy() - searcher = DatasetSearcher() - - if metadata_field in searcher.get_metadata(dataset): - # Collect metadata from Dataset - metadata = searcher.get_metadata(dataset, field=metadata_field, ids=dataset.ids) - metadata = [[m] for m in metadata] - # Create a DataFrame with the metadata - metadata = pd.DataFrame( - index=dataset.ids, - data=metadata, - columns=[metadata_field], - ) - # Reduce the metadata (if in list/array format) to single values - metadata[target_column] = metadata[metadata_field].apply(filter_func) - # Merge metadata df into coordinates df - dataframe = dataframe.merge( - right=metadata, - left_on="id", - right_index=True, - sort=False, - validate="many_to_one", - suffixes=(False, False), - how="left", - ) - else: - LGR.warning( - f"Metadata field '{metadata_field}' not found. " - "Set a constant value for this field as an argument, if possible." - ) - - return dataframe - - def _check_type(obj, clss, **kwargs): """Check variable type and initialize if necessary. From 841cadf1d1837c20d7ef51c3c57bcf5334849dba Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 28 Apr 2022 12:50:00 -0400 Subject: [PATCH 11/18] Update test_dataset.py --- nimare/tests/test_dataset.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/nimare/tests/test_dataset.py b/nimare/tests/test_dataset.py index cbb5864d8..c8464c282 100644 --- a/nimare/tests/test_dataset.py +++ b/nimare/tests/test_dataset.py @@ -24,6 +24,11 @@ def test_DatasetSearcher(testdata_laird): assert isinstance(searcher.get_studies_by_label(dset, "cogat_cognitive_control"), list) assert isinstance(searcher.get_studies_by_coordinate(dset, np.array([[20, 20, 20]])), list) + mask_data = np.zeros(dset.masker.mask_img.shape, int) + mask_data[40, 40, 40] = 1 + mask_img = nib.Nifti1Image(mask_data, dset.masker.mask_img.affine) + assert isinstance(dset.get_studies_by_mask(mask_img), list) + # If label is not available, raise ValueError with pytest.raises(ValueError): searcher.get_studies_by_label(dset, "dog") @@ -38,11 +43,6 @@ def test_dataset_smoke(): # Test that Dataset.masker is portable assert not nib.is_proxy(dset.masker.mask_img_.dataobj) - mask_data = np.zeros(dset.masker.mask_img.shape, int) - mask_data[40, 40, 40] = 1 - mask_img = nib.Nifti1Image(mask_data, dset.masker.mask_img.affine) - assert isinstance(dset.get_studies_by_mask(mask_img), list) - dset1 = dset.slice(dset.ids[:5]) dset2 = dset.slice(dset.ids[5:]) assert isinstance(dset1, dataset.Dataset) From 96ada78665b0058176d9c8e2338138517fbb10bc Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 28 Apr 2022 12:58:27 -0400 Subject: [PATCH 12/18] Update test_dataset.py --- nimare/tests/test_dataset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nimare/tests/test_dataset.py b/nimare/tests/test_dataset.py index c8464c282..c753dceab 100644 --- a/nimare/tests/test_dataset.py +++ b/nimare/tests/test_dataset.py @@ -11,6 +11,7 @@ def test_DatasetSearcher(testdata_laird): + """Test the DatasetSearcher class.""" dset = testdata_laird.copy() searcher = dataset.DatasetSearcher METHODS = [searcher.get_images, searcher.get_labels, searcher.get_metadata, searcher.get_texts] From f0cca2dbc2cc8da91deb36a05e5d95e6996537d1 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 28 Apr 2022 13:20:06 -0400 Subject: [PATCH 13/18] Fix things. 
From f0cca2dbc2cc8da91deb36a05e5d95e6996537d1 Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Thu, 28 Apr 2022 13:20:06 -0400
Subject: [PATCH 13/18] Fix things.

---
 nimare/dataset.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/nimare/dataset.py b/nimare/dataset.py
index cbb0855be..52e74d289 100755
--- a/nimare/dataset.py
+++ b/nimare/dataset.py
@@ -89,11 +89,11 @@ def get(self, dataset, dict_, drop_invalid=True):
             keep_idx = np.intersect1d(keep_idx, temp_keep_idx)
 
         # reduce
-        if drop_invalid and (len(keep_idx) != len(self.ids)):
-            LGR.info(f"Retaining {len(keep_idx)}/{len(self.ids)} studies")
-        elif len(keep_idx) != len(self.ids):
+        if drop_invalid and (len(keep_idx) != len(dataset.ids)):
+            LGR.info(f"Retaining {len(keep_idx)}/{len(dataset.ids)} studies")
+        elif len(keep_idx) != len(dataset.ids):
             raise Exception(
-                f"Only {len(keep_idx)}/{len(self.ids)} in Dataset contain the necessary data. "
+                f"Only {len(keep_idx)}/{len(dataset.ids)} in Dataset contain the necessary data. "
                 "If you want to analyze the subset of studies with required data, "
                 "set `drop_invalid` to True."
             )
@@ -226,7 +226,7 @@ def get_metadata(self, dataset, ids=None, field=None):
         metadata : :obj:`list`
             List of values of requested type for selected IDs.
         """
-        result = dataset._generic_column_getter(dataset, "metadata", ids=ids, column=field)
+        result = self._generic_column_getter(dataset, "metadata", ids=ids, column=field)
         return result
 
     def get_images(self, dataset, ids=None, imtype=None):
@@ -357,6 +357,10 @@ def get_studies_by_coordinate(self, dataset, xyz, r=20):
 class Dataset(NiMAREBase):
     """Storage container for a coordinate- and/or image-based meta-analytic dataset/database.
 
+    .. versionchanged:: 0.0.12
+
+        All search methods have been moved out of Dataset and into DatasetSearcher.
+
     .. versionchanged:: 0.0.9
 
         * [ENH] Add merge method to Dataset class
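
With these fixes, DatasetSearcher.get counts and validates studies against the Dataset being searched rather than against the searcher itself. A small usage sketch (the key/tuple format follows the required-inputs convention used elsewhere in this series):

    searcher = DatasetSearcher()
    inputs = searcher.get(
        dset,
        {"coordinates": ("coordinates", None)},
        drop_invalid=True,  # keep only studies that have the requested data
    )
    coordinates = inputs["coordinates"]  # concatenated DataFrame for the retained studies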
From 40e470f897c4625df7ec1b13efb3b1e60ff0b8e1 Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Thu, 28 Apr 2022 13:27:20 -0400
Subject: [PATCH 14/18] Fix more.

---
 nimare/tests/test_dataset.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/nimare/tests/test_dataset.py b/nimare/tests/test_dataset.py
index c753dceab..b102dd659 100644
--- a/nimare/tests/test_dataset.py
+++ b/nimare/tests/test_dataset.py
@@ -13,22 +13,25 @@ def test_DatasetSearcher(testdata_laird):
     """Test the DatasetSearcher class."""
     dset = testdata_laird.copy()
-    searcher = dataset.DatasetSearcher
+    searcher = dataset.DatasetSearcher()
     METHODS = [searcher.get_images, searcher.get_labels, searcher.get_metadata, searcher.get_texts]
     for method in METHODS:
         assert isinstance(method(dset), list)
         assert isinstance(method(dset, ids=dset.ids[:5]), list)
         assert isinstance(method(dset, ids=dset.ids[0]), list)
 
-    assert isinstance(searcher.get_images(dset, imtype="beta"), list)
-    assert isinstance(searcher.get_metadata(dset, field="sample_sizes"), list)
-    assert isinstance(searcher.get_studies_by_label(dset, "cogat_cognitive_control"), list)
+    # This test dataset has no images
+    with pytest.raises(ValueError):
+        searcher.get_images(dset, imtype="beta")
+
+    assert isinstance(searcher.get_metadata(dset, field="journal"), list)
+    assert isinstance(searcher.get_studies_by_label(dset, "Neurosynth_TFIDF__analyze"), list)
     assert isinstance(searcher.get_studies_by_coordinate(dset, np.array([[20, 20, 20]])), list)
 
     mask_data = np.zeros(dset.masker.mask_img.shape, int)
     mask_data[40, 40, 40] = 1
     mask_img = nib.Nifti1Image(mask_data, dset.masker.mask_img.affine)
-    assert isinstance(dset.get_studies_by_mask(mask_img), list)
+    assert isinstance(searcher.get_studies_by_mask(dset, mask=mask_img), list)
 
     # If label is not available, raise ValueError
     with pytest.raises(ValueError):

From 560dad94d72d2d9793da9ef4b81d375a0f06d7d6 Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Thu, 5 May 2022 14:23:05 -0400
Subject: [PATCH 15/18] Re-import.

---
 nimare/base.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/nimare/base.py b/nimare/base.py
index 1143199cc..b6b244a51 100644
--- a/nimare/base.py
+++ b/nimare/base.py
@@ -260,6 +260,8 @@ def _collect_inputs(self, dataset, drop_invalid=True):
             )
 
         if self._required_inputs:
+            from nimare.dataset import DatasetSearcher
+
             searcher = DatasetSearcher()
             data = searcher.get(dataset, self._required_inputs, drop_invalid=drop_invalid)
             # Do not overwrite existing inputs_ attribute.

From 963656227df148f7cc695c5331d66d4a4debb19a Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Thu, 5 May 2022 14:36:40 -0400
Subject: [PATCH 16/18] Update base.py

---
 nimare/meta/cbma/base.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/nimare/meta/cbma/base.py b/nimare/meta/cbma/base.py
index 16f92d4b3..6bf957f36 100644
--- a/nimare/meta/cbma/base.py
+++ b/nimare/meta/cbma/base.py
@@ -21,7 +21,6 @@
 from nimare.stats import null_to_p, nullhist_to_p
 from nimare.transforms import p_to_z
 from nimare.utils import (
-    _add_metadata_to_dataframe,
     _check_ncores,
     _check_type,
     _safe_transform,
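
Patch 15's deferred import lives in Estimator._collect_inputs, which hands the class's _required_inputs specification to the searcher. A sketch of how a subclass might declare what it needs (the class and its dictionary are illustrative, not taken from the diff):

    class MyCoordinateEstimator(Estimator):
        # Hypothetical subclass requesting the coordinates DataFrame for every study
        _required_inputs = {"coordinates": ("coordinates", None)}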
From 6712d57e0199d3f89a17da0a36fa9d2fb0d747bd Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Thu, 5 May 2022 15:41:48 -0400
Subject: [PATCH 17/18] Fix.

---
 nimare/decode/base.py    | 4 +++-
 nimare/meta/cbma/base.py | 5 ++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/nimare/decode/base.py b/nimare/decode/base.py
index 7c401ff38..4f2ada899 100644
--- a/nimare/decode/base.py
+++ b/nimare/decode/base.py
@@ -3,6 +3,7 @@
 from abc import abstractmethod
 
 from nimare.base import NiMAREBase
+from nimare.dataset import DatasetSearcher
 
 LGR = logging.getLogger(__name__)
 
@@ -28,7 +29,8 @@ def _collect_inputs(self, dataset, drop_invalid=True):
             )
 
         if self._required_inputs:
-            data = dataset.get(self._required_inputs, drop_invalid=drop_invalid)
+            searcher = DatasetSearcher()
+            data = searcher.get(dataset, self._required_inputs, drop_invalid=drop_invalid)
             # Do not overwrite existing inputs_ attribute.
             # This is necessary for PairwiseCBMAEstimator, which validates two sets of coordinates
             # in the same object.
diff --git a/nimare/meta/cbma/base.py b/nimare/meta/cbma/base.py
index 6bf957f36..9b478db48 100644
--- a/nimare/meta/cbma/base.py
+++ b/nimare/meta/cbma/base.py
@@ -11,6 +11,7 @@
 from tqdm.auto import tqdm
 
 from nimare.base import Estimator
+from nimare.dataset import DatasetSearcher
 from nimare.meta.kernel import KernelTransformer
 from nimare.meta.utils import (
     _add_metadata_to_dataframe,
@@ -105,6 +106,7 @@ def _preprocess_input(self, dataset):
         (2) IJK coordinates will be added based on the mask image's affine, and
         (3) sample sizes may be added to the "coordinates" key, as needed.
         """
+        searcher = DatasetSearcher()
         masker = self.masker or dataset.masker
 
         mask_img = masker.mask_img or masker.labels_img
@@ -117,7 +119,8 @@ def _preprocess_input(self, dataset):
         if hasattr(self, "kernel_transformer"):
             self.kernel_transformer._infer_names(affine=md5(mask_img.affine).hexdigest())
             if self.kernel_transformer.image_type in dataset.images.columns:
-                files = dataset.get_images(
+                files = searcher.get_images(
+                    dataset,
                     ids=self.inputs_["id"],
                     imtype=self.kernel_transformer.image_type,
                 )

From bc67dbc3cc770e6a56a2db6e0771e06e97270362 Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Tue, 31 May 2022 11:56:58 -0400
Subject: [PATCH 18/18] Update api.rst

---
 docs/api.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/api.rst b/docs/api.rst
index ad6c6c1d8..71bf7414a 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -17,6 +17,7 @@ API
    :template: class.rst
 
    dataset.Dataset
+   dataset.DatasetSearcher
 
 .. _api_meta_ref:
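
Taken together, the series leaves Dataset as a storage container and routes every query through the DatasetSearcher now exposed in the API reference. An end-to-end sketch of the resulting workflow (the file name and search values are placeholders):

    from nimare.dataset import Dataset, DatasetSearcher

    dset = Dataset("my_dataset.json")  # hypothetical NiMARE dataset file
    searcher = DatasetSearcher()

    # Search by coordinate, then restrict analysis to the matching studies
    ids = searcher.get_studies_by_coordinate(dset, xyz=[[0, 0, 0]], r=20)
    sub_dset = dset.slice(ids)

    # Metadata lookups follow the same dataset-first convention
    sample_sizes = searcher.get_metadata(sub_dset, field="sample_sizes")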