diff --git a/beep/cmd.py b/beep/cmd.py
index d6569056..f96cf476 100644
--- a/beep/cmd.py
+++ b/beep/cmd.py
@@ -42,12 +42,13 @@
 from beep.structure.base import BEEPDatapath
 from beep.structure.cli import auto_load, auto_load_processed
 from beep.structure.validate import BEEPValidationError
-from beep.features.base import (
+from beep.features.featurizer import (
     BEEPFeaturizer,
     BEEPFeaturizationError,
-    BEEPFeatureMatrix,
 )
-from beep.features.core import (
+from beep.features.matrix import BEEPFeatureMatrix
+
+from beep.features.early_cycles import (
     HPPCResistanceVoltageFeatures,
     DeltaQFastCharge,
     TrajectoryFastCharge,
diff --git a/beep/features/base.py b/beep/features/base.py
deleted file mode 100644
index b70a596f..00000000
--- a/beep/features/base.py
+++ /dev/null
@@ -1,363 +0,0 @@
-# Copyright [2020] [Toyota Research Institute]
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-
-For creating features and organizing them into datasets.
-
-"""
-import os
-import copy
-import abc
-import json
-import hashlib
-from typing import Union, Tuple, List
-
-import pandas as pd
-from monty.io import zopen
-from monty.json import MSONable, MontyDecoder
-from monty.serialization import loadfn, dumpfn
-
-from beep.structure.base import BEEPDatapath
-
-
-class BEEPFeaturizationError(BaseException):
-    """Raise when a featurization-specific error occurs"""
-    pass
-
-
-class BEEPFeatureMatrixError(BaseException):
-    """ Raise when there is a BEEP-specific problem with a dataset"""
-    pass
-
-
-class BEEPFeaturizer(MSONable, abc.ABC):
-    """
-    Base class for all beep feature generation.
-
-    From a structured battery file representing many cycles of one cell,
-    (AKA a structured datapath), produce a feature vector.
-
-    Works for generating both
-     - Vectors X to use as training vectors
-     - Vectors or scalars y to use as ML targets
-        (as problems may have multiple metrics to predict)
-
-    """
-
-    DEFAULT_HYPERPARAMETERS = {}
-
-    def __init__(self, structured_datapath: Union[BEEPDatapath, None], hyperparameters: Union[dict, None] = None):
-        # If all required hyperparameters are specified, use those
-        # If some subset of required hyperparameters are specified, throw error
-        # If no hyperparameters are specified, use defaults
-        if hyperparameters:
-            if all(k in hyperparameters for k in self.DEFAULT_HYPERPARAMETERS):
-                self.hyperparameters = hyperparameters
-            else:
-                raise BEEPFeaturizationError(
-                    f"Features cannot be created with incomplete set of "
-                    f"hyperparameters {hyperparameters.keys()} < "
-                    f"{self.DEFAULT_HYPERPARAMETERS.keys()}!")
-        else:
-            self.hyperparameters = self.DEFAULT_HYPERPARAMETERS
-
-        if not (structured_datapath is None or structured_datapath.is_structured):
-            raise BEEPFeaturizationError("BEEPDatapath input is not structured!")
-        self.datapath = structured_datapath
-
-        self.features = None
-
-        # In case these features are loaded from file
-        # Allow attrs which can hold relevant metadata without having
-        # to reload the original datapath
-        self.paths = self.datapath.paths if self.datapath else {}
-        self.metadata = self.datapath.metadata.raw if self.datapath else {}
-        self.linked_semiunique_id = self.datapath.semiunique_id if self.datapath else None
-
-    @abc.abstractmethod
-    def validate(self) -> Tuple[bool, Union[str, None]]:
-        """
-        Validate a featurizer on it's ingested datapath.
-
-        Returns:
-            (bool, str/None): The validation result and it's message.
-
-        """
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def create_features(self) -> None:
-        """
-        Should assign a dataframe to self.features.
-
-        Returns:
-            None
-        """
-        raise NotImplementedError
-
-    def as_dict(self):
-        """Serialize a BEEPDatapath as a dictionary.
-
-        Must not be loaded from legacy.
-
-        Returns:
-            (dict): corresponding to dictionary for serialization.
-
-        """
-
-        if self.features is None:
-            raise BEEPFeaturizationError("Cannot serialize features which have not been generated.")
-
-        features = self.features.to_dict("list")
-
-        return {
-            "@module": self.__class__.__module__,
-            "@class": self.__class__.__name__,
-
-            # Core parts of BEEPFeaturizer
-            "features": features,
-            "hyperparameters": self.hyperparameters,
-            "paths": self.paths,
-            "metadata": self.metadata,
-            "linked_datapath_semiunique_id": self.linked_semiunique_id
-        }
-
-    @classmethod
-    def from_dict(cls, d):
-        """Create a BEEPDatapath object from a dictionary.
-
-        Args:
-            d (dict): dictionary represenation.
-
-        Returns:
-            beep.structure.ProcessedCyclerRun: deserialized ProcessedCyclerRun.
-        """
-
-        # no need for original datapath
-        bf = cls(structured_datapath=None, hyperparameters=d["hyperparameters"])
-        bf.features = pd.DataFrame(d["features"])
-        bf.paths = d["paths"]
-        bf.metadata = d["metadata"]
-        bf.linked_semiunique_id = d["linked_datapath_semiunique_id"]
-        return bf
-
-    @classmethod
-    def from_json_file(cls, filename):
-        """Load a structured run previously saved to file.
-
-        .json.gz files are supported.
-
-        Loads a BEEPFeaturizer from json.
-
-        Can be used in combination with files serialized with BEEPFeatures.to_json_file.
-
-        Args:
-            filename (str, Pathlike): a json file from a structured run, serialzed with to_json_file.
-
-        Returns:
-            None
-        """
-        with zopen(filename, "r") as f:
-            d = json.load(f)
-
-        # Add this structured file path to the paths dict
-        paths = d.get("paths", {})
-        paths["features"] = os.path.abspath(filename)
-        d["paths"] = paths
-        return cls.from_dict(d)
-
-    def to_json_file(self, filename):
-        """Save a BEEPFeatures to disk as a json.
-
-        .json.gz files are supported.
-
-        Not named from_json to avoid conflict with MSONable.from_json(*)
-
-        Args:
-            filename (str, Pathlike): The filename to save the file to.
-            omit_raw (bool): If True, saves only structured (NOT RAW) data.
-                More efficient for saving/writing to disk.
-
-        Returns:
-            None
-        """
-        d = self.as_dict()
-        dumpfn(d, filename)
-
-
-class BEEPFeatureMatrix(MSONable):
-    """
-    Create an (n battery cycler files) x (k features) array composed of
-    m BEEPFeaturizer objects.
-
-    Args:
-        beepfeaturizers ([BEEPFeaturizer]): A list of BEEPFeaturizer objects
-
-    """
-
-    OP_DELIMITER = "::"
-
-    def __init__(self, beepfeaturizers: List[BEEPFeaturizer]):
-
-        if beepfeaturizers:
-            dfs_by_file = {bf.paths.get("structured", "no file found"): [] for bf in beepfeaturizers}
-            # big_df_rows = {bf.__class__.__name__: [] for bf in beepfeaturizers}
-            unique_features = {}
-            for i, bf in enumerate(beepfeaturizers):
-                if bf.features is None:
-                    raise BEEPFeatureMatrixError(f"BEEPFeaturizer {bf} has not created features")
-                elif bf.features.shape[0] != 1:
-                    raise BEEPFeatureMatrixError(f"BEEPFeaturizer {bf} features are not 1-dimensional.")
-                else:
-                    bfcn = bf.__class__.__name__
-
-                    fname = bf.paths.get("structured", None)
-                    if not fname:
-                        raise BEEPFeatureMatrixError(
-                            "Cannot join features automatically as no linking can be done "
-                            "based on original structured filename."
-                        )
-
-                    # Check for any possible feature collisions using identical featurizers
-                    # on identical files
-
-                    # sort params for this featurizer obj by key
-                    params = sorted(list(bf.hyperparameters.items()), key=lambda x: x[0])
-
-                    # Prevent identical features from identical input files
-                    # create a unique operation string for the application of this featurizer
-                    # on a specific file, this op string will be the same as long as
-                    # the featurizer class name, hyperparameters, and class are the same
-
-                    param_str = "-".join([f"{k}:{v}" for k, v in params])
-                    param_hash = hashlib.sha256(param_str.encode("utf-8")).hexdigest()
-
-                    # Get an id for this featurizer operation (including hyperparameters)
-                    # regardless of the file it is applied on
-                    feature_op_id = f"{bfcn}{self.OP_DELIMITER}{param_hash}"
-
-                    # Get an id for this featurizer operation (including hyperparameters)
-                    # on THIS SPECIFIC file.
-                    file_feature_op_id = f"{fname}{self.OP_DELIMITER}{bfcn}{self.OP_DELIMITER}{param_hash}"
-
-                    # Get a unique id for every feature generated by a specific
-                    # featurizer on a specific file.
-                    this_file_feature_columns_ids = \
-                        [
-                            f"{file_feature_op_id}{self.OP_DELIMITER}{c}" for c in bf.features.columns
-                        ]
-
-                    # Check to make sure there are no duplicates of the exact same feature for
-                    # the exact same featurizer with the exact same hyperparameters on the exact
-                    # same file.
-                    collisions = {c: f for c, f in unique_features.items() if c in this_file_feature_columns_ids}
-                    if collisions:
-                        raise BEEPFeatureMatrixError(
-                            f"Multiple features generated with identical classes and identical hyperparameters"
-                            f" attempted to be joined into same dataset; \n"
-                            f"{bfcn} features collide with existing: \n{collisions}"
-                        )
-                    for c in this_file_feature_columns_ids:
-                        unique_features[c] = bfcn
-
-                    # Create consistent scheme for naming features regardless of file
-                    df = copy.deepcopy(bf.features)
-                    consistent_column_names = [f"{c}{self.OP_DELIMITER}{feature_op_id}" for c in df.columns]
-                    df.columns = consistent_column_names
-
-                    df.index = [fname] * df.shape[0]
-                    df.index.rename("filename", inplace=True)
-                    dfs_by_file[fname].append(df)
-
-            rows = []
-            for filename, dfs in dfs_by_file.items():
-                row = pd.concat(dfs, axis=1)
-                row = row[sorted(row.columns)]
-                rows.append(row)
-            self.matrix = pd.concat(rows, axis=0)
-
-        else:
-            self.matrix = None
-
-        self.featurizers = beepfeaturizers
-
-    def as_dict(self):
-        """Serialize a BEEPDatapath as a dictionary.
-
-        Must not be loaded from legacy.
-
-        Returns:
-            (dict): corresponding to dictionary for serialization.
-
-        """
-
-        return {
-            "@module": self.__class__.__module__,
-            "@class": self.__class__.__name__,
-
-            # Core parts of BEEPFeaturizer
-            "featurizers": [f.as_dict() for f in self.featurizers],
-            "matrix": self.matrix.to_dict("list"),
-        }
-
-    @classmethod
-    def from_dict(cls, d):
-        """Create a BEEPDatapath object from a dictionary.
-
-        Args:
-            d (dict): dictionary represenation.
-
-        Returns:
-            beep.structure.ProcessedCyclerRun: deserialized ProcessedCyclerRun.
-        """
-        # no need for original datapaths, as their ref paths should
-        # be in the subobjects
-        featurizers = [MontyDecoder().process_decoded(f) for f in d["featurizers"]]
-        return cls(featurizers)
-
-    @classmethod
-    def from_json_file(cls, filename):
-        """Load a structured run previously saved to file.
-
-        .json.gz files are supported.
-
-        Loads a BEEPFeatureMatrix from json.
-
-        Can be used in combination with files serialized with BEEPFeatures.to_json_file.
-
-        Args:
-            filename (str, Pathlike): a json file from a structured run, serialzed with to_json_file.
-
-        Returns:
-            None
-        """
-        return loadfn(filename)
-
-    def to_json_file(self, filename):
-        """Save a BEEPFeatureMatrix to disk as a json.
-
-        .json.gz files are supported.
-
-        Not named from_json to avoid conflict with MSONable.from_json(*)
-
-        Args:
-            filename (str, Pathlike): The filename to save the file to.
-            omit_raw (bool): If True, saves only structured (NOT RAW) data.
-                More efficient for saving/writing to disk.
-
-        Returns:
-            None
-        """
-        d = self.as_dict()
-        dumpfn(d, filename)
diff --git a/beep/features/core.py b/beep/features/core.py
deleted file mode 100644
index 878b0987..00000000
--- a/beep/features/core.py
+++ /dev/null
@@ -1,909 +0,0 @@
-import numpy as np
-import pandas as pd
-from scipy.stats import skew, kurtosis
-from scipy.interpolate import interp1d
-
-from beep import PROTOCOL_PARAMETERS_DIR
-from beep.features import featurizer_helpers
-from beep.features.base import BEEPFeaturizer, BEEPFeaturizationError
-
-
-class HPPCResistanceVoltageFeatures(BEEPFeaturizer):
-    DEFAULT_HYPERPARAMETERS = {
-        "test_time_filter_sec": 1000000,
-        "cycle_index_filter": 6,
-        "diag_pos": 1,
-        "soc_window": 8,
-        "parameters_path": PROTOCOL_PARAMETERS_DIR
-    }
-
-    def validate(self):
-        val, msg = featurizer_helpers.check_diagnostic_validation(self.datapath)
-        if val:
-            conditions = []
-            conditions.append(
-                any(
-                    [
-                        "hppc" in x
-                        for x in
-                        self.datapath.diagnostic_summary.cycle_type.unique()
-                    ]
-                )
-            )
-            if all(conditions):
-                return True, None
-            else:
-                return False, "HPPC conditions not met for this cycler run"
-        else:
-            return val, msg
-
-    def create_features(self):
-        # Filter out low cycle numbers at the end of the test, corresponding to the "final" diagnostic
-        self.datapath.diagnostic_data = self.datapath.diagnostic_data[
-            ~((self.datapath.diagnostic_data.test_time > self.hyperparameters[
-                'test_time_filter_sec']) &
-              (self.datapath.diagnostic_data.cycle_index < self.hyperparameters[
-                  'cycle_index_filter']))
-        ]
-        self.datapath.diagnostic_data = self.datapath.diagnostic_data.groupby(
-            ["cycle_index", "step_index", "step_index_counter"]
-        ).filter(lambda x: ~x["test_time"].isnull().all())
-
-        # diffusion features
-        diffusion_features = featurizer_helpers.get_diffusion_features(
-            self.datapath, self.hyperparameters["diag_pos"]
-        )
-
-        hppc_r = pd.DataFrame()
-        # the 9 by 6 dataframe
-        df_dr = featurizer_helpers.get_dr_df(
-            self.datapath, self.hyperparameters["diag_pos"]
-        )
-        # transform this dataframe to be 1 by 54
-        columns = df_dr.columns
-        for column in columns:
-            for r in range(len(df_dr[column])):
-                name = column + str(r)
-                hppc_r[name] = [df_dr[column][r]]
-
-        # the variance of ocv features
-        hppc_ocv = featurizer_helpers.get_hppc_ocv(
-            self.datapath,
-            self.hyperparameters["diag_pos"],
-            parameters_path=self.hyperparameters["parameters_path"]
-        )
-
-        # the v_diff features
-        v_diff = featurizer_helpers.get_v_diff(
-            self.datapath,
-            self.hyperparameters["diag_pos"],
-            self.hyperparameters["soc_window"],
-            self.hyperparameters["parameters_path"]
-        )
-
-        # merge everything together as a final result dataframe
-        self.features = pd.concat(
-            [hppc_r, hppc_ocv, v_diff, diffusion_features], axis=1)
-
-
-class CycleSummaryStats(BEEPFeaturizer):
-    DEFAULT_HYPERPARAMETERS = {
-        "cycle_comp_num": [10, 100],
-        "statistics": ["var", "min", "mean", "skew", "kurtosis", "abs",
-                       "square"]
-    }
-
-    def validate(self):
-        """
-        This function determines if the input data has the necessary attributes for
-        creation of this feature class. It should test for all of the possible reasons
-        that feature generation would fail for this particular input data.
-
-        Args:
-            processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run
-            params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object
-                gets featurized. These could be filters for column or row operations
-        Returns:
-            bool: True/False indication of ability to proceed with feature generation
-        """
-
-        # TODO: not sure this is necessary
-        # Check for data in each of the selected cycles
-        index_1, index_2 = self.hyperparameters['cycle_comp_num']
-        cycle_1 = self.datapath.structured_data[
-            self.datapath.structured_data.cycle_index == index_1]
-        cycle_2 = self.datapath.structured_data[
-            self.datapath.structured_data.cycle_index == index_2]
-        if len(cycle_1) == 0 or len(cycle_2) == 0:
-            return False, "Length of one or more comparison cycles is zero"
-
-        # TODO: check whether this is good
-        # Check for relevant data
-        required_columns = [
-            'charge_capacity',
-            'discharge_capacity',
-            'charge_energy',
-            'discharge_energy',
-        ]
-        pcycler_run_columns = self.datapath.structured_data.columns
-        if not all(
-                [column in pcycler_run_columns for column in required_columns]):
-            return False, f"Required column not present in all structured data " \
-                          f"(must have all of: {required_columns})"
-
-        return True, None
-
-    def create_features(self):
-        """
-        Generate features listed in early prediction manuscript using both diagnostic and regular cycles
-
-        Args:
-            processed_cycler_run (beep.structure.ProcessedCyclerRun)
-            params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object
-                gets featurized. These could be filters for column or row operations
-            parameters_path (str): Root directory storing project parameter files.
-
-        Returns:
-            X (pd.DataFrame): Dataframe containing the feature
-        """
-
-        # TODO: extend this dataframe and uncomment energy features when
-        #   structuring is refactored
-        X = pd.DataFrame(np.zeros((1, 28)))
-
-        reg_cycle_comp_num = self.hyperparameters.get("cycle_comp_num")
-        cycle_comp_1 = self.datapath.structured_data[
-            self.datapath.structured_data.cycle_index == reg_cycle_comp_num[1]
-            ]
-        cycle_comp_0 = self.datapath.structured_data[
-            self.datapath.structured_data.cycle_index == reg_cycle_comp_num[0]
-            ]
-        Qc100_1 = cycle_comp_1[
-            cycle_comp_1.step_type == "charge"].charge_capacity
-        Qc10_1 = cycle_comp_0[
-            cycle_comp_0.step_type == "charge"].charge_capacity
-        QcDiff = Qc100_1.values - Qc10_1.values
-        QcDiff = QcDiff[~np.isnan(QcDiff)]
-
-        X.loc[0, 0:6] = self.get_summary_statistics(QcDiff)
-
-        Qd100_1 = cycle_comp_1[
-            cycle_comp_1.step_type == "discharge"].discharge_capacity
-        Qd10_1 = cycle_comp_0[
-            cycle_comp_0.step_type == "discharge"].discharge_capacity
-        QdDiff = Qd100_1.values - Qd10_1.values
-        QdDiff = QdDiff[~np.isnan(QdDiff)]
-
-        X.loc[0, 7:13] = self.get_summary_statistics(QdDiff)
-
-        # # Charging Energy features
-        Ec100_1 = cycle_comp_1[cycle_comp_1.step_type == "charge"].charge_energy
-        Ec10_1 = cycle_comp_0[cycle_comp_0.step_type == "charge"].charge_energy
-        EcDiff = Ec100_1.values - Ec10_1.values
-        EcDiff = EcDiff[~np.isnan(EcDiff)]
-
-        X.loc[0, 14:20] = self.get_summary_statistics(EcDiff)
-
-        # # Discharging Energy features
-        Ed100_1 = cycle_comp_1[
-            cycle_comp_1.step_type == "charge"].discharge_energy
-        Ed10_1 = cycle_comp_0[
-            cycle_comp_0.step_type == "charge"].discharge_energy
-        EdDiff = Ed100_1.values - Ed10_1.values
-        EdDiff = EdDiff[~np.isnan(EdDiff)]
-
-        X.loc[0, 21:27] = self.get_summary_statistics(EdDiff)
-
-        quantities = [
-            "charging_capacity",
-            "discharging_capacity",
-            "charging_energy",
-            "discharging_energy",
-        ]
-
-        X.columns = [y + "_" + x for x in quantities for y in
-                     self.hyperparameters["statistics"]]
-
-        self.features = X
-
-    def get_summary_statistics(self, array):
-        """
-        Static method for getting values corresponding
-        to standard 7 operations that many beep features
-        use, i.e. log of absolute value of each of
-        variance, min, mean, skew, kurtosis, the sum of
-        the absolute values and the sum of squares
-
-        Args:
-            array (list, np.ndarray): array of values to get
-                standard operation values for, e.g. cycle
-                discharging capacity, QcDiff, etc.
-
-        Returns:
-            [float]: list of features
-
-        """
-
-        stats_names = self.hyperparameters["statistics"]
-        supported_stats = self.DEFAULT_HYPERPARAMETERS["statistics"]
-
-        if any(s not in supported_stats for s in stats_names):
-            raise ValueError(
-                f"Unsupported statistics in {stats_names}: supported statistics are {supported_stats}")
-
-        stats = []
-
-        if "var" in stats_names:
-            stats.append(np.log10(np.absolute(np.var(array))))
-        if "min" in stats_names:
-            stats.append(np.log10(np.absolute(min(array))))
-        if "mean" in stats_names:
-            stats.append(np.log10(np.absolute(np.mean(array))))
-        if "skew" in stats_names:
-            stats.append(np.log10(np.absolute(skew(array))))
-        if "kurtosis" in stats_names:
-            stats.append(np.log10(
-                np.absolute(kurtosis(array, fisher=False, bias=False))))
-        if "abs" in stats_names:
-            stats.append(np.log10(np.sum(np.absolute(array))))
-        if "square" in stats_names:
-            stats.append(np.log10(np.sum(np.square(array))))
-
-        return np.asarray(stats)
-
-
-class DiagnosticSummaryStats(CycleSummaryStats):
-    """
-    Object corresponding to summary statistics from a diagnostic cycle of
-    specific type. Includes constructors to create the features, object names
-    and metadata attributes in the object.  Inherits from RegularCycleSummaryStats
-    to reuse standard feature generation
-
-    name (str): predictor object name.
-    X (pandas.DataFrame): features in DataFrame format.
-    metadata (dict): information about the conditions, data
-        and code used to produce features
-    """
-    DEFAULT_HYPERPARAMETERS = {
-        "test_time_filter_sec": 1000000,
-        "cycle_index_filter": 6,
-        "diagnostic_cycle_type": 'rpt_0.2C',
-        "diag_pos_list": [0, 1],
-        "statistics": ["var", "min", "mean", "skew", "kurtosis", "abs",
-                       "square"],
-        "parameters_path": PROTOCOL_PARAMETERS_DIR
-    }
-
-    def validate(self):
-        """
-        This function determines if the input data has the necessary attributes for
-        creation of this feature class. It should test for all of the possible reasons
-        that feature generation would fail for this particular input data.
-
-        Args:
-            processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run
-            params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object
-                gets featurized. These could be filters for column or row operations
-
-        Returns:
-            bool: True/False indication of ability to proceed with feature generation
-        """
-        val, msg = featurizer_helpers.check_diagnostic_validation(self.datapath)
-        if val:
-            df = self.datapath.diagnostic_summary
-            df = df[
-                df.cycle_type == self.hyperparameters["diagnostic_cycle_type"]]
-            if df.cycle_index.nunique() >= max(
-                    self.hyperparameters["diag_pos_list"]) + 1:
-                return True, None
-            else:
-                return False, "Diagnostic cycles insufficient for featurization"
-        else:
-            return val, msg
-
-    def get_summary_diff(
-            self,
-            pos=None,
-            cycle_types=("rpt_0.2C", "rpt_1C", "rpt_2C"),
-            metrics=(
-                    "discharge_capacity", "discharge_energy", "charge_capacity",
-                    "charge_energy")
-    ):
-        """
-        Helper function to calculate difference between summary values in the diagnostic cycles
-
-                Args:
-                    processed_cycler_run (beep.structure.ProcessedCyclerRun)
-                    pos (list): position of the diagnostics to use in the calculation
-                    cycle_types (list): calculate difference for these diagnostic types
-                    metrics (str): Calculate difference for these metrics
-
-                Returns:
-                    values (list): List of difference values to insert into the dataframe
-                    names (list): List of column headers to use in the creation of the dataframe
-                """
-        pos = self.hyperparameters["diag_pos_list"] if not pos else pos
-
-        values = []
-        names = []
-        for cycle_type in cycle_types:
-            diag_type_summary = self.datapath.diagnostic_summary[
-                self.datapath.diagnostic_summary.cycle_type == cycle_type]
-            for metric in metrics:
-                diff = (diag_type_summary.iloc[pos[1]][metric] -
-                        diag_type_summary.iloc[pos[0]][metric]) \
-                       / diag_type_summary.iloc[pos[0]][metric]
-                values.append(diff)
-                names.append("diag_sum_diff_" + str(pos[0]) + "_" + str(
-                    pos[1]) + "_" + cycle_type + metric)
-        return values, names
-
-    def create_features(self):
-        """
-        Generate features listed in early prediction manuscript using both diagnostic and regular cycles
-
-        Args:
-            self.datapathn (beep.structure.ProcessedCyclerRun)
-            self.hyperparameters (dict): dictionary of parameters governing how the ProcessedCyclerRun object
-                gets featurized. These could be filters for column or row operations
-            parameters_path (str): Root directory storing project parameter files.
-
-        Returns:
-            X (pd.DataFrame): Dataframe containing the feature
-        """
-        # Filter out "final" diagnostic cycles that have been appended to the end of the file with the wrong
-        # cycle number(test time is monotonic)
-        self.datapath.diagnostic_data = self.datapath.diagnostic_data[
-            ~((self.datapath.diagnostic_data.test_time > self.hyperparameters[
-                'test_time_filter_sec']) &
-              (self.datapath.diagnostic_data.cycle_index < self.hyperparameters[
-                  'cycle_index_filter']))
-        ]
-        self.datapath.diagnostic_data = self.datapath.diagnostic_data.groupby(
-            ["cycle_index", "step_index", "step_index_counter"]
-        ).filter(lambda x: ~x["test_time"].isnull().all())
-
-        diag_intrp = self.datapath.diagnostic_data
-
-        X = pd.DataFrame(np.zeros((1, 54)))
-
-        # Calculate the cycles and the steps for the selected diagnostics
-        cycles = diag_intrp.cycle_index[diag_intrp.cycle_type ==
-                                        self.hyperparameters[
-                                            "diagnostic_cycle_type"]].unique()
-        step_dict_0 = featurizer_helpers.get_step_index(
-            self.datapath,
-            cycle_type=self.hyperparameters["diagnostic_cycle_type"],
-            diag_pos=self.hyperparameters["diag_pos_list"][0],
-            parameters_path=self.hyperparameters["parameters_path"]
-        )
-        step_dict_1 = featurizer_helpers.get_step_index(
-            self.datapath,
-            cycle_type=self.hyperparameters["diagnostic_cycle_type"],
-            diag_pos=self.hyperparameters["diag_pos_list"][1],
-            parameters_path=self.hyperparameters["parameters_path"]
-        )
-
-        # Create masks for each position in the data
-        mask_pos_0_charge = ((diag_intrp.cycle_index == cycles[
-            self.hyperparameters["diag_pos_list"][0]]) &
-                             (diag_intrp.step_index == step_dict_0[
-                                 self.hyperparameters[
-                                     "diagnostic_cycle_type"] + '_charge']))
-        mask_pos_1_charge = ((diag_intrp.cycle_index == cycles[
-            self.hyperparameters["diag_pos_list"][1]]) &
-                             (diag_intrp.step_index == step_dict_1[
-                                 self.hyperparameters[
-                                     "diagnostic_cycle_type"] + '_charge']))
-        mask_pos_0_discharge = ((diag_intrp.cycle_index == cycles[
-            self.hyperparameters["diag_pos_list"][0]]) &
-                                (diag_intrp.step_index ==
-                                 step_dict_0[self.hyperparameters[
-                                                 "diagnostic_cycle_type"] + '_discharge']))
-        mask_pos_1_discharge = ((diag_intrp.cycle_index == cycles[
-            self.hyperparameters["diag_pos_list"][1]]) &
-                                (diag_intrp.step_index ==
-                                 step_dict_1[self.hyperparameters[
-                                                 "diagnostic_cycle_type"] + '_discharge']))
-
-        # Charging Capacity features
-        Qc_1 = diag_intrp.charge_capacity[mask_pos_1_charge]
-        Qc_0 = diag_intrp.charge_capacity[mask_pos_0_charge]
-        QcDiff = Qc_1.values - Qc_0.values
-        QcDiff = QcDiff[~np.isnan(QcDiff)]
-
-        X.loc[0, 0:6] = self.get_summary_statistics(QcDiff)
-
-        # Discharging Capacity features
-        Qd_1 = diag_intrp.discharge_capacity[mask_pos_1_discharge]
-        Qd_0 = diag_intrp.discharge_capacity[mask_pos_0_discharge]
-        QdDiff = Qd_1.values - Qd_0.values
-        QdDiff = QdDiff[~np.isnan(QdDiff)]
-
-        X.loc[0, 7:13] = self.get_summary_statistics(QdDiff)
-
-        # Charging Energy features
-        Ec_1 = diag_intrp.charge_energy[mask_pos_1_charge]
-        Ec_0 = diag_intrp.charge_energy[mask_pos_0_charge]
-        EcDiff = Ec_1.values - Ec_0.values
-        EcDiff = EcDiff[~np.isnan(EcDiff)]
-
-        X.loc[0, 14:20] = self.get_summary_statistics(EcDiff)
-
-        # Discharging Energy features
-        Ed_1 = diag_intrp.discharge_energy[mask_pos_1_discharge]
-        Ed_0 = diag_intrp.discharge_energy[mask_pos_0_discharge]
-        EdDiff = Ed_1.values - Ed_0.values
-        EdDiff = EdDiff[~np.isnan(EdDiff)]
-
-        X.loc[0, 21:27] = self.get_summary_statistics(EdDiff)
-
-        # Charging dQdV features
-        dQdVc_1 = diag_intrp.charge_dQdV[mask_pos_1_charge]
-        dQdVc_0 = diag_intrp.charge_dQdV[mask_pos_0_charge]
-        dQdVcDiff = dQdVc_1.values - dQdVc_0.values
-        dQdVcDiff = dQdVcDiff[~np.isnan(dQdVcDiff)]
-
-        X.loc[0, 28:34] = self.get_summary_statistics(dQdVcDiff)
-
-        # Discharging Capacity features
-        dQdVd_1 = diag_intrp.discharge_dQdV[mask_pos_1_discharge]
-        dQdVd_0 = diag_intrp.discharge_dQdV[mask_pos_0_discharge]
-        dQdVdDiff = dQdVd_1.values - dQdVd_0.values
-        dQdVdDiff = dQdVdDiff[~np.isnan(dQdVdDiff)]
-
-        X.loc[0, 35:41] = self.get_summary_statistics(dQdVdDiff)
-
-        X.loc[0, 42:53], names = self.get_summary_diff(
-            self.hyperparameters["diag_pos_list"]
-        )
-
-        quantities = [
-            "charging_capacity",
-            "discharging_capacity",
-            "charging_energy",
-            "discharging_energy",
-            "charging_dQdV",
-            "discharging_dQdV",
-        ]
-
-        X.columns = [y + "_" + x for x in quantities for y in
-                     self.hyperparameters["statistics"]] + names
-        self.features = X
-
-
-class DeltaQFastCharge(BEEPFeaturizer):
-    """
-    Object corresponding to feature object. Includes constructors
-    to create the features, object names and metadata attributes in the
-    object
-        name (str): predictor object name.
-        X (pandas.DataFrame): features in DataFrame format.
-        metadata (dict): information about the conditions, data
-            and code used to produce features
-    """
-    DEFAULT_HYPERPARAMETERS = {
-        "init_pred_cycle": 10,
-        "mid_pred_cycle": 91,
-        "final_pred_cycle": 100,
-        "n_nominal_cycles": 40
-    }
-
-    def validate(self):
-        """
-        This function determines if the input data has the necessary attributes for
-        creation of this feature class. It should test for all of the possible reasons
-        that feature generation would fail for this particular input data.
-
-        Args:
-            self.datapath (beep.structure.ProcessedCyclerRun): data from cycler run
-            params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object
-            gets featurized. These could be filters for column or row operations
-        Returns:
-            bool: True/False indication of ability to proceed with feature generation
-        """
-
-        if not self.datapath.structured_summary.index.max() > self.hyperparameters["final_pred_cycle"]:
-            return False, "Structured summary index max is less than final pred cycle"
-        elif not self.datapath.structured_summary.index.min() <= self.hyperparameters["init_pred_cycle"]:
-            return False, "Structured summary index min is more than initial pred cycle"
-        elif "cycle_index" not in self.datapath.structured_summary.columns:
-            return False, "Structured summary missing critical data: 'cycle_index'"
-        elif "cycle_index" not in self.datapath.structured_data.columns:
-            return False, "Structured data missing critical data: 'cycle_index'"
-        elif not self.hyperparameters["mid_pred_cycle"] > 10:
-            return False, "Middle pred. cycle less than threshold value of 10"
-        elif not self.hyperparameters["final_pred_cycle"] > self.hyperparameters["mid_pred_cycle"]:
-            return False, "Final pred cycle less than middle pred cycle"
-        else:
-            return True, None
-
-    def create_features(self):
-        """
-        Generate features listed in early prediction manuscript, primarily related to the
-        so called delta Q feature
-        Args:
-            processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run
-            self.hyperparameters (dict): dictionary of parameters governing how the ProcessedCyclerRun object
-                gets featurized. These could be filters for column or row operations
-            parameters_path (str): Root directory storing project parameter files.
-
-        Returns:
-            pd.DataFrame: features indicative of degradation, derived from the input data
-        """
-        i_final = self.hyperparameters[
-                      "final_pred_cycle"] - 1  # python indexing
-        i_mid = self.hyperparameters["mid_pred_cycle"] - 1
-
-        summary = self.datapath.structured_summary
-        self.hyperparameters[
-            "n_nominal_cycles"
-        ] = 40  # For nominal capacity, use median discharge capacity of first n cycles
-
-        if "step_type" in self.datapath.structured_data.columns:
-            interpolated_df = self.datapath.structured_data[
-                self.datapath.structured_data.step_type == "discharge"
-                ]
-        else:
-            interpolated_df = self.datapath.structured_data
-        X = pd.DataFrame(np.zeros((1, 20)))
-        labels = []
-        # Discharge capacity, cycle 2 = Q(n=2)
-        X[0] = summary.discharge_capacity.iloc[1]
-        labels.append("discharge_capacity_cycle_2")
-
-        # Max discharge capacity - discharge capacity, cycle 2 = max_n(Q(n)) - Q(n=2)
-        X[1] = max(
-            summary.discharge_capacity.iloc[np.arange(i_final + 1)]
-            - summary.discharge_capacity.iloc[1]
-        )
-        labels.append("max_discharge_capacity_difference")
-
-        # Discharge capacity, cycle 100 = Q(n=100)
-        X[2] = summary.discharge_capacity.iloc[i_final]
-        labels.append("discharge_capacity_cycle_100")
-
-        # Feature representing time-temperature integral over cycles 2 to 100
-        X[3] = np.nansum(
-            summary.time_temperature_integrated.iloc[np.arange(i_final + 1)])
-        labels.append("integrated_time_temperature_cycles_1:100")
-
-        # Mean of charge times of first 5 cycles
-        X[4] = np.nanmean(summary.charge_duration.iloc[1:6])
-        labels.append("charge_time_cycles_1:5")
-
-        # Descriptors based on capacity loss between cycles 10 and 100.
-        Qd_final = interpolated_df.discharge_capacity[
-            interpolated_df.cycle_index == i_final
-            ]
-        Qd_10 = interpolated_df.discharge_capacity[
-            interpolated_df.cycle_index == 9]
-
-        Qd_diff = Qd_final.values - Qd_10.values
-
-        # If DeltaQ(V) is not an empty array, compute summary stats, else initialize with np.nan
-        # Cells discharged rapidly over a narrow voltage window run into have no interpolated discharge steps
-        if len(Qd_diff):
-            X[5] = np.log10(np.abs(np.nanmin(Qd_diff)))  # Minimum
-            X[6] = np.log10(np.abs(np.nanmean(Qd_diff)))  # Mean
-            X[7] = np.log10(np.abs(np.nanvar(Qd_diff)))  # Variance
-            X[8] = np.log10(np.abs(skew(Qd_diff)))  # Skewness
-            X[9] = np.log10(np.abs(kurtosis(Qd_diff)))  # Kurtosis
-            X[10] = np.log10(np.abs(Qd_diff[0]))  # First difference
-        else:
-            X[5:11] = np.nan
-
-        labels.append("abs_min_discharge_capacity_difference_cycles_2:100")
-        labels.append("abs_mean_discharge_capacity_difference_cycles_2:100")
-        labels.append("abs_variance_discharge_capacity_difference_cycles_2:100")
-        labels.append("abs_skew_discharge_capacity_difference_cycles_2:100")
-        labels.append("abs_kurtosis_discharge_capacity_difference_cycles_2:100")
-        labels.append("abs_first_discharge_capacity_difference_cycles_2:100")
-
-        X[11] = np.max(summary.temperature_maximum.iloc[
-                           list(range(1, i_final + 1))])  # Max T
-        labels.append("max_temperature_cycles_1:100")
-
-        X[12] = np.min(summary.temperature_minimum.iloc[
-                           list(range(1, i_final + 1))])  # Min T
-        labels.append("min_temperature_cycles_1:100")
-
-        # Slope and intercept of linear fit to discharge capacity as a fn of cycle #, cycles 2 to 100
-
-        X[13], X[14] = np.polyfit(
-            list(range(1, i_final + 1)),
-            summary.discharge_capacity.iloc[list(range(1, i_final + 1))],
-            1,
-        )
-
-        labels.append("slope_discharge_capacity_cycle_number_2:100")
-        labels.append("intercept_discharge_capacity_cycle_number_2:100")
-
-        # Slope and intercept of linear fit to discharge capacity as a fn of cycle #, cycles 91 to 100
-        X[15], X[16] = np.polyfit(
-            list(range(i_mid, i_final + 1)),
-            summary.discharge_capacity.iloc[list(range(i_mid, i_final + 1))],
-            1,
-        )
-        labels.append("slope_discharge_capacity_cycle_number_91:100")
-        labels.append("intercept_discharge_capacity_cycle_number_91:100")
-
-        IR_trend = summary.dc_internal_resistance.iloc[
-            list(range(1, i_final + 1))]
-        if any(v == 0 for v in IR_trend):
-            IR_trend[IR_trend == 0] = np.nan
-
-        # Internal resistance minimum
-        X[17] = np.nanmin(IR_trend)
-        labels.append("min_internal_resistance_cycles_2:100")
-
-        # Internal resistance at cycle 2
-        X[18] = summary.dc_internal_resistance.iloc[1]
-        labels.append("internal_resistance_cycle_2")
-
-        # Internal resistance at cycle 100 - cycle 2
-        X[19] = (
-                summary.dc_internal_resistance.iloc[i_final] -
-                summary.dc_internal_resistance.iloc[1]
-        )
-        labels.append("internal_resistance_difference_cycles_2:100")
-
-        # Nominal capacity
-        end = self.hyperparameters["n_nominal_cycles"]
-        X[20] = np.median(summary.discharge_capacity.iloc[0: end])
-        labels.append("nominal_capacity_by_median")
-
-        X.columns = labels
-        self.features = X
-
-
-class TrajectoryFastCharge(DeltaQFastCharge):
-    """
-    Object corresponding to cycle numbers at which the capacity drops below
-     specific percentages of the initial capacity. Computed on the discharge
-     portion of the regular fast charge cycles.
-
-    """
-
-    DEFAULT_HYPERPARAMETERS = {
-        "thresh_max_cap": 0.98,
-        "thresh_min_cap": 0.78,
-        "interval_cap": 0.03
-    }
-
-    def validate(self):
-        """
-        This function determines if the input data has the necessary attributes for
-        creation of this feature class. It should test for all of the possible reasons
-        that feature generation would fail for this particular input data.
-
-        Args:
-            processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run
-            params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object
-            gets featurized. These could be filters for column or row operations
-        Returns:
-            bool: True/False indication of ability to proceed with feature generation
-        """
-        cap = self.datapath.structured_summary.discharge_capacity
-        cap_ratio = cap.min() / cap.max()
-        max_cap = self.hyperparameters["thresh_max_cap"]
-        if not cap_ratio < max_cap:
-            return False, f"thresh_max_cap hyperparameter exceeded: {cap_ratio} !< {max_cap}"
-        else:
-            return True, None
-
-    def create_features(self):
-        """
-        Calculate the outcomes from the input data. In particular, the number of cycles
-        where we expect to reach certain thresholds of capacity loss
-        Args:
-            processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run
-            params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object
-            gets featurized. These could be filters for column or row operations
-            parameters_path (str): Root directory storing project parameter files.
-
-        Returns:
-            pd.DataFrame: cycles at which capacity/energy degradation exceeds thresholds
-        """
-        y = self.datapath.capacities_to_cycles(
-            self.hyperparameters["thresh_max_cap"],
-            self.hyperparameters["thresh_min_cap"],
-            self.hyperparameters["interval_cap"],
-        )
-        self.features = y
-
-
-class DiagnosticProperties(BEEPFeaturizer):
-    """
-    This class stores fractional levels of degradation in discharge capacity and discharge energy
-    relative to the first cycle at each diagnostic cycle, grouped by diagnostic cycle type.
-
-        name (str): predictor object name.
-        X (pandas.DataFrame): features in DataFrame format.
-        metadata (dict): information about the conditions, data
-            and code used to produce features
-
-    Hyperparameters:
-        parameters_dir (str): Full path to directory of parameters to analyse the
-            diagnostic cycles
-        quantities ([str]): Quantities to extract/get fractional metrics for
-            diagnostic cycles
-        cycle_type (str): Type of diagnostic cycle being used to measure the
-            fractional metric
-        metric (str): The metric being used for fractional capacity
-        interpolation_axes (list): List of column names to use for
-            x_axis interpolation (distance to threshold)
-        threshold (float): Value for the fractional metric to be considered above
-            or below threshold
-        filter_kinks (float): If set, cutoff value for the second derivative of
-            the fractional metric (cells with an abrupt change in degradation
-            rate might have something else going on). Typical value might be 0.04
-        extrapolate_threshold (bool): Should threshold crossing point be
-            extrapolated for cells that have not yet reached the threshold
-            (warning: this uses a linear extrapolation from the last two
-            diagnostic cycles)
-    """
-    DEFAULT_HYPERPARAMETERS = {
-        "parameters_dir": PROTOCOL_PARAMETERS_DIR,
-        "quantities": ['discharge_energy', 'discharge_capacity'],
-        "threshold": 0.8,
-        "metric": "discharge_energy",
-        "filter_kinks": None,
-        "interpolation_axes": ["normalized_regular_throughput", "cycle_index"],
-        "cycle_type": "rpt_1C",
-        "extrapolate_threshold": True
-    }
-
-    def validate(self):
-        """
-        This function determines if the input data has the necessary attributes for
-        creation of this feature class. It should test for all of the possible reasons
-        that feature generation would fail for this particular input data.
-
-        Args:
-            processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run
-            params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object
-            gets featurized. These could be filters for column or row operations
-        Returns:
-            bool: True/False indication of ability to proceed with feature generation
-        """
-        return featurizer_helpers.check_diagnostic_validation(self.datapath)
-
-    def create_features(self):
-        """
-        Generates diagnostic-property features from processed cycler run, including values for n*x method
-        Args:
-            self.datapath (beep.structure.ProcessedCyclerRun): data from cycler run
-            params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object
-                gets featurized. These could be filters for column or row operations
-            parameters_path (str): Root directory storing project parameter files.
-
-        Returns:
-            pd.DataFrame: with "cycle_index", "fractional_metric", "x", "n", "cycle_type" and "metric" columns, rows
-            for each diagnostic cycle of the cell
-        """
-
-        parameters_path = self.hyperparameters["parameters_dir"]
-
-        cycle_types = self.datapath.diagnostic_summary.cycle_type.unique()
-        X = pd.DataFrame()
-        for quantity in self.hyperparameters["quantities"]:
-            for cycle_type in cycle_types:
-                summary_diag_cycle_type = featurizer_helpers.get_fractional_quantity_remaining_nx(
-                    self.datapath, quantity, cycle_type,
-                    parameters_path=parameters_path
-                )
-
-                summary_diag_cycle_type.loc[:, "cycle_type"] = cycle_type
-                summary_diag_cycle_type.loc[:, "metric"] = quantity
-                X = X.append(summary_diag_cycle_type)
-
-        X_condensed = self.get_threshold_targets(X)
-        self.features = X_condensed
-
-    def get_threshold_targets(self, df):
-        """
-        Apply a threshold via interpolation for determining various
-        metrics (e.g., discharge energy) from diagnostic cycles.
-
-        Args:
-            df (pd.DataFrame): A dataframe of diagnostic cycle data
-                for a single battery cycler run.
-
-        Returns:
-            (pd.DataFrame): Contains a vector for interpolated/intercept
-                data for determining threshold.
-
-        """
-        cycle_type = self.hyperparameters["cycle_type"]
-        metric = self.hyperparameters["metric"]
-        interpolation_axes = self.hyperparameters["interpolation_axes"]
-        threshold = self.hyperparameters["threshold"]
-        filter_kinks = self.hyperparameters["filter_kinks"]
-        extrapolate_threshold = self.hyperparameters["extrapolate_threshold"]
-
-        if filter_kinks:
-            if np.any(df['fractional_metric'].diff().diff() < filter_kinks):
-                last_good_cycle = df[
-                    df['fractional_metric'].diff().diff() < filter_kinks]['cycle_index'].min()
-                df = df[df['cycle_index'] < last_good_cycle]
-
-        x_axes = []
-        for type in interpolation_axes:
-            x_axes.append(df[type])
-        y_interpolation_axis = df['fractional_metric']
-
-        # Logic around how to deal with cells that have not crossed threshold
-        if df['fractional_metric'].min() > threshold and \
-                not extrapolate_threshold:
-            BEEPFeaturizationError(
-                "DiagnosticProperties data has not crossed threshold "
-                "and extrapolation inaccurate"
-            )
-        elif df['fractional_metric'].min() > threshold and \
-                extrapolate_threshold:
-            fill_value = "extrapolate"
-            bounds_error = False
-            x_linspaces = []
-            for x_axis in x_axes:
-                y1 = y_interpolation_axis.iloc[-2]
-                y2 = y_interpolation_axis.iloc[-1]
-                x1 = x_axis.iloc[-2]
-                x2 = x_axis.iloc[-1]
-                x_thresh_extrap = (threshold - 0.1 - y1) * (x2 - x1) / (
-                        y2 - y1) + x1
-                x_linspaces.append(
-                    np.linspace(x_axis.min(), x_thresh_extrap, num=1000)
-                )
-        else:
-            fill_value = np.nan
-            bounds_error = True
-            x_linspaces = []
-            for x_axis in x_axes:
-                x_linspaces.append(
-                    np.linspace(x_axis.min(), x_axis.max(), num=1000))
-
-        f_axis = []
-        for x_axis in x_axes:
-            f_axis.append(
-                interp1d(
-                    x_axis,
-                    y_interpolation_axis,
-                    kind='linear',
-                    bounds_error=bounds_error,
-                    fill_value=fill_value
-                )
-            )
-
-        x_to_threshold = []
-        for indx, x_linspace in enumerate(x_linspaces):
-            crossing_array = abs(f_axis[indx](x_linspace) - threshold)
-            x_to_threshold.append(x_linspace[np.argmin(crossing_array)])
-
-        if ~(x_to_threshold[0] > 0) or ~(x_to_threshold[1] > 0):
-            raise BEEPFeaturizationError(
-                "DiagnosticProperties data does not have a positive value "
-                "to threshold"
-            )
-
-        if "normalized_regular_throughput" in interpolation_axes:
-            real_throughput_to_threshold = x_to_threshold[
-                                               interpolation_axes.index(
-                                                   "normalized_regular_throughput")] * \
-                                           df[
-                                               'initial_regular_throughput'].values[
-                                               0]
-            x_to_threshold.append(real_throughput_to_threshold)
-            interpolation_axes = interpolation_axes + ["real_regular_throughput"]
-
-        threshold_dict = {
-            'initial_regular_throughput':
-                df['initial_regular_throughput'].values[0],
-        }
-
-        for indx, x_axis in enumerate(interpolation_axes):
-            threshold_dict[
-                cycle_type + metric + str(threshold) + '_' + x_axis] = [
-                x_to_threshold[indx]]
-
-        return pd.DataFrame(threshold_dict)
diff --git a/beep/features/early_cycles/__init__.py b/beep/features/early_cycles/__init__.py
new file mode 100644
index 00000000..520baf44
--- /dev/null
+++ b/beep/features/early_cycles/__init__.py
@@ -0,0 +1,14 @@
+"""
+Features for predicting cycling characteristics from early cycles.
+
+Typically these features are formed by looking at two or more early cycles,
+assessing the differences between features of these cycles, then relating
+that to the degradation characteristics of the battery. These features
+can then be used with any cycler file with few numbers of cycles to predict
+the total number of cycles before reaching certain degradation thresholds.
+"""
+
+from beep.features.early_cycles.delta_q import DeltaQFastCharge
+from beep.features.early_cycles.hppc import HPPCResistanceVoltage
+from beep.features.early_cycles.summary import DiagnosticSummaryStats, CycleSummaryStats
+from beep.features.early_cycles.targets import TrajectoryFastCharge, DiagnosticProperties
\ No newline at end of file
diff --git a/beep/features/early_cycles/delta_q.py b/beep/features/early_cycles/delta_q.py
new file mode 100644
index 00000000..64feaac9
--- /dev/null
+++ b/beep/features/early_cycles/delta_q.py
@@ -0,0 +1,183 @@
+import numpy as np
+import pandas as pd
+from scipy.stats import skew, kurtosis
+
+from beep.features.featurizer import BEEPEarlyCyclesFeaturizer
+
+
+class DeltaQFastCharge(BEEPEarlyCyclesFeaturizer):
+    """
+    Object corresponding to feature object. Includes constructors
+    to create the features, object names and metadata attributes in the
+    object
+        name (str): predictor object name.
+        X (pandas.DataFrame): features in DataFrame format.
+        metadata (dict): information about the conditions, data
+            and code used to produce features
+    """
+    DEFAULT_HYPERPARAMETERS = {
+        "init_pred_cycle": 10,
+        "mid_pred_cycle": 91,
+        "final_pred_cycle": 100,
+        "n_nominal_cycles": 40
+    }
+
+    def validate(self):
+        """
+        This function determines if the input data has the necessary attributes for
+        creation of this feature class. It should test for all of the possible reasons
+        that feature generation would fail for this particular input data.
+
+        Args:
+            self.datapath (beep.structure.ProcessedCyclerRun): data from cycler run
+            params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object
+            gets featurized. These could be filters for column or row operations
+        Returns:
+            bool: True/False indication of ability to proceed with feature generation
+        """
+
+        if not self.datapath.structured_summary.index.max() > \
+               self.hyperparameters["final_pred_cycle"]:
+            return False, "Structured summary index max is less than final pred cycle"
+        elif not self.datapath.structured_summary.index.min() <= \
+                 self.hyperparameters["init_pred_cycle"]:
+            return False, "Structured summary index min is more than initial pred cycle"
+        elif "cycle_index" not in self.datapath.structured_summary.columns:
+            return False, "Structured summary missing critical data: 'cycle_index'"
+        elif "cycle_index" not in self.datapath.structured_data.columns:
+            return False, "Structured data missing critical data: 'cycle_index'"
+        elif not self.hyperparameters["mid_pred_cycle"] > 10:
+            return False, "Middle pred. cycle less than threshold value of 10"
+        elif not self.hyperparameters["final_pred_cycle"] > \
+                 self.hyperparameters["mid_pred_cycle"]:
+            return False, "Final pred cycle less than middle pred cycle"
+        else:
+            return True, None
+
+    def create_features(self):
+        """
+        """
+        i_final = self.hyperparameters[
+                      "final_pred_cycle"] - 1  # python indexing
+        i_mid = self.hyperparameters["mid_pred_cycle"] - 1
+
+        summary = self.datapath.structured_summary
+        self.hyperparameters[
+            "n_nominal_cycles"
+        ] = 40  # For nominal capacity, use median discharge capacity of first n cycles
+
+        if "step_type" in self.datapath.structured_data.columns:
+            interpolated_df = self.datapath.structured_data[
+                self.datapath.structured_data.step_type == "discharge"
+                ]
+        else:
+            interpolated_df = self.datapath.structured_data
+        X = pd.DataFrame(np.zeros((1, 20)))
+        labels = []
+        # Discharge capacity, cycle 2 = Q(n=2)
+        X[0] = summary.discharge_capacity.iloc[1]
+        labels.append("discharge_capacity_cycle_2")
+
+        # Max discharge capacity - discharge capacity, cycle 2 = max_n(Q(n)) - Q(n=2)
+        X[1] = max(
+            summary.discharge_capacity.iloc[np.arange(i_final + 1)]
+            - summary.discharge_capacity.iloc[1]
+        )
+        labels.append("max_discharge_capacity_difference")
+
+        # Discharge capacity, cycle 100 = Q(n=100)
+        X[2] = summary.discharge_capacity.iloc[i_final]
+        labels.append("discharge_capacity_cycle_100")
+
+        # Feature representing time-temperature integral over cycles 2 to 100
+        X[3] = np.nansum(
+            summary.time_temperature_integrated.iloc[np.arange(i_final + 1)])
+        labels.append("integrated_time_temperature_cycles_1:100")
+
+        # Mean of charge times of first 5 cycles
+        X[4] = np.nanmean(summary.charge_duration.iloc[1:6])
+        labels.append("charge_time_cycles_1:5")
+
+        # Descriptors based on capacity loss between cycles 10 and 100.
+        Qd_final = interpolated_df.discharge_capacity[
+            interpolated_df.cycle_index == i_final
+            ]
+        Qd_10 = interpolated_df.discharge_capacity[
+            interpolated_df.cycle_index == 9]
+
+        Qd_diff = Qd_final.values - Qd_10.values
+
+        # If DeltaQ(V) is not an empty array, compute summary stats, else initialize with np.nan
+        # Cells discharged rapidly over a narrow voltage window run into have no interpolated discharge steps
+        if len(Qd_diff):
+            X[5] = np.log10(np.abs(np.nanmin(Qd_diff)))  # Minimum
+            X[6] = np.log10(np.abs(np.nanmean(Qd_diff)))  # Mean
+            X[7] = np.log10(np.abs(np.nanvar(Qd_diff)))  # Variance
+            X[8] = np.log10(np.abs(skew(Qd_diff)))  # Skewness
+            X[9] = np.log10(np.abs(kurtosis(Qd_diff)))  # Kurtosis
+            X[10] = np.log10(np.abs(Qd_diff[0]))  # First difference
+        else:
+            X[5:11] = np.nan
+
+        labels.append("abs_min_discharge_capacity_difference_cycles_2:100")
+        labels.append("abs_mean_discharge_capacity_difference_cycles_2:100")
+        labels.append("abs_variance_discharge_capacity_difference_cycles_2:100")
+        labels.append("abs_skew_discharge_capacity_difference_cycles_2:100")
+        labels.append("abs_kurtosis_discharge_capacity_difference_cycles_2:100")
+        labels.append("abs_first_discharge_capacity_difference_cycles_2:100")
+
+        X[11] = np.max(summary.temperature_maximum.iloc[
+                           list(range(1, i_final + 1))])  # Max T
+        labels.append("max_temperature_cycles_1:100")
+
+        X[12] = np.min(summary.temperature_minimum.iloc[
+                           list(range(1, i_final + 1))])  # Min T
+        labels.append("min_temperature_cycles_1:100")
+
+        # Slope and intercept of linear fit to discharge capacity as a fn of cycle #, cycles 2 to 100
+
+        X[13], X[14] = np.polyfit(
+            list(range(1, i_final + 1)),
+            summary.discharge_capacity.iloc[list(range(1, i_final + 1))],
+            1,
+        )
+
+        labels.append("slope_discharge_capacity_cycle_number_2:100")
+        labels.append("intercept_discharge_capacity_cycle_number_2:100")
+
+        # Slope and intercept of linear fit to discharge capacity as a fn of cycle #, cycles 91 to 100
+        X[15], X[16] = np.polyfit(
+            list(range(i_mid, i_final + 1)),
+            summary.discharge_capacity.iloc[list(range(i_mid, i_final + 1))],
+            1,
+        )
+        labels.append("slope_discharge_capacity_cycle_number_91:100")
+        labels.append("intercept_discharge_capacity_cycle_number_91:100")
+
+        IR_trend = summary.dc_internal_resistance.iloc[
+            list(range(1, i_final + 1))]
+        if any(v == 0 for v in IR_trend):
+            IR_trend[IR_trend == 0] = np.nan
+
+        # Internal resistance minimum
+        X[17] = np.nanmin(IR_trend)
+        labels.append("min_internal_resistance_cycles_2:100")
+
+        # Internal resistance at cycle 2
+        X[18] = summary.dc_internal_resistance.iloc[1]
+        labels.append("internal_resistance_cycle_2")
+
+        # Internal resistance at cycle 100 - cycle 2
+        X[19] = (
+                summary.dc_internal_resistance.iloc[i_final] -
+                summary.dc_internal_resistance.iloc[1]
+        )
+        labels.append("internal_resistance_difference_cycles_2:100")
+
+        # Nominal capacity
+        end = self.hyperparameters["n_nominal_cycles"]
+        X[20] = np.median(summary.discharge_capacity.iloc[0: end])
+        labels.append("nominal_capacity_by_median")
+
+        X.columns = labels
+        self.features = X
\ No newline at end of file
diff --git a/beep/features/early_cycles/hppc.py b/beep/features/early_cycles/hppc.py
new file mode 100644
index 00000000..23c864f9
--- /dev/null
+++ b/beep/features/early_cycles/hppc.py
@@ -0,0 +1,83 @@
+import pandas as pd
+
+from beep import PROTOCOL_PARAMETERS_DIR
+from beep.features import helper_functions
+from beep.features.featurizer import BEEPEarlyCyclesFeaturizer
+
+
+class HPPCResistanceVoltageFeatures(BEEPEarlyCyclesFeaturizer):
+    DEFAULT_HYPERPARAMETERS = {
+        "test_time_filter_sec": 1000000,
+        "cycle_index_filter": 6,
+        "diag_pos": 1,
+        "soc_window": 8,
+        "parameters_path": PROTOCOL_PARAMETERS_DIR
+    }
+
+    def validate(self):
+        val, msg = helper_functions.check_diagnostic_validation(self.datapath)
+        if val:
+            conditions = []
+            conditions.append(
+                any(
+                    [
+                        "hppc" in x
+                        for x in
+                        self.datapath.diagnostic_summary.cycle_type.unique()
+                    ]
+                )
+            )
+            if all(conditions):
+                return True, None
+            else:
+                return False, "HPPC conditions not met for this cycler run"
+        else:
+            return val, msg
+
+    def create_features(self):
+        # Filter out low cycle numbers at the end of the test, corresponding to the "final" diagnostic
+        self.datapath.diagnostic_data = self.datapath.diagnostic_data[
+            ~((self.datapath.diagnostic_data.test_time > self.hyperparameters[
+                'test_time_filter_sec']) &
+              (self.datapath.diagnostic_data.cycle_index < self.hyperparameters[
+                  'cycle_index_filter']))
+        ]
+        self.datapath.diagnostic_data = self.datapath.diagnostic_data.groupby(
+            ["cycle_index", "step_index", "step_index_counter"]
+        ).filter(lambda x: ~x["test_time"].isnull().all())
+
+        # diffusion features
+        diffusion_features = helper_functions.get_diffusion_features(
+            self.datapath,
+        )
+
+        hppc_r = pd.DataFrame()
+        # the 9 by 6 dataframe
+        df_dr = helper_functions.get_dr_df(
+            self.datapath, self.hyperparameters["diag_pos"]
+        )
+        # transform this dataframe to be 1 by 54
+        columns = df_dr.columns
+        for column in columns:
+            for r in range(len(df_dr[column])):
+                name = column + str(r)
+                hppc_r[name] = [df_dr[column][r]]
+
+        # the variance of ocv features
+        hppc_ocv = helper_functions.get_hppc_ocv(
+            self.datapath,
+            self.hyperparameters["diag_pos"],
+            parameters_path=self.hyperparameters["parameters_path"]
+        )
+
+        # the v_diff features
+        v_diff = helper_functions.get_v_diff(
+            self.datapath,
+            self.hyperparameters["diag_pos"],
+            self.hyperparameters["soc_window"],
+            self.hyperparameters["parameters_path"]
+        )
+
+        # merge everything together as a final result dataframe
+        self.features = pd.concat(
+            [hppc_r, hppc_ocv, v_diff, diffusion_features], axis=1)
\ No newline at end of file
diff --git a/beep/features/early_cycles/summary.py b/beep/features/early_cycles/summary.py
new file mode 100644
index 00000000..e17ceea6
--- /dev/null
+++ b/beep/features/early_cycles/summary.py
@@ -0,0 +1,393 @@
+import numpy as np
+import pandas as pd
+from scipy.stats import skew, kurtosis
+
+from beep.features import helper_functions
+from beep import PROTOCOL_PARAMETERS_DIR
+from beep.features.featurizer import BEEPEarlyCyclesFeaturizer
+
+
+class CycleSummaryStats(BEEPEarlyCyclesFeaturizer):
+    DEFAULT_HYPERPARAMETERS = {
+        "cycle_comp_num": [10, 100],
+        "statistics": ["var", "min", "mean", "skew", "kurtosis", "abs",
+                       "square"]
+    }
+
+    def validate(self):
+        """
+        This function determines if the input data has the necessary attributes for
+        creation of this feature class. It should test for all of the possible reasons
+        that feature generation would fail for this particular input data.
+
+        Args:
+            processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run
+            params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object
+                gets featurized. These could be filters for column or row operations
+        Returns:
+            bool: True/False indication of ability to proceed with feature generation
+        """
+
+        # TODO: not sure this is necessary
+        # Check for data in each of the selected cycles
+        index_1, index_2 = self.hyperparameters['cycle_comp_num']
+        cycle_1 = self.datapath.structured_data[
+            self.datapath.structured_data.cycle_index == index_1]
+        cycle_2 = self.datapath.structured_data[
+            self.datapath.structured_data.cycle_index == index_2]
+        if len(cycle_1) == 0 or len(cycle_2) == 0:
+            return False, "Length of one or more comparison cycles is zero"
+
+        # TODO: check whether this is good
+        # Check for relevant data
+        required_columns = [
+            'charge_capacity',
+            'discharge_capacity',
+            'charge_energy',
+            'discharge_energy',
+        ]
+        pcycler_run_columns = self.datapath.structured_data.columns
+        if not all(
+                [column in pcycler_run_columns for column in required_columns]):
+            return False, f"Required column not present in all structured data " \
+                          f"(must have all of: {required_columns})"
+
+        return True, None
+
+    def create_features(self):
+        """
+        Generate features listed in early prediction manuscript using both diagnostic and regular cycles
+
+        Args:
+            processed_cycler_run (beep.structure.ProcessedCyclerRun)
+            params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object
+                gets featurized. These could be filters for column or row operations
+            parameters_path (str): Root directory storing project parameter files.
+
+        Returns:
+            X (pd.DataFrame): Dataframe containing the feature
+        """
+
+        # TODO: extend this dataframe and uncomment energy features when
+        #   structuring is refactored
+        X = pd.DataFrame(np.zeros((1, 28)))
+
+        reg_cycle_comp_num = self.hyperparameters.get("cycle_comp_num")
+        cycle_comp_1 = self.datapath.structured_data[
+            self.datapath.structured_data.cycle_index == reg_cycle_comp_num[1]
+            ]
+        cycle_comp_0 = self.datapath.structured_data[
+            self.datapath.structured_data.cycle_index == reg_cycle_comp_num[0]
+            ]
+        Qc100_1 = cycle_comp_1[
+            cycle_comp_1.step_type == "charge"].charge_capacity
+        Qc10_1 = cycle_comp_0[
+            cycle_comp_0.step_type == "charge"].charge_capacity
+        QcDiff = Qc100_1.values - Qc10_1.values
+        QcDiff = QcDiff[~np.isnan(QcDiff)]
+
+        X.loc[0, 0:6] = self.get_summary_statistics(QcDiff)
+
+        Qd100_1 = cycle_comp_1[
+            cycle_comp_1.step_type == "discharge"].discharge_capacity
+        Qd10_1 = cycle_comp_0[
+            cycle_comp_0.step_type == "discharge"].discharge_capacity
+        QdDiff = Qd100_1.values - Qd10_1.values
+        QdDiff = QdDiff[~np.isnan(QdDiff)]
+
+        X.loc[0, 7:13] = self.get_summary_statistics(QdDiff)
+
+        # # Charging Energy features
+        Ec100_1 = cycle_comp_1[cycle_comp_1.step_type == "charge"].charge_energy
+        Ec10_1 = cycle_comp_0[cycle_comp_0.step_type == "charge"].charge_energy
+        EcDiff = Ec100_1.values - Ec10_1.values
+        EcDiff = EcDiff[~np.isnan(EcDiff)]
+
+        X.loc[0, 14:20] = self.get_summary_statistics(EcDiff)
+
+        # # Discharging Energy features
+        Ed100_1 = cycle_comp_1[
+            cycle_comp_1.step_type == "charge"].discharge_energy
+        Ed10_1 = cycle_comp_0[
+            cycle_comp_0.step_type == "charge"].discharge_energy
+        EdDiff = Ed100_1.values - Ed10_1.values
+        EdDiff = EdDiff[~np.isnan(EdDiff)]
+
+        X.loc[0, 21:27] = self.get_summary_statistics(EdDiff)
+
+        quantities = [
+            "charging_capacity",
+            "discharging_capacity",
+            "charging_energy",
+            "discharging_energy",
+        ]
+
+        X.columns = [y + "_" + x for x in quantities for y in
+                     self.hyperparameters["statistics"]]
+
+        self.features = X
+
+    def get_summary_statistics(self, array):
+        """
+        Static method for getting values corresponding
+        to standard 7 operations that many beep features
+        use, i.e. log of absolute value of each of
+        variance, min, mean, skew, kurtosis, the sum of
+        the absolute values and the sum of squares
+
+        Args:
+            array (list, np.ndarray): array of values to get
+                standard operation values for, e.g. cycle
+                discharging capacity, QcDiff, etc.
+
+        Returns:
+            [float]: list of features
+
+        """
+
+        stats_names = self.hyperparameters["statistics"]
+        supported_stats = self.DEFAULT_HYPERPARAMETERS["statistics"]
+
+        if any(s not in supported_stats for s in stats_names):
+            raise ValueError(
+                f"Unsupported statistics in {stats_names}: supported statistics are {supported_stats}")
+
+        stats = []
+
+        if "var" in stats_names:
+            stats.append(np.log10(np.absolute(np.var(array))))
+        if "min" in stats_names:
+            stats.append(np.log10(np.absolute(min(array))))
+        if "mean" in stats_names:
+            stats.append(np.log10(np.absolute(np.mean(array))))
+        if "skew" in stats_names:
+            stats.append(np.log10(np.absolute(skew(array))))
+        if "kurtosis" in stats_names:
+            stats.append(np.log10(
+                np.absolute(kurtosis(array, fisher=False, bias=False))))
+        if "abs" in stats_names:
+            stats.append(np.log10(np.sum(np.absolute(array))))
+        if "square" in stats_names:
+            stats.append(np.log10(np.sum(np.square(array))))
+
+        return np.asarray(stats)
+
+
+class DiagnosticSummaryStats(CycleSummaryStats):
+    """
+    Object corresponding to summary statistics from a diagnostic cycle of
+    specific type. Includes constructors to create the features, object names
+    and metadata attributes in the object.  Inherits from RegularCycleSummaryStats
+    to reuse standard feature generation
+
+    name (str): predictor object name.
+    X (pandas.DataFrame): features in DataFrame format.
+    metadata (dict): information about the conditions, data
+        and code used to produce features
+    """
+    DEFAULT_HYPERPARAMETERS = {
+        "test_time_filter_sec": 1000000,
+        "cycle_index_filter": 6,
+        "diagnostic_cycle_type": 'rpt_0.2C',
+        "diag_pos_list": [0, 1],
+        "statistics": ["var", "min", "mean", "skew", "kurtosis", "abs",
+                       "square"],
+        "parameters_path": PROTOCOL_PARAMETERS_DIR
+    }
+
+    def validate(self):
+        """
+        This function determines if the input data has the necessary attributes for
+        creation of this feature class. It should test for all of the possible reasons
+        that feature generation would fail for this particular input data.
+
+        Args:
+            processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run
+            params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object
+                gets featurized. These could be filters for column or row operations
+
+        Returns:
+            bool: True/False indication of ability to proceed with feature generation
+        """
+        val, msg = helper_functions.check_diagnostic_validation(self.datapath)
+        if val:
+            df = self.datapath.diagnostic_summary
+            df = df[
+                df.cycle_type == self.hyperparameters["diagnostic_cycle_type"]]
+            if df.cycle_index.nunique() >= max(
+                    self.hyperparameters["diag_pos_list"]) + 1:
+                return True, None
+            else:
+                return False, "Diagnostic cycles insufficient for featurization"
+        else:
+            return val, msg
+
+    def get_summary_diff(
+            self,
+            pos=None,
+            cycle_types=("rpt_0.2C", "rpt_1C", "rpt_2C"),
+            metrics=(
+                    "discharge_capacity", "discharge_energy", "charge_capacity",
+                    "charge_energy")
+    ):
+        """
+        Helper function to calculate difference between summary values in the diagnostic cycles
+
+                Args:
+                    processed_cycler_run (beep.structure.ProcessedCyclerRun)
+                    pos (list): position of the diagnostics to use in the calculation
+                    cycle_types (list): calculate difference for these diagnostic types
+                    metrics (str): Calculate difference for these metrics
+
+                Returns:
+                    values (list): List of difference values to insert into the dataframe
+                    names (list): List of column headers to use in the creation of the dataframe
+                """
+        pos = self.hyperparameters["diag_pos_list"] if not pos else pos
+
+        values = []
+        names = []
+        for cycle_type in cycle_types:
+            diag_type_summary = self.datapath.diagnostic_summary[
+                self.datapath.diagnostic_summary.cycle_type == cycle_type]
+            for metric in metrics:
+                diff = (diag_type_summary.iloc[pos[1]][metric] -
+                        diag_type_summary.iloc[pos[0]][metric]) \
+                       / diag_type_summary.iloc[pos[0]][metric]
+                values.append(diff)
+                names.append("diag_sum_diff_" + str(pos[0]) + "_" + str(
+                    pos[1]) + "_" + cycle_type + metric)
+        return values, names
+
+    def create_features(self):
+        """
+        Generate features listed in early prediction manuscript using both diagnostic and regular cycles
+
+        Args:
+            self.datapathn (beep.structure.ProcessedCyclerRun)
+            self.hyperparameters (dict): dictionary of parameters governing how the ProcessedCyclerRun object
+                gets featurized. These could be filters for column or row operations
+            parameters_path (str): Root directory storing project parameter files.
+
+        Returns:
+            X (pd.DataFrame): Dataframe containing the feature
+        """
+        # Filter out "final" diagnostic cycles that have been appended to the end of the file with the wrong
+        # cycle number(test time is monotonic)
+        self.datapath.diagnostic_data = self.datapath.diagnostic_data[
+            ~((self.datapath.diagnostic_data.test_time > self.hyperparameters[
+                'test_time_filter_sec']) &
+              (self.datapath.diagnostic_data.cycle_index < self.hyperparameters[
+                  'cycle_index_filter']))
+        ]
+        self.datapath.diagnostic_data = self.datapath.diagnostic_data.groupby(
+            ["cycle_index", "step_index", "step_index_counter"]
+        ).filter(lambda x: ~x["test_time"].isnull().all())
+
+        diag_intrp = self.datapath.diagnostic_data
+
+        X = pd.DataFrame(np.zeros((1, 54)))
+
+        # Calculate the cycles and the steps for the selected diagnostics
+        cycles = diag_intrp.cycle_index[diag_intrp.cycle_type ==
+                                        self.hyperparameters[
+                                            "diagnostic_cycle_type"]].unique()
+        step_dict_0 = helper_functions.get_step_index(
+            self.datapath,
+            cycle_type=self.hyperparameters["diagnostic_cycle_type"],
+            diag_pos=self.hyperparameters["diag_pos_list"][0],
+            parameters_path=self.hyperparameters["parameters_path"]
+        )
+        step_dict_1 = helper_functions.get_step_index(
+            self.datapath,
+            cycle_type=self.hyperparameters["diagnostic_cycle_type"],
+            diag_pos=self.hyperparameters["diag_pos_list"][1],
+            parameters_path=self.hyperparameters["parameters_path"]
+        )
+
+        # Create masks for each position in the data
+        mask_pos_0_charge = ((diag_intrp.cycle_index == cycles[
+            self.hyperparameters["diag_pos_list"][0]]) &
+                             (diag_intrp.step_index == step_dict_0[
+                                 self.hyperparameters[
+                                     "diagnostic_cycle_type"] + '_charge']))
+        mask_pos_1_charge = ((diag_intrp.cycle_index == cycles[
+            self.hyperparameters["diag_pos_list"][1]]) &
+                             (diag_intrp.step_index == step_dict_1[
+                                 self.hyperparameters[
+                                     "diagnostic_cycle_type"] + '_charge']))
+        mask_pos_0_discharge = ((diag_intrp.cycle_index == cycles[
+            self.hyperparameters["diag_pos_list"][0]]) &
+                                (diag_intrp.step_index ==
+                                 step_dict_0[self.hyperparameters[
+                                                 "diagnostic_cycle_type"] + '_discharge']))
+        mask_pos_1_discharge = ((diag_intrp.cycle_index == cycles[
+            self.hyperparameters["diag_pos_list"][1]]) &
+                                (diag_intrp.step_index ==
+                                 step_dict_1[self.hyperparameters[
+                                                 "diagnostic_cycle_type"] + '_discharge']))
+
+        # Charging Capacity features
+        Qc_1 = diag_intrp.charge_capacity[mask_pos_1_charge]
+        Qc_0 = diag_intrp.charge_capacity[mask_pos_0_charge]
+        QcDiff = Qc_1.values - Qc_0.values
+        QcDiff = QcDiff[~np.isnan(QcDiff)]
+
+        X.loc[0, 0:6] = self.get_summary_statistics(QcDiff)
+
+        # Discharging Capacity features
+        Qd_1 = diag_intrp.discharge_capacity[mask_pos_1_discharge]
+        Qd_0 = diag_intrp.discharge_capacity[mask_pos_0_discharge]
+        QdDiff = Qd_1.values - Qd_0.values
+        QdDiff = QdDiff[~np.isnan(QdDiff)]
+
+        X.loc[0, 7:13] = self.get_summary_statistics(QdDiff)
+
+        # Charging Energy features
+        Ec_1 = diag_intrp.charge_energy[mask_pos_1_charge]
+        Ec_0 = diag_intrp.charge_energy[mask_pos_0_charge]
+        EcDiff = Ec_1.values - Ec_0.values
+        EcDiff = EcDiff[~np.isnan(EcDiff)]
+
+        X.loc[0, 14:20] = self.get_summary_statistics(EcDiff)
+
+        # Discharging Energy features
+        Ed_1 = diag_intrp.discharge_energy[mask_pos_1_discharge]
+        Ed_0 = diag_intrp.discharge_energy[mask_pos_0_discharge]
+        EdDiff = Ed_1.values - Ed_0.values
+        EdDiff = EdDiff[~np.isnan(EdDiff)]
+
+        X.loc[0, 21:27] = self.get_summary_statistics(EdDiff)
+
+        # Charging dQdV features
+        dQdVc_1 = diag_intrp.charge_dQdV[mask_pos_1_charge]
+        dQdVc_0 = diag_intrp.charge_dQdV[mask_pos_0_charge]
+        dQdVcDiff = dQdVc_1.values - dQdVc_0.values
+        dQdVcDiff = dQdVcDiff[~np.isnan(dQdVcDiff)]
+
+        X.loc[0, 28:34] = self.get_summary_statistics(dQdVcDiff)
+
+        # Discharging Capacity features
+        dQdVd_1 = diag_intrp.discharge_dQdV[mask_pos_1_discharge]
+        dQdVd_0 = diag_intrp.discharge_dQdV[mask_pos_0_discharge]
+        dQdVdDiff = dQdVd_1.values - dQdVd_0.values
+        dQdVdDiff = dQdVdDiff[~np.isnan(dQdVdDiff)]
+
+        X.loc[0, 35:41] = self.get_summary_statistics(dQdVdDiff)
+
+        X.loc[0, 42:53], names = self.get_summary_diff(
+            self.hyperparameters["diag_pos_list"]
+        )
+
+        quantities = [
+            "charging_capacity",
+            "discharging_capacity",
+            "charging_energy",
+            "discharging_energy",
+            "charging_dQdV",
+            "discharging_dQdV",
+        ]
+
+        X.columns = [y + "_" + x for x in quantities for y in
+                     self.hyperparameters["statistics"]] + names
+        self.features = X
diff --git a/beep/features/early_cycles/targets.py b/beep/features/early_cycles/targets.py
new file mode 100644
index 00000000..aecd7e80
--- /dev/null
+++ b/beep/features/early_cycles/targets.py
@@ -0,0 +1,262 @@
+import numpy as np
+import pandas as pd
+from scipy.interpolate import interp1d
+
+from beep import PROTOCOL_PARAMETERS_DIR
+from beep.features import helper_functions
+
+from beep.features.featurizer import BEEPFeaturizer, BEEPFeaturizationError
+
+
+class TrajectoryFastCharge(BEEPFeaturizer):
+    """
+    Object corresponding to cycle numbers at which the capacity drops below
+     specific percentages of the initial capacity. Computed on the discharge
+     portion of the regular fast charge cycles.
+
+    """
+
+    DEFAULT_HYPERPARAMETERS = {
+        "thresh_max_cap": 0.98,
+        "thresh_min_cap": 0.78,
+        "interval_cap": 0.03
+    }
+
+    def validate(self):
+        """
+        This function determines if the input data has the necessary attributes for
+        creation of this feature class. It should test for all of the possible reasons
+        that feature generation would fail for this particular input data.
+
+        Args:
+            processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run
+            params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object
+            gets featurized. These could be filters for column or row operations
+        Returns:
+            bool: True/False indication of ability to proceed with feature generation
+        """
+        cap = self.datapath.structured_summary.discharge_capacity
+        cap_ratio = cap.min() / cap.max()
+        max_cap = self.hyperparameters["thresh_max_cap"]
+        if not cap_ratio < max_cap:
+            return False, f"thresh_max_cap hyperparameter exceeded: {cap_ratio} !< {max_cap}"
+        else:
+            return True, None
+
+    def create_features(self):
+        """
+        Calculate the outcomes from the input data. In particular, the number of cycles
+        where we expect to reach certain thresholds of capacity loss
+        Args:
+            processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run
+            params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object
+            gets featurized. These could be filters for column or row operations
+            parameters_path (str): Root directory storing project parameter files.
+
+        Returns:
+            pd.DataFrame: cycles at which capacity/energy degradation exceeds thresholds
+        """
+        y = self.datapath.capacities_to_cycles(
+            self.hyperparameters["thresh_max_cap"],
+            self.hyperparameters["thresh_min_cap"],
+            self.hyperparameters["interval_cap"],
+        )
+        self.features = y
+
+
+class DiagnosticProperties(BEEPFeaturizer):
+    """
+    This class stores fractional levels of degradation in discharge capacity and discharge energy
+    relative to the first cycle at each diagnostic cycle, grouped by diagnostic cycle type.
+
+        name (str): predictor object name.
+        X (pandas.DataFrame): features in DataFrame format.
+        metadata (dict): information about the conditions, data
+            and code used to produce features
+
+    Hyperparameters:
+        parameters_dir (str): Full path to directory of parameters to analyse the
+            diagnostic cycles
+        quantities ([str]): Quantities to extract/get fractional metrics for
+            diagnostic cycles
+        cycle_type (str): Type of diagnostic cycle being used to measure the
+            fractional metric
+        metric (str): The metric being used for fractional capacity
+        interpolation_axes (list): List of column names to use for
+            x_axis interpolation (distance to threshold)
+        threshold (float): Value for the fractional metric to be considered above
+            or below threshold
+        filter_kinks (float): If set, cutoff value for the second derivative of
+            the fractional metric (cells with an abrupt change in degradation
+            rate might have something else going on). Typical value might be 0.04
+        extrapolate_threshold (bool): Should threshold crossing point be
+            extrapolated for cells that have not yet reached the threshold
+            (warning: this uses a linear extrapolation from the last two
+            diagnostic cycles)
+    """
+    DEFAULT_HYPERPARAMETERS = {
+        "parameters_dir": PROTOCOL_PARAMETERS_DIR,
+        "quantities": ['discharge_energy', 'discharge_capacity'],
+        "threshold": 0.8,
+        "metric": "discharge_energy",
+        "filter_kinks": None,
+        "interpolation_axes": ["normalized_regular_throughput", "cycle_index"],
+        "cycle_type": "rpt_1C",
+        "extrapolate_threshold": True
+    }
+
+    def validate(self):
+        """
+        This function determines if the input data has the necessary attributes for
+        creation of this feature class. It should test for all of the possible reasons
+        that feature generation would fail for this particular input data.
+
+        Args:
+            processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run
+            params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object
+            gets featurized. These could be filters for column or row operations
+        Returns:
+            bool: True/False indication of ability to proceed with feature generation
+        """
+        return helper_functions.check_diagnostic_validation(self.datapath)
+
+    def create_features(self):
+        """
+        Generates diagnostic-property features from processed cycler run, including values for n*x method
+        Args:
+            self.datapath (beep.structure.ProcessedCyclerRun): data from cycler run
+            params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object
+                gets featurized. These could be filters for column or row operations
+            parameters_path (str): Root directory storing project parameter files.
+
+        Returns:
+            pd.DataFrame: with "cycle_index", "fractional_metric", "x", "n", "cycle_type" and "metric" columns, rows
+            for each diagnostic cycle of the cell
+        """
+
+        parameters_path = self.hyperparameters["parameters_dir"]
+
+        cycle_types = self.datapath.diagnostic_summary.cycle_type.unique()
+        X = pd.DataFrame()
+        for quantity in self.hyperparameters["quantities"]:
+            for cycle_type in cycle_types:
+                summary_diag_cycle_type = helper_functions.get_fractional_quantity_remaining_nx(
+                    self.datapath, quantity, cycle_type,
+                    parameters_path=parameters_path
+                )
+
+                summary_diag_cycle_type.loc[:, "cycle_type"] = cycle_type
+                summary_diag_cycle_type.loc[:, "metric"] = quantity
+                X = X.append(summary_diag_cycle_type)
+
+        X_condensed = self.get_threshold_targets(X)
+        self.features = X_condensed
+
+    def get_threshold_targets(self, df):
+        """
+        Apply a threshold via interpolation for determining various
+        metrics (e.g., discharge energy) from diagnostic cycles.
+
+        Args:
+            df (pd.DataFrame): A dataframe of diagnostic cycle data
+                for a single battery cycler run.
+
+        Returns:
+            (pd.DataFrame): Contains a vector for interpolated/intercept
+                data for determining threshold.
+
+        """
+        cycle_type = self.hyperparameters["cycle_type"]
+        metric = self.hyperparameters["metric"]
+        interpolation_axes = self.hyperparameters["interpolation_axes"]
+        threshold = self.hyperparameters["threshold"]
+        filter_kinks = self.hyperparameters["filter_kinks"]
+        extrapolate_threshold = self.hyperparameters["extrapolate_threshold"]
+
+        if filter_kinks:
+            if np.any(df['fractional_metric'].diff().diff() < filter_kinks):
+                last_good_cycle = df[
+                    df['fractional_metric'].diff().diff() < filter_kinks][
+                    'cycle_index'].min()
+                df = df[df['cycle_index'] < last_good_cycle]
+
+        x_axes = []
+        for type in interpolation_axes:
+            x_axes.append(df[type])
+        y_interpolation_axis = df['fractional_metric']
+
+        # Logic around how to deal with cells that have not crossed threshold
+        if df['fractional_metric'].min() > threshold and \
+                not extrapolate_threshold:
+            BEEPFeaturizationError(
+                "DiagnosticProperties data has not crossed threshold "
+                "and extrapolation inaccurate"
+            )
+        elif df['fractional_metric'].min() > threshold and \
+                extrapolate_threshold:
+            fill_value = "extrapolate"
+            bounds_error = False
+            x_linspaces = []
+            for x_axis in x_axes:
+                y1 = y_interpolation_axis.iloc[-2]
+                y2 = y_interpolation_axis.iloc[-1]
+                x1 = x_axis.iloc[-2]
+                x2 = x_axis.iloc[-1]
+                x_thresh_extrap = (threshold - 0.1 - y1) * (x2 - x1) / (
+                        y2 - y1) + x1
+                x_linspaces.append(
+                    np.linspace(x_axis.min(), x_thresh_extrap, num=1000)
+                )
+        else:
+            fill_value = np.nan
+            bounds_error = True
+            x_linspaces = []
+            for x_axis in x_axes:
+                x_linspaces.append(
+                    np.linspace(x_axis.min(), x_axis.max(), num=1000))
+
+        f_axis = []
+        for x_axis in x_axes:
+            f_axis.append(
+                interp1d(
+                    x_axis,
+                    y_interpolation_axis,
+                    kind='linear',
+                    bounds_error=bounds_error,
+                    fill_value=fill_value
+                )
+            )
+
+        x_to_threshold = []
+        for indx, x_linspace in enumerate(x_linspaces):
+            crossing_array = abs(f_axis[indx](x_linspace) - threshold)
+            x_to_threshold.append(x_linspace[np.argmin(crossing_array)])
+
+        if ~(x_to_threshold[0] > 0) or ~(x_to_threshold[1] > 0):
+            raise BEEPFeaturizationError(
+                "DiagnosticProperties data does not have a positive value "
+                "to threshold"
+            )
+
+        if "normalized_regular_throughput" in interpolation_axes:
+            real_throughput_to_threshold = x_to_threshold[
+                                               interpolation_axes.index(
+                                                   "normalized_regular_throughput")] * \
+                                           df[
+                                               'initial_regular_throughput'].values[
+                                               0]
+            x_to_threshold.append(real_throughput_to_threshold)
+            interpolation_axes = interpolation_axes + [
+                "real_regular_throughput"]
+
+        threshold_dict = {
+            'initial_regular_throughput':
+                df['initial_regular_throughput'].values[0],
+        }
+
+        for indx, x_axis in enumerate(interpolation_axes):
+            threshold_dict[
+                cycle_type + metric + str(threshold) + '_' + x_axis] = [
+                x_to_threshold[indx]]
+
+        return pd.DataFrame(threshold_dict)
\ No newline at end of file
diff --git a/beep/features/featurizer.py b/beep/features/featurizer.py
new file mode 100644
index 00000000..2fb1f181
--- /dev/null
+++ b/beep/features/featurizer.py
@@ -0,0 +1,233 @@
+# Copyright [2020] [Toyota Research Institute]
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+For creating features and organizing them into datasets.
+
+"""
+import os
+import abc
+import json
+from typing import Union, Tuple
+
+import pandas as pd
+from monty.io import zopen
+from monty.json import MSONable
+from monty.serialization import dumpfn
+
+from beep.structure.base import BEEPDatapath
+
+
+class BEEPFeaturizationError(BaseException):
+    """Raise when a featurization-specific error occurs"""
+    pass
+
+
+class BEEPFeaturizer(MSONable, abc.ABC):
+    """
+    Base class for all beep feature generation.
+
+    From a structured battery file representing many cycles of one cell,
+    (AKA a structured datapath), produce a feature vector.
+
+    Works for generating both
+     - Vectors X to use as training vectors
+     - Vectors or scalars y to use as ML targets
+        (as problems may have multiple metrics to predict)
+
+    """
+
+    DEFAULT_HYPERPARAMETERS = {}
+
+    def __init__(self, structured_datapath: Union[BEEPDatapath, None],
+                 hyperparameters: Union[dict, None] = None):
+        # If all required hyperparameters are specified, use those
+        # If some subset of required hyperparameters are specified, throw error
+        # If no hyperparameters are specified, use defaults
+        if hyperparameters:
+            if all(k in hyperparameters for k in self.DEFAULT_HYPERPARAMETERS):
+                self.hyperparameters = hyperparameters
+            else:
+                raise BEEPFeaturizationError(
+                    f"Features cannot be created with incomplete set of "
+                    f"hyperparameters {hyperparameters.keys()} < "
+                    f"{self.DEFAULT_HYPERPARAMETERS.keys()}!")
+        else:
+            self.hyperparameters = self.DEFAULT_HYPERPARAMETERS
+
+        if structured_datapath is not None and not structured_datapath.is_structured:
+            raise BEEPFeaturizationError(
+                "BEEPDatapath input is not structured!")
+        self.datapath = structured_datapath
+
+        self.features = None
+
+        # In case these features are loaded from file
+        # Allow attrs which can hold relevant metadata without having
+        # to reload the original datapath
+        self.paths = self.datapath.paths if self.datapath else {}
+        self.metadata = self.datapath.metadata.raw if self.datapath else {}
+        self.linked_semiunique_id = self.datapath.semiunique_id if self.datapath else None
+
+    @abc.abstractmethod
+    def validate(self) -> Tuple[bool, Union[str, None]]:
+        """
+        Validate a featurizer on it's ingested datapath.
+
+        Returns:
+            (bool, str/None): The validation result and it's message.
+
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def create_features(self) -> None:
+        """
+        Should assign a dataframe to self.features.
+
+        Returns:
+            None
+        """
+        raise NotImplementedError
+
+    def as_dict(self):
+        """Serialize a BEEPDatapath as a dictionary.
+
+        Must not be loaded from legacy.
+
+        Returns:
+            (dict): corresponding to dictionary for serialization.
+
+        """
+
+        if self.features is None:
+            raise BEEPFeaturizationError(
+                "Cannot serialize features which have not been generated.")
+
+        features = self.features.to_dict("list")
+
+        return {
+            "@module": self.__class__.__module__,
+            "@class": self.__class__.__name__,
+
+            # Core parts of BEEPFeaturizer
+            "features": features,
+            "hyperparameters": self.hyperparameters,
+            "paths": self.paths,
+            "metadata": self.metadata,
+            "linked_datapath_semiunique_id": self.linked_semiunique_id
+        }
+
+    @classmethod
+    def from_dict(cls, d):
+        """Create a BEEPDatapath object from a dictionary.
+
+        Args:
+            d (dict): dictionary represenation.
+
+        Returns:
+            beep.structure.ProcessedCyclerRun: deserialized ProcessedCyclerRun.
+        """
+
+        # no need for original datapath
+        bf = cls(structured_datapath=None, hyperparameters=d["hyperparameters"])
+        bf.features = pd.DataFrame(d["features"])
+        bf.paths = d["paths"]
+        bf.metadata = d["metadata"]
+        bf.linked_semiunique_id = d["linked_datapath_semiunique_id"]
+        return bf
+
+    @classmethod
+    def from_json_file(cls, filename):
+        """Load a structured run previously saved to file.
+
+        .json.gz files are supported.
+
+        Loads a BEEPFeaturizer from json.
+
+        Can be used in combination with files serialized with BEEPFeatures.to_json_file.
+
+        Args:
+            filename (str, Pathlike): a json file from a structured run, serialzed with to_json_file.
+
+        Returns:
+            None
+        """
+        with zopen(filename, "r") as f:
+            d = json.load(f)
+
+        # Add this structured file path to the paths dict
+        paths = d.get("paths", {})
+        paths["features"] = os.path.abspath(filename)
+        d["paths"] = paths
+        return cls.from_dict(d)
+
+    def to_json_file(self, filename):
+        """Save a BEEPFeatures to disk as a json.
+
+        .json.gz files are supported.
+
+        Not named from_json to avoid conflict with MSONable.from_json(*)
+
+        Args:
+            filename (str, Pathlike): The filename to save the file to.
+            omit_raw (bool): If True, saves only structured (NOT RAW) data.
+                More efficient for saving/writing to disk.
+
+        Returns:
+            None
+        """
+        d = self.as_dict()
+        dumpfn(d, filename)
+
+
+class BEEPEarlyCyclesFeaturizer(BEEPFeaturizer):
+    """Base class for featurizers that return a constant number of features
+    for any number of cycles in a structured datapath.
+
+    These features are typically used for early prediction.
+
+    A BEEPEarlyCyclesFeaturizer always returns the same number of features
+    for files for datapaths with any number of samples. Thus,
+
+
+    [Datapath w/ 2 cycles]   ---> (vector of k features)
+
+    [Datapath w/ 100 cycles] ---> (vector of k features)
+    """
+    PER_CYCLE = False
+
+
+class BEEPPerCycleFeaturizer(BEEPFeaturizer):
+    """Base class for featurizers that return a vector of features for
+    EACH cycle in a structured datapath.
+
+    These features are generally used for analysis
+
+    A BEEPPerCycleFeaturizer always returns an (n x k) matrix of features
+    for datapaths with n cycles each producing k features. Thus,
+
+    [Datapath w/ 2 cycles]   ---> (2 x k feature matrix)
+
+    [Datapath w/ 100 cycles] ---> (100 x k feature matrix)
+
+    """
+    PER_CYCLE = True
+    SPECIAL_COLUMNS = ("cycle_index", "diag_pos")
+
+
+
+
+
+
diff --git a/beep/features/featurizer_helpers.py b/beep/features/helper_functions.py
similarity index 100%
rename from beep/features/featurizer_helpers.py
rename to beep/features/helper_functions.py
diff --git a/beep/features/intracell/__init__.py b/beep/features/intracell/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/beep/features/intracell_analysis.py b/beep/features/intracell/intracell_analysis.py
similarity index 100%
rename from beep/features/intracell_analysis.py
rename to beep/features/intracell/intracell_analysis.py
diff --git a/beep/features/intracell/intracell_analysisv2.py b/beep/features/intracell/intracell_analysisv2.py
new file mode 100644
index 00000000..bf42f9b7
--- /dev/null
+++ b/beep/features/intracell/intracell_analysisv2.py
@@ -0,0 +1,1410 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from matplotlib import cm
+from scipy.interpolate import interp1d
+from scipy.spatial import distance
+from scipy.optimize import differential_evolution
+
+
+class IntracellAnalysisV2:
+    # IA constants
+    FC_UPPER_VOLTAGE = 4.20
+    FC_LOWER_VOLTAGE = 2.70
+    NE_UPPER_VOLTAGE = 0.01
+    NE_LOWER_VOLTAGE = 1.50
+    PE_UPPER_VOLTAGE = 4.30
+    PE_LOWER_VOLTAGE = 2.86
+    THRESHOLD = 4.84 * 0.0
+
+    def __init__(self,
+                 pe_pristine_files_dict={},
+                 pe_pristine_usecols=['Ecell/V','Capacity/mA.h',
+                                      'SOC_aligned','c_rate',
+                                      'BVV_c_rate','Voltage_aligned'],
+                 ne_1_pristine_files_dict={},
+                 ne_1_pristine_usecols=['Ecell/V','Capacity/mA.h',
+                                      'SOC_aligned','c_rate',
+                                      'BVV_c_rate','Voltage_aligned'],
+                 Q_fc_nom=4.84,
+                 C_nom=-0.2,
+                 cycle_type='rpt_0.2C',
+                 step_type=0,
+                 error_type='V-Q',
+                 error_weighting='uniform',
+                 dvdq_bound=None,
+                 ne_2pos_file=None,
+                 ne_2neg_file=None
+                 ):
+        """
+        Invokes the cell electrode analysis class. This is a class designed to fit the cell and electrode
+        parameters in order to determine changes of electrodes within the full cell from only full cell cycling data.
+        Args:
+            pe_pristine_dict (str): dictionary for the half cell rate data of the pristine (uncycled) positive
+                electrode. Keys are the rate and the entries are dataframes of Voltage vs. SOC.
+            ne_1_pristine_dict (str): dictionary for the half cell rate data of the pristine (uncycled) negative
+                electrode. Keys are the rate and the entris are dataframes of Voltage vs. SOC.
+            cycle_type (str): type of diagnostic cycle for the fitting
+            step_type (int): charge or discharge (0 for charge, 1 for discharge)
+            error_type (str): defines which error metric is to be used 
+            ne_2neg_file (str): file name of the data for the negative component of the anode
+            ne_2pos_file (str): file name of the data for the positive component of the anode
+        """
+        
+        self.pe_pristine_files_dict = pe_pristine_files_dict
+        self.pe_pristine_dict = {}
+        [self.pe_pristine_dict.update(
+            {key:pd.read_csv(pe_pristine_files_dict[key],
+                             usecols=pe_pristine_usecols)
+                }) for key in pe_pristine_files_dict]
+        
+        self.ne_1_pristine_files_dict = ne_1_pristine_files_dict
+        self.ne_1_pristine_dict = {}
+        [self.ne_1_pristine_dict.update(
+            {key:pd.read_csv(ne_1_pristine_files_dict[key],
+                             usecols=ne_1_pristine_usecols)
+                }) for key in ne_1_pristine_files_dict]
+        
+        
+        
+        self.Q_fc_nom = Q_fc_nom
+        self.C_nom = C_nom
+
+        if ne_2neg_file and ne_2pos_file:
+            self.ne_2_pristine_pos = pd.read_csv(ne_2pos_file)
+            self.ne_2_pristine_neg = pd.read_csv(ne_2neg_file)
+        else:
+            self.ne_2_pristine_pos = pd.DataFrame()
+            self.ne_2_pristine_neg = pd.DataFrame()
+            
+        if step_type == 0:
+            self.capacity_col = 'charge_capacity'
+        else:
+            self.capacity_col = 'discharge_capacity'
+
+        self.cycle_type = cycle_type
+        self.step_type = step_type
+        self.error_type = error_type
+        self.error_weighting = error_weighting
+        self.dvdq_bound = dvdq_bound
+
+
+    def process_beep_cycle_data_for_candidate_halfcell_analysis_ah(self,
+                                                                cell_struct,
+                                                                cycle_index):
+        """
+        Ingests BEEP structured cycling data and cycle_index and returns
+                a Dataframe of evenly spaced capacity with corresponding voltage.
+        
+        Inputs:
+        cell_struct (MaccorDatapath): BEEP structured cycling data
+        cycle_index (int): cycle number at which to evaluate
+        
+        Outputs:
+        real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned (evenly spaced)
+                and Voltage_aligned
+        """
+
+        # filter the data down to the diagnostic type of interest
+        diag_type_cycles = cell_struct.diagnostic_data.loc[cell_struct.diagnostic_data['cycle_type'] == self.cycle_type]
+        real_cell_candidate_profile = diag_type_cycles.loc[
+            (diag_type_cycles.cycle_index == cycle_index)
+            & (diag_type_cycles.step_type == self.step_type)  # step_type = 0 is charge, 1 is discharge
+            & (diag_type_cycles.voltage < self.FC_UPPER_VOLTAGE)
+            & (diag_type_cycles[self.capacity_col] > 0)][['voltage', self.capacity_col]]
+        
+        # modify discharge data to match the V-Q directionality of charge data (increasing capacity w/ increasing voltage)
+        if self.step_type == 1:
+            real_cell_candidate_profile[self.capacity_col] = np.nanmax(real_cell_candidate_profile[self.capacity_col]) - real_cell_candidate_profile[self.capacity_col].copy()
+#         real_cell_candidate_profile['voltage'] = np.flipud(real_cell_candidate_profile['voltage'].copy())
+        
+        
+        # renaming capacity,voltage column
+        real_cell_candidate_profile['Q'] =  real_cell_candidate_profile[self.capacity_col]
+                                                     
+        real_cell_candidate_profile['Voltage'] = real_cell_candidate_profile['voltage']
+        real_cell_candidate_profile.drop('voltage', axis=1, inplace=True)
+        
+        # interpolate voltage along evenly spaced capacity axis
+        Q_vec = np.linspace(0, np.max(real_cell_candidate_profile['Q']),1001)  
+
+        real_cell_candidate_profile_aligned = pd.DataFrame()
+        real_cell_candidate_profile_interper = interp1d(real_cell_candidate_profile['Q'],
+                                                               real_cell_candidate_profile['Voltage'],
+                                                               bounds_error=False,
+                                                               fill_value=(self.FC_LOWER_VOLTAGE, self.FC_UPPER_VOLTAGE))
+        real_cell_candidate_profile_aligned['Voltage_aligned'] = real_cell_candidate_profile_interper(
+            Q_vec)
+
+        real_cell_candidate_profile_aligned['Q_aligned'] = Q_vec
+
+        return real_cell_candidate_profile_aligned
+    
+    def _impose_electrode_scale(self,
+                            pe_pristine=pd.DataFrame(),
+                            ne_1_pristine=pd.DataFrame(),
+                            ne_2_pristine_pos=pd.DataFrame(),
+                            ne_2_pristine_neg=pd.DataFrame(),
+                            lli=0.0, Q_pe=0.0, Q_ne=0.0, x_ne_2=0.0):
+        
+        """
+        Scales the reference electrodes according to specified capacities and 
+        offsets their capacities according to lli. Blends negative electrode materials.
+        
+        Inputs:
+        pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
+                electrode
+        ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
+                electrode
+        ne_2_pos (Dataframe): half cell data for the positive component of the anode
+        ne_2_neg (Dataframe): half cell data for the negative component of the anode
+        lli (float): Loss of Lithium Inventory - capacity of the misalignment between 
+                cathode and anode zero-capacity
+        Q_pe (float): capacity of the positive electrode (cathode)
+        Q_ne (float): capacity of the negative electrode (anode)
+        x_ne_2 (float): fraction of ne_2_pristine_pos or ne_2_pristine_neg 
+                (positive or negative value, respectively) to ne_1_pristine
+        
+        Outputs:
+        pe_degraded (Dataframe): positive electrode with imposed capacity 
+                scale to emulate degradation
+        ne_degraded (Dataframe): negative electrode with imposed capacity 
+                scale and capacity offset to emulate degradation
+        """
+
+        # Blend negative electrodes
+        ne_pristine = blend_electrodes(ne_1_pristine, ne_2_pristine_pos, ne_2_pristine_neg, x_ne_2)
+        
+        # rescaling pristine electrodes to Q_pe and Q_ne
+        pe_Q_scaled = pe_pristine.copy()
+        pe_Q_scaled['Q_aligned'] = (pe_Q_scaled['SOC_aligned']/np.nanmax(pe_Q_scaled['SOC_aligned']))*Q_pe
+        ne_Q_scaled = ne_pristine.copy()
+        ne_Q_scaled['Q_aligned'] = (ne_Q_scaled['SOC_aligned']/np.nanmax(ne_Q_scaled['SOC_aligned']))*Q_ne
+        
+        # translate pristine ne electrode with lli
+        ne_Q_scaled['Q_aligned'] = ne_Q_scaled['Q_aligned'] + lli
+        
+        # Re-interpolate to align dataframes for differencing
+        lower_Q = np.min((np.min(pe_Q_scaled['Q_aligned']),
+                            np.min(ne_Q_scaled['Q_aligned'])))
+        upper_Q = np.max((np.max(pe_Q_scaled['Q_aligned']),
+                            np.max(ne_Q_scaled['Q_aligned'])))
+        Q_vec = np.linspace(lower_Q, upper_Q, 1001)
+        
+        # Actually aligning the electrode Q's
+        pe_pristine_interper = interp1d(pe_Q_scaled['Q_aligned'],
+                                        pe_Q_scaled['Voltage_aligned'], bounds_error=False)
+        pe_degraded = pe_Q_scaled.copy()
+        pe_degraded['Q_aligned'] = Q_vec
+        pe_degraded['Voltage_aligned'] = pe_pristine_interper(Q_vec)
+        
+
+        ne_pristine_interper = interp1d(ne_Q_scaled['Q_aligned'],
+                                        ne_Q_scaled['Voltage_aligned'], bounds_error=False)
+        ne_degraded = ne_Q_scaled.copy()
+        ne_degraded['Q_aligned'] = Q_vec
+        ne_degraded['Voltage_aligned'] = ne_pristine_interper(Q_vec)
+
+        # Returning pe and ne degraded on an Ah basis
+        return pe_degraded, ne_degraded
+    
+    def halfcell_degradation_matching_ah(self, x, *params):
+        """
+        Calls underlying functions to impose degradation through electrode 
+        capacity scale and alignment through LLI. Modifies emulated full cell
+        data to be within full cell voltage range and calibrates (zeros) capacity
+        at the lowest permissible voltage. Interpolates real and emulated data onto
+        a common capacity axis.
+        
+        Inputs:
+        x (list): [LLI, Q_pe, Q_ne, x_ne_2]
+        *params:       
+                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
+                        electrode
+                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
+                        electrode
+                ne_2_pos (Dataframe): half cell data for the positive component of the anode
+                ne_2_neg (Dataframe): half cell data for the negative component of the anode
+                real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned 
+                        (evenly spaced) and Voltage_aligned
+
+        Outputs:
+        pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled,
+                offset, and aligned along capacity
+        ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled,
+                offset, and aligned along capacity
+
+        df_real_aligned (Dataframe): capacity and voltage interpolated evenly across
+                capacity for the real cell data
+        emulated_full_cell_aligned (Dataframe): capacity and voltage interpolated evenly
+                across capacity for the emulated cell data
+        """
+        
+        lli = x[0]
+        Q_pe = x[1]
+        Q_ne = x[2]
+        IR_coef_pe = x[3]
+        IR_coef_ne = x[4]
+        x_ne_2 = x[5]
+        
+        # Q_fc_nom (scalar) : nominal capacity of pristine cell
+        # C_nom (scalar) : nominal C-rate of pristine cell
+        Q_fc_nom = self.Q_fc_nom 
+        C_nom = self.C_nom
+        
+        pe_pristine_dict, ne_1_pristine_dict, ne_2_pristine_pos, ne_2_pristine_neg, real_cell_candidate_profile_aligned= params
+        
+        if len(pe_pristine_dict) == 1:
+            pe_pristine = pe_pristine_dict[list(pe_pristine_dict.keys())[0]]
+        elif len(pe_pristine_dict) > 1:
+            pe_at_I_eff, R_at_SOC_pe = (
+                self._get_effective_electrode_rate_data(pe_pristine_dict, Q_pe, Q_fc_nom, C_nom, IR_coef_pe)
+            )
+            pe_pristine = pe_at_I_eff # the effective rate profile is now the reference profile at pristine state
+        else:
+            raise ValueError('No dictionary entries in pe_pristine_dict')
+            
+        if len(ne_1_pristine_dict) == 1:
+            ne_1_pristine = ne_1_pristine_dict[list(ne_1_pristine_dict.keys())[0]]
+        elif len(ne_1_pristine_dict) > 1:
+            
+            ne_at_I_eff, R_at_SOC_ne = (
+                self._get_effective_electrode_rate_data( ne_1_pristine_dict, Q_ne, Q_fc_nom, C_nom, IR_coef_ne)
+                                                        )
+            ne_1_pristine = ne_at_I_eff # the effective rate profile is now the reference profile at pristine state
+        else:
+            raise ValueError('No dictionary entries in ne_pristine_dict')
+
+        # output degraded ne and pe (on a AH basis, with electrode alignment (NaNs for voltage, when no capacity actually at the corresponding capacity index))
+        pe_out, ne_out = self._impose_electrode_scale(pe_pristine, ne_1_pristine,
+                                                  ne_2_pristine_pos, ne_2_pristine_neg,
+                                                  lli, Q_pe,
+                                                  Q_ne, x_ne_2)
+        
+        # PE - NE = full cell voltage
+        emulated_full_cell_with_degradation = pd.DataFrame()
+        emulated_full_cell_with_degradation['Q_aligned'] = pe_out['Q_aligned'].copy()
+        emulated_full_cell_with_degradation['Voltage_aligned'] = pe_out['Voltage_aligned'] - ne_out['Voltage_aligned']
+        
+        # Replace emulated full cell values outside of voltage range with NaN
+        emulated_full_cell_with_degradation['Voltage_aligned'].loc[
+            emulated_full_cell_with_degradation['Voltage_aligned'] < self.FC_LOWER_VOLTAGE] = np.nan
+        emulated_full_cell_with_degradation['Voltage_aligned'].loc[
+            emulated_full_cell_with_degradation['Voltage_aligned'] > self.FC_UPPER_VOLTAGE] = np.nan
+        
+        ## Center the emulated full cell and half cell curves onto the same Q at which the real (degraded) capacity measurement started (self.FC_LOWER_VOLTAGE)
+        emulated_full_cell_with_degradation_zeroed = pd.DataFrame()
+
+        emulated_full_cell_with_degradation_zeroed['Voltage_aligned'] = emulated_full_cell_with_degradation[
+            'Voltage_aligned'].copy()
+
+        zeroing_value = emulated_full_cell_with_degradation['Q_aligned'].loc[
+            np.nanargmin(emulated_full_cell_with_degradation['Voltage_aligned'])
+                                                                            ]
+    
+        emulated_full_cell_with_degradation_zeroed['Q_aligned'] = \
+            (emulated_full_cell_with_degradation['Q_aligned'].copy() - zeroing_value)
+
+        pe_out_zeroed = pe_out.copy()
+        pe_out_zeroed['Q_aligned'] = pe_out['Q_aligned'] - zeroing_value
+
+        ne_out_zeroed = ne_out.copy()
+        ne_out_zeroed['Q_aligned'] = ne_out['Q_aligned'] - zeroing_value
+
+        # Interpolate full cell profiles across same Q range
+        min_Q = np.min(
+            real_cell_candidate_profile_aligned['Q_aligned'].loc[
+                ~real_cell_candidate_profile_aligned['Voltage_aligned'].isna()])
+        max_Q = np.max(
+            (real_cell_candidate_profile_aligned['Q_aligned'].loc[
+                ~real_cell_candidate_profile_aligned['Voltage_aligned'].isna()].max(),
+            emulated_full_cell_with_degradation_zeroed['Q_aligned'].loc[
+                ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()].max())
+                        )
+        emulated_interper = interp1d(emulated_full_cell_with_degradation_zeroed['Q_aligned'].loc[
+                                         ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()],
+                                     emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].loc[
+                                         ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()],
+                                     bounds_error=False)
+        real_interper = interp1d(
+            real_cell_candidate_profile_aligned['Q_aligned'].loc[
+                ~real_cell_candidate_profile_aligned['Voltage_aligned'].isna()],
+            real_cell_candidate_profile_aligned['Voltage_aligned'].loc[
+                ~real_cell_candidate_profile_aligned['Voltage_aligned'].isna()],
+            bounds_error=False)
+
+        Q_vec = np.linspace(min_Q, max_Q, 1001)
+
+        emulated_aligned = pd.DataFrame()
+        emulated_aligned['Q_aligned'] = Q_vec
+        emulated_aligned['Voltage_aligned'] = emulated_interper(Q_vec)
+
+        real_aligned = pd.DataFrame()
+        real_aligned['Q_aligned'] = Q_vec
+        real_aligned['Voltage_aligned'] = real_interper(Q_vec)
+
+        return pe_out_zeroed, ne_out_zeroed, real_aligned, emulated_aligned
+
+    def get_dQdV_over_V_from_degradation_matching_ah(self, x, *params):
+        """
+        This function imposes degradation scaling ,then outputs the dQdV representation of the emulated cell data.
+        
+        Inputs:
+        x (list): [LLI, Q_pe, Q_ne, x_ne_2]
+        *params:       
+                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
+                        electrode
+                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
+                        electrode
+                ne_2_pos (Dataframe): half cell data for the positive component of the anode
+                ne_2_neg (Dataframe): half cell data for the negative component of the anode
+                real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned 
+                        (evenly spaced) and Voltage_aligned
+        
+        Outputs:
+        pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled,
+                offset, and aligned along capacity
+        ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled,
+                offset, and aligned along capacity
+        dq_dv_over_v_real (Dataframe): dQdV across voltage for the real cell data
+        dq_dv_over_v_emulated (Dataframe): dQdV across voltage for the emulated cell data
+        df_real_interped (Dataframe): capacity and voltage interpolated evenly across
+                capacity for the real cell data
+        emulated_full_cell_interped (Dataframe): capacity and voltage interpolated evenly
+                across capacity for the emulated cell data
+        """
+        
+        pe_out_zeroed, ne_out_zeroed, df_real_interped, emulated_full_cell_interped = \
+            self.halfcell_degradation_matching_ah(x, *params)
+
+        # Calculate dQdV from full cell profiles
+        dq_dv_real = pd.DataFrame(np.gradient(df_real_interped['Q_aligned'], df_real_interped['Voltage_aligned']),
+                                  columns=['dQdV']).ewm(0.1).mean()
+        dq_dv_emulated = pd.DataFrame(
+            np.gradient(emulated_full_cell_interped['Q_aligned'], emulated_full_cell_interped['Voltage_aligned']),
+            columns=['dQdV']).ewm(0.1).mean()
+
+        # Include original data
+        dq_dv_real['Q_aligned'] = df_real_interped['Q_aligned']
+        dq_dv_real['Voltage_aligned'] = df_real_interped['Voltage_aligned']
+
+        dq_dv_emulated['Q_aligned'] = emulated_full_cell_interped['Q_aligned']
+        dq_dv_emulated['Voltage_aligned'] = emulated_full_cell_interped['Voltage_aligned']
+
+        # Interpolate dQdV and Q over V, aligns real and emulated over V
+        voltage_vec = np.linspace(self.FC_LOWER_VOLTAGE, self.FC_UPPER_VOLTAGE, 1001)
+
+        v_dq_dv_interper_real = interp1d(dq_dv_real['Voltage_aligned'].loc[~dq_dv_real['Voltage_aligned'].isna()],
+                                         dq_dv_real['dQdV'].loc[~dq_dv_real['Voltage_aligned'].isna()],
+                                         bounds_error=False, fill_value=0)
+        v_Q_interper_real = interp1d(dq_dv_real['Voltage_aligned'].loc[~dq_dv_real['Voltage_aligned'].isna()],
+                                       dq_dv_real['Q_aligned'].loc[~dq_dv_real['Voltage_aligned'].isna()],
+                                       bounds_error=False, fill_value=(0, np.max(df_real_interped['Q_aligned'])))
+
+        v_dq_dv_interper_emulated = interp1d(dq_dv_emulated['Voltage_aligned'].loc[
+                                                 ~dq_dv_emulated['Voltage_aligned'].isna()],
+                                             dq_dv_emulated['dQdV'].loc[~dq_dv_emulated['Voltage_aligned'].isna()],
+                                             bounds_error=False, fill_value=0)
+        v_Q_interper_emulated = interp1d(dq_dv_emulated['Voltage_aligned'].loc[
+                                               ~dq_dv_emulated['Voltage_aligned'].isna()],
+                                        dq_dv_emulated['Q_aligned'].loc[~dq_dv_emulated['Voltage_aligned'].isna()],
+                                           bounds_error=False, fill_value=(0,np.max(df_real_interped['Q_aligned'])))
+
+        dq_dv_over_v_real = pd.DataFrame(v_dq_dv_interper_real(voltage_vec), columns=['dQdV']).fillna(0)
+        dq_dv_over_v_real['Q_aligned'] = v_Q_interper_real(voltage_vec)
+        dq_dv_over_v_real['Voltage_aligned'] = voltage_vec
+
+        dq_dv_over_v_emulated = pd.DataFrame(v_dq_dv_interper_emulated(voltage_vec), columns=['dQdV']).fillna(0)
+        dq_dv_over_v_emulated['Q_aligned'] = v_Q_interper_emulated(voltage_vec)
+        dq_dv_over_v_emulated['Voltage_aligned'] = voltage_vec
+
+        return (pe_out_zeroed,
+                ne_out_zeroed,
+                dq_dv_over_v_real,
+                dq_dv_over_v_emulated,
+                df_real_interped,
+                emulated_full_cell_interped)
+
+    def get_dVdQ_over_Q_from_degradation_matching_ah(self, x, *params):
+        """
+        This function imposes degradation scaling ,then outputs the dVdQ representation of the emulated cell data.
+        Inputs:
+        x (list): [LLI, Q_pe, Q_ne, x_ne_2]
+        *params:       
+                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
+                        electrode
+                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
+                        electrode
+                ne_2_pos (Dataframe): half cell data for the positive component of the anode
+                ne_2_neg (Dataframe): half cell data for the negative component of the anode
+                real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned 
+                        (evenly spaced) and Voltage_aligned        
+        Outputs:
+        pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled,
+                offset, and aligned along capacity
+        ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled,
+                offset, and aligned along capacity
+        dv_dq_real (Dataframe): dVdQ across capacity for the real cell data
+        dv_dq_emulated (Dataframe): dVdQ across capacity for the emulated cell data
+        df_real_interped (Dataframe): capacity and voltage interpolated evenly across
+                capacity for the real cell data
+        emulated_full_cell_interped (Dataframe): capacity and voltage interpolated evenly
+                across capacity for the emulated cell data        
+        """
+        
+        pe_out_zeroed, ne_out_zeroed, df_real_interped, emulated_full_cell_interped = \
+            self.halfcell_degradation_matching_ah(x, *params)
+
+        # Calculate dVdQ from full cell profiles
+#         dv_dq_real = pd.DataFrame(np.gradient(df_real_interped['Voltage_aligned'], df_real_interped['Q_aligned']),
+#                                   columns=['dVdQ']).ewm(alpha=0.3).mean()
+#         dv_dq_emulated = pd.DataFrame(
+#             np.gradient(emulated_full_cell_interped['Voltage_aligned'], emulated_full_cell_interped['Q_aligned']),
+#             columns=['dVdQ']).ewm(alpha=0.3).mean()
+        dv_dq_real = pd.DataFrame(np.gradient(df_real_interped['Voltage_aligned'], df_real_interped['Q_aligned']),
+                                  columns=['dVdQ'])
+        dv_dq_emulated = pd.DataFrame(
+            np.gradient(emulated_full_cell_interped['Voltage_aligned'], emulated_full_cell_interped['Q_aligned']),
+            columns=['dVdQ'])
+
+        # Include original data
+        dv_dq_real['Q_aligned'] = df_real_interped['Q_aligned']
+        dv_dq_real['Voltage_aligned'] = df_real_interped['Voltage_aligned']
+
+        dv_dq_emulated['Q_aligned'] = emulated_full_cell_interped['Q_aligned']
+        dv_dq_emulated['Voltage_aligned'] = emulated_full_cell_interped['Voltage_aligned']
+
+        # Q interpolation not needed, as interpolated over Q by default
+       
+        return (pe_out_zeroed,
+                ne_out_zeroed,
+                dv_dq_real,
+                dv_dq_emulated,
+                df_real_interped,
+                emulated_full_cell_interped)
+    
+    def get_V_over_Q_from_degradation_matching_ah(self, x, *params):
+        """
+        This function imposes degradation scaling ,then outputs the V-Q representation of the emulated cell data.
+        Inputs:
+        x (list): [LLI, Q_pe, Q_ne, x_ne_2]
+        *params:       
+                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
+                        electrode
+                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
+                        electrode
+                ne_2_pos (Dataframe): half cell data for the positive component of the anode
+                ne_2_neg (Dataframe): half cell data for the negative component of the anode
+                real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned 
+                        (evenly spaced) and Voltage_aligned
+        Outputs:
+        pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled,
+                offset, and aligned along capacity
+        ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled,
+                offset, and aligned along capacity
+        df_real_interped (Dataframe): capacity and voltage interpolated evenly across
+                capacity for the real cell data
+        emulated_full_cell_interped (Dataframe): capacity and voltage interpolated evenly
+                across capacity for the emulated cell data
+        """
+        (pe_out_zeroed, ne_out_zeroed, real_aligned, emulated_aligned) = \
+            self.halfcell_degradation_matching_ah(x, *params)
+
+        min_soc_full_cell = np.min(real_aligned.loc[~real_aligned.Voltage_aligned.isna()].Q_aligned)
+        max_soc_full_cell = np.max((real_aligned.loc[~real_aligned.Voltage_aligned.isna()].Q_aligned.max(),
+                                    emulated_aligned.loc[~emulated_aligned.Voltage_aligned.isna()].Q_aligned.max()))
+
+        soc_vec_full_cell = np.linspace(min_soc_full_cell, max_soc_full_cell, 1001)
+
+        emulated_full_cell_interper = interp1d(
+            emulated_aligned.Q_aligned.loc[~emulated_aligned.Voltage_aligned.isna()],
+            emulated_aligned.Voltage_aligned.loc[~emulated_aligned.Voltage_aligned.isna()],
+            bounds_error=False)
+        real_full_cell_interper = interp1d(real_aligned.Q_aligned.loc[~real_aligned.Voltage_aligned.isna()],
+                                           real_aligned.Voltage_aligned.loc[~real_aligned.Voltage_aligned.isna()],
+                                           bounds_error=False)
+
+        # Interpolate the emulated full-cell profile
+        emulated_full_cell_interped = pd.DataFrame()
+        emulated_full_cell_interped['Q_aligned'] = soc_vec_full_cell
+        emulated_full_cell_interped['Voltage_aligned'] = emulated_full_cell_interper(soc_vec_full_cell)
+
+        # Interpolate the true full-cell profile
+        df_real_interped = emulated_full_cell_interped.copy()
+        df_real_interped['Q_aligned'] = soc_vec_full_cell
+        df_real_interped['Voltage_aligned'] = real_full_cell_interper(soc_vec_full_cell)
+        return pe_out_zeroed, ne_out_zeroed, df_real_interped, emulated_full_cell_interped
+    
+    
+    def get_V_over_Q_from_degradation_matching_ah_no_real(self, x, *params):
+        """
+        This function imposes degradation scaling ,then outputs the V-Q representation of the emulated cell data, in the absence of real cell data.
+        
+        Inputs:
+        x (list): [LLI, Q_pe, Q_ne, x_ne_2]
+        *params:       
+                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
+                        electrode
+                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
+                        electrode
+                ne_2_pos (Dataframe): half cell data for the positive component of the anode
+                ne_2_neg (Dataframe): half cell data for the negative component of the anode
+                real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned 
+                        (evenly spaced) and Voltage_aligned
+                        
+        Outputs:
+        pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled,
+                offset, and aligned along capacity
+        ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled,
+                offset, and aligned along capacity
+        emulated_full_cell_interped (Dataframe): capacity and voltage interpolated evenly
+                across capacity for the emulated cell data
+        
+        """
+        (pe_out_zeroed, ne_out_zeroed, emulated_aligned) = \
+            self.halfcell_degradation_matching_ah_no_real(x, *params)
+
+        min_Q_full_cell = np.min(emulated_aligned.loc[~emulated_aligned.Voltage_aligned.isna()].Q_aligned)
+        max_Q_full_cell = np.max(emulated_aligned.loc[~emulated_aligned.Voltage_aligned.isna()].Q_aligned)
+
+        Q_vec_full_cell = np.linspace(min_Q_full_cell, max_Q_full_cell, 1001)
+
+        emulated_full_cell_interper = interp1d(
+            emulated_aligned.Q_aligned.loc[~emulated_aligned.Voltage_aligned.isna()],
+            emulated_aligned.Voltage_aligned.loc[~emulated_aligned.Voltage_aligned.isna()],
+            bounds_error=False)
+
+        # Interpolate the emulated full-cell profile
+        emulated_full_cell_interped = pd.DataFrame()
+        emulated_full_cell_interped['Q_aligned'] = Q_vec_full_cell
+        emulated_full_cell_interped['Voltage_aligned'] = emulated_full_cell_interper(Q_vec_full_cell)
+
+        return pe_out_zeroed, ne_out_zeroed, emulated_full_cell_interped
+    
+    def halfcell_degradation_matching_ah_no_real(self, x, *params):
+        """
+        Calls underlying functions to impose degradation through electrode 
+        capacity scale and alignment through LLI. Modifies emulated full cell
+        data to be within full cell voltage range and calibrates (zeros) capacity
+        at the lowest permissible voltage.
+        
+        Inputs:
+        x (list): [LLI, Q_pe, Q_ne, x_ne_2]
+        *params:       
+                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
+                        electrode
+                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
+                        electrode
+                ne_2_pos (Dataframe): half cell data for the positive component of the anode
+                ne_2_neg (Dataframe): half cell data for the negative component of the anode
+                real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned 
+                        (evenly spaced) and Voltage_aligned
+                        
+        Outputs:
+        pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled,
+                offset, and aligned along capacity
+        ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled,
+                offset, and aligned along capacity
+        emulated_aligned (Dataframe): full cell data corresponding to the imposed degradation
+        """
+        lli = x[0]
+        Q_pe = x[1]
+        Q_ne = x[2]
+        x_ne_2 = x[3]
+
+        pe_pristine, ne_1_pristine, ne_2_pristine_pos, ne_2_pristine_neg = params
+
+        pe_out, ne_out = self._impose_electrode_scale(pe_pristine, ne_1_pristine,
+                                                  ne_2_pristine_pos, ne_2_pristine_neg,
+                                                  lli, Q_pe,
+                                                  Q_ne, x_ne_2) #outputs degraded ne and pe (on a AH basis, with electrode alignment (NaNs for voltage, when no overlap))
+        
+        emulated_full_cell_with_degradation = pd.DataFrame()
+        emulated_full_cell_with_degradation['Q_aligned'] = pe_out['Q_aligned'].copy()
+        emulated_full_cell_with_degradation['Voltage_aligned'] = pe_out['Voltage_aligned'] - ne_out['Voltage_aligned']
+        
+        # Replace emulated full cell values outside of voltage range with NaN
+        emulated_full_cell_with_degradation['Voltage_aligned'].loc[
+            emulated_full_cell_with_degradation['Voltage_aligned'] < self.FC_LOWER_VOLTAGE] = np.nan
+        emulated_full_cell_with_degradation['Voltage_aligned'].loc[
+            emulated_full_cell_with_degradation['Voltage_aligned'] > self.FC_UPPER_VOLTAGE] = np.nan
+
+
+        ## Center the emulated full cell and half cell curves onto the same Q at which the real (degraded) capacity measurement started (self.FC_LOWER_VOLTAGE)
+        emulated_full_cell_with_degradation_zeroed = pd.DataFrame()
+
+        emulated_full_cell_with_degradation_zeroed['Voltage_aligned'] = emulated_full_cell_with_degradation[
+            'Voltage_aligned']
+        
+        zeroing_value = emulated_full_cell_with_degradation['Q_aligned'].loc[
+            np.nanargmin(emulated_full_cell_with_degradation['Voltage_aligned'])
+                                                                            ]
+        
+        emulated_full_cell_with_degradation_zeroed['Q_aligned'] = \
+            (emulated_full_cell_with_degradation['Q_aligned'] - zeroing_value)
+
+        pe_out_zeroed = pe_out.copy()
+        pe_out_zeroed['Q_aligned'] = pe_out['Q_aligned'] - zeroing_value
+
+        ne_out_zeroed = ne_out.copy()
+        ne_out_zeroed['Q_aligned'] = ne_out['Q_aligned'] - zeroing_value
+
+        # Interpolate full profiles across same Q range
+        min_Q = np.min(
+            emulated_full_cell_with_degradation_zeroed['Q_aligned'].loc[
+                ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()])
+        max_Q = np.max(
+            emulated_full_cell_with_degradation_zeroed['Q_aligned'].loc[
+                ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()])
+        
+        emulated_interper = interp1d(emulated_full_cell_with_degradation_zeroed['Q_aligned'].loc[
+                                         ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()],
+                                     emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].loc[
+                                         ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()],
+                                     bounds_error=False)
+
+        Q_vec = np.linspace(min_Q, max_Q, 1001)
+
+        emulated_aligned = pd.DataFrame()
+        emulated_aligned['Q_aligned'] = Q_vec
+        emulated_aligned['Voltage_aligned'] = emulated_interper(Q_vec)
+
+        return pe_out_zeroed, ne_out_zeroed, emulated_aligned
+    
+    def _get_error_from_degradation_matching_ah(self, x, *params):
+        """
+        Wrapper function which selects the correct error sub routine and returns its error value.
+        
+        Inputs:
+        x (list): [LLI, Q_pe, Q_ne, x_ne_2]
+        *params:       
+                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
+                        electrode
+                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
+                        electrode
+                ne_2_pos (Dataframe): half cell data for the positive component of the anode
+                ne_2_neg (Dataframe): half cell data for the negative component of the anode
+                real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned 
+                        (evenly spaced) and Voltage_aligned        
+        
+        Outputs:
+            error value (float) - output of the specified error sub function
+        """
+        error_type = self.error_type
+        if error_type == 'V-Q':
+            return self._get_error_from_degradation_matching_V_Q(x,*params)[0]
+        elif error_type == 'dVdQ':
+            return self._get_error_from_degradation_matching_dVdQ(x,*params)[0]
+        elif error_type == 'dQdV':
+            return self._get_error_from_degradation_matching_dQdV(x,*params)[0]
+        else:
+            return self._get_error_from_degradation_matching_V_Q(x,*params)[0]
+        
+    def _get_error_from_degradation_matching_V_Q(self, x, *params):
+        """
+        Error function returning the mean standardized Euclidean distance of each point of the real curve to
+                the closest value on the emulated curve in the V-Q representation.
+        
+        Inputs:
+        x (list): [LLI, Q_pe, Q_ne, x_ne_2]
+        *params:       
+                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
+                        electrode
+                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
+                        electrode
+                ne_2_pos (Dataframe): half cell data for the positive component of the anode
+                ne_2_neg (Dataframe): half cell data for the negative component of the anode
+                real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned 
+                        (evenly spaced) and Voltage_aligned        
+        
+        Outputs:
+            error (float): output of the specified error sub function
+            error_vector (array): vector containingEuclidean distance of each point of the real curve to
+                the closest value on the emulated curve in the V-Q representation
+            XA (Dataframe): real full cell data used for error analysis
+            XB (Dataframe): emulated full cell  data used for error analysis
+        """
+        
+        try:
+            (pe_out_zeroed, ne_out_zeroed, real_aligned, emulated_aligned
+             ) = self.get_V_over_Q_from_degradation_matching_ah(x, *params)
+            
+            XA = real_aligned.dropna()
+            XB = emulated_aligned.dropna()
+            error_matrix = distance.cdist(XA, XB, 'seuclidean')
+            error_vector = error_matrix.min(axis = 1)
+            error = error_vector.mean()
+        except ValueError:
+            error = 100
+            return error, None, None, None
+        return error, error_vector, XA, XB
+    
+        # Pairwise euclidean from premade dQdV
+    def _get_error_from_degradation_matching_dQdV(self, x, *params):
+        """
+        Error function returning the mean standardized Euclidean distance of each point of the real curve to
+                the closest value on the emulated curve in the dQdV representation.
+        
+        Inputs:
+        x (list): [LLI, Q_pe, Q_ne, x_ne_2]
+        *params:       
+                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
+                        electrode
+                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
+                        electrode
+                ne_2_pos (Dataframe): half cell data for the positive component of the anode
+                ne_2_neg (Dataframe): half cell data for the negative component of the anode
+                real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned 
+                        (evenly spaced) and Voltage_aligned        
+        
+        Outputs:
+            error (float): output of the specified error sub function
+            error_vector (array): vector containing Euclidean distance of each point of the real curve to
+                the closest value on the emulated curve in the dQdV representation
+            XA (Dataframe): real full cell data used for error analysis
+            XB (Dataframe): emulated full cell  data used for error analysis
+        """
+        
+        try:
+            # Call dQdV generating function
+            (PE_out_zeroed,
+            NE_out_zeroed,
+            dQdV_over_v_real,
+            dQdV_over_v_emulated,
+            df_real_interped,
+            emulated_full_cell_interped) = self.get_dQdV_over_V_from_degradation_matching_ah(x, *params)
+
+            XA = dQdV_over_v_real[[ 'Voltage_aligned','dQdV']].dropna()
+            XB = dQdV_over_v_emulated[['Voltage_aligned', 'dQdV']].dropna()
+            error_matrix = distance.cdist(XA, XB, 'seuclidean')
+            error_vector = error_matrix.min(axis = 1)
+            error = error_vector.mean()
+            
+        except ValueError:
+            error = 100
+            return error, None, None, None
+        return error, error_vector, XA, XB
+    
+    def _get_error_from_degradation_matching_dVdQ(self, x, *params):
+        """
+        Error function returning the mean standardized Euclidean distance of each point of the real curve to
+                the closest value on the emulated curve in the dVdQ representation.
+        
+        Inputs:
+        x (list): [LLI, Q_pe, Q_ne, x_ne_2]
+        *params:       
+                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
+                        electrode
+                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
+                        electrode
+                ne_2_pos (Dataframe): half cell data for the positive component of the anode
+                ne_2_neg (Dataframe): half cell data for the negative component of the anode
+                real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned 
+                        (evenly spaced) and Voltage_aligned        
+        
+        Outputs:
+            error (float): output of the specified error sub function
+            error_vector (array): vector containing Euclidean distance of each point of the real curve to
+                the closest value on the emulated curve in the dVdQ representation
+            error_vector_weighted (array): error_vector multiplied by dQdV values as weighting
+            XA (Dataframe): real full cell data used for error analysis
+            XB (Dataframe): emulated full cell  data used for error analysis
+        """
+        
+#         try:
+        (PE_out_zeroed,
+                NE_out_zeroed,
+                dVdQ_over_Q_real,
+                dVdQ_over_Q_emulated,
+                df_real_interped,
+                emulated_full_cell_interped) = self.get_dVdQ_over_Q_from_degradation_matching_ah(x, *params)
+
+        XA = dVdQ_over_Q_real[[ 'Q_aligned','dVdQ']].dropna()
+        XB = dVdQ_over_Q_emulated[['Q_aligned', 'dVdQ']].dropna()
+            
+        if self.dvdq_bound is not None:
+            # down-select to values with dVdQ less than 0.8 V/Ahr to eliminate high-slope/high-valued region of dVdQ
+            XA = XA.loc[XA.dVdQ < self.dvdq_bound]
+            XB = XB.loc[XB.dVdQ < self.dvdq_bound]
+
+        error_matrix = distance.cdist(XA, XB, 'seuclidean')
+        error_vector_raw = error_matrix.min(axis = 1)
+
+        # apply weighting scheme
+        if self.error_weighting == 'dQdV':
+            dQdV_over_v_real = self.get_dQdV_over_V_from_degradation_matching_ah(x, *params)[2]
+            if self.dvdq_bound is not None:
+                error_vector_weighted = np.multiply(
+                    error_vector_raw,dQdV_over_v_real['dQdV'].loc[(XA.dVdQ < 0.8).index])
+            elif self.dvdq_bound is None:
+                error_vector_weighted = np.multiply(
+                    error_vector_raw,dQdV_over_v_real['dQdV'])                    
+        elif self.error_weighting == 'uniform':
+            error_vector_weighted = error_vector_raw
+        else:
+            raise ValueError('Unknown weighting scheme provided. Check the value of "error_weighting".')
+
+        error = error_vector_weighted.mean()
+            
+#         except ValueError:
+#             print('ValueError')
+#             error = 100
+#             return error, None, None, None, None
+        return error, error_vector_raw, error_vector_weighted, XA, XB
+    
+    def _get_error_from_synthetic_fitting_ah(self, x, *params):
+        """
+        Wrapper function which selects the correct error sub routine and returns its error value.
+        This function is specific to fitting synthetic data rather than real cycling data.
+        
+        Inputs:
+        x (list): [LLI, Q_pe, Q_ne, x_ne_2]
+        *params:       
+                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
+                        electrode
+                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
+                        electrode
+                ne_2_pos (Dataframe): half cell data for the positive component of the anode
+                ne_2_neg (Dataframe): half cell data for the negative component of the anode
+                real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned 
+                        (evenly spaced) and Voltage_aligned        
+        
+        Outputs:
+            error value (float) - output of the specified error sub function
+        """
+        
+        error_type = self.error_type
+        
+        try:
+            if error_type == 'V-Q':
+                return self._get_error_from_degradation_matching_V_Q(x, *params)[0]
+            elif error_type == 'dVdQ':
+                return self._get_error_from_degradation_matching_dVdQ(x, *params)[0]
+            elif error_type == 'dQdV':
+                return self._get_error_from_degradation_matching_dQdV(x, *params)[0]
+            else:
+                return self._get_error_from_degradation_matching_V_Q(x, *params)[0]
+        except:
+            print("Can't return error")
+            return 100
+        
+    def intracell_values_wrapper_ah(self,
+                                 cycle_index,
+                                 cell_struct,
+                                 degradation_bounds=None
+                                 ):
+        """
+        Wrapper function to solve capacity sizing and offset of reference electrodes to real full cell cycle data.
+        
+        Inputs:
+        cycle_index (int): the index of the cycle of interest of the structured real cycling data
+        cell_struct (MaccorDatapath): BEEP structured cycling data
+
+        Outputs:
+        loss_dict (dict): dictionary with key of cycle index and entry of a list of
+                error, LLI_opt, Q_pe_opt, Q_ne_opt, x_NE_2, Q_li
+        profiles_dict (dict): dictionary with key of cycle index and entry of a dictionary
+                containing various key/entry pairs of resulting from the fitting
+        """
+
+        if degradation_bounds is None:
+            degradation_bounds = ((0, 2.0),  # LLI
+                                  (2.5, 6.5),  # Q_pe (Q_pe must be > LLI)
+                                  (2.5, 6.5),  # Q_ne
+                                  (1,1), #IR coef pe
+                                  (1,1), #IR coef ne
+                                  (1, 1),  # (-1,1) x_NE_2
+                                  )
+
+        real_cell_candidate_profile_aligned = self.process_beep_cycle_data_for_candidate_halfcell_analysis_ah(
+            cell_struct,
+            cycle_index)
+        
+        
+
+        degradation_optimization_result = differential_evolution(self._get_error_from_degradation_matching_ah,
+                                                                 degradation_bounds,
+                                                                 args=(self.pe_pristine_dict,
+                                                                       self.ne_1_pristine_dict,
+                                                                       self.ne_2_pristine_pos,
+                                                                       self.ne_2_pristine_neg,
+                                                                       real_cell_candidate_profile_aligned,
+                                                                       ),
+                                                                 strategy='best1bin', maxiter=1000,
+                                                                 popsize=15, tol=0.01, mutation=0.5,
+                                                                 recombination=0.7,
+                                                                 seed=0,
+                                                                 callback=None, disp=False, polish=True,
+                                                                 init='latinhypercube',
+                                                                 atol=0, updating='deferred', workers=-1,
+                                                                 constraints=()
+                                                                 )
+#         print(degradation_optimization_result.x) #BVV
+        (PE_out_zeroed,
+         NE_out_zeroed,
+         dVdQ_over_Q_real,
+         dVdQ_over_Q_emulated,
+         df_real_interped,
+         emulated_full_cell_interped) = self.get_dVdQ_over_Q_from_degradation_matching_ah(
+            degradation_optimization_result.x,
+            self.pe_pristine_dict,
+            self.ne_1_pristine_dict,
+            self.ne_2_pristine_pos,
+            self.ne_2_pristine_neg,
+            real_cell_candidate_profile_aligned)
+    
+        (dQdV_over_v_real,
+         dQdV_over_v_emulated) = self.get_dQdV_over_V_from_degradation_matching_ah(
+                degradation_optimization_result.x,
+                self.pe_pristine_dict,
+                self.ne_1_pristine_dict,
+                self.ne_2_pristine_pos,
+                self.ne_2_pristine_neg,
+                real_cell_candidate_profile_aligned)[2:4]
+        #
+        electrode_info_df = self.get_electrode_info_ah(PE_out_zeroed, NE_out_zeroed)
+        #
+        error = degradation_optimization_result.fun
+        LLI_opt = degradation_optimization_result.x[0]
+        Q_pe_opt = degradation_optimization_result.x[1]
+        Q_ne_opt = degradation_optimization_result.x[2]
+        IR_coef_pe_opt = degradation_optimization_result.x[3]
+        IR_coef_ne_opt = degradation_optimization_result.x[4]
+        x_NE_2 = degradation_optimization_result.x[5]
+        
+        loss_dict = {cycle_index: np.append([error, LLI_opt, Q_pe_opt, Q_ne_opt,IR_coef_pe_opt,IR_coef_ne_opt,
+                                             x_NE_2],
+                                             electrode_info_df.iloc[-1].values)
+                     }
+        
+        
+        error, error_vector_raw, error_vector_weighted, XA, XB = self._get_error_from_degradation_matching_dVdQ(
+            [LLI_opt, Q_pe_opt, Q_ne_opt,IR_coef_pe_opt,IR_coef_ne_opt, x_NE_2],
+            self.pe_pristine_dict,
+            self.ne_1_pristine_dict,
+            self.ne_2_pristine_pos,
+            self.ne_2_pristine_neg,
+            real_cell_candidate_profile_aligned)
+        
+        profiles_per_cycle_dict = {'NE_zeroed' : NE_out_zeroed, 
+                                   'PE_zeroed' : PE_out_zeroed, 
+                                   'dVdQ_over_Q_real': dVdQ_over_Q_real,
+                                   'dVdQ_over_Q_emulated': dVdQ_over_Q_emulated,
+                                   'dQdV_over_v_real':dQdV_over_v_real,
+                                   'dQdV_over_v_emulated':dQdV_over_v_emulated,
+                                   'df_real_interped': df_real_interped,
+                                   'emulated_full_cell_interped': emulated_full_cell_interped ,
+                                   'real_cell_candidate_profile_aligned': real_cell_candidate_profile_aligned,
+                                   'error_vector_raw':error_vector_raw,
+                                   'error_vector_weighted':error_vector_weighted,
+                                   'XA':XA,
+                                   'XB':XB
+                                  }
+        
+        profiles_dict = {cycle_index: profiles_per_cycle_dict}
+
+        return loss_dict, profiles_dict
+    
+    def solve_emulated_degradation(self,
+                                 forward_simulated_profile,
+                                 degradation_bounds=None
+                                 ):
+        
+        """
+        
+        
+        """
+
+        if degradation_bounds is None:
+            degradation_bounds = ((0, 2.0),  # LLI
+                                  (2.5, 6.5),  # Q_pe
+                                  (2.5, 6.5),  # Q_ne
+                                  (1, 1),  # (-1,1) x_NE_2
+                                  )
+
+        degradation_optimization_result = differential_evolution(self._get_error_from_synthetic_fitting_ah,
+                                                                 degradation_bounds,
+                                                                 args=(self.pe_pristine_dict,
+                                                                       self.ne_1_pristine_dict,
+                                                                       self.ne_2_pristine_pos,
+                                                                       self.ne_2_pristine_neg,
+                                                                       forward_simulated_profile,
+                                                                       ),
+                                                                 strategy='best1bin', maxiter=100000,
+                                                                 popsize=15, tol=0.001, mutation=0.5,
+                                                                 recombination=0.7,
+                                                                 seed=1,
+                                                                 callback=None, disp=False, polish=True,
+                                                                 init='latinhypercube',
+                                                                 atol=0, updating='deferred', workers=-1,
+                                                                 constraints=()
+                                                                 )
+
+        return degradation_optimization_result
+    
+    def get_electrode_info_ah(self, pe_out_zeroed, ne_out_zeroed):
+        """
+        Calculates a variety of half-cell metrics at various positions in the full-cell profile.
+
+        Inputs:
+        pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled,
+                offset, and aligned along capacity
+        ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled,
+                offset, and aligned along capacity
+
+        Outputs:
+        electrode_info_df (Dataframe): dataframe containing a variety of half-cell metrics 
+            at various positions in the emulated full-cell profile.
+
+            pe_voltage_FC4p2V: voltage of the positive electrode (catahode) corresponding
+                to the full cell at 4.2V
+            ...
+            pe_voltage_FC2p7V: voltage of the positive electrode (catahode) corresponding
+                to the full cell at 2.7V
+
+            pe_soc_FC4p2V: state of charge of the positive electrode corresponding
+                to the full cell at 4.2V
+            ...
+            pe_soc_FC2p7V: state of charge of the positive electrode corresponding
+                to the full cell at 2.7V
+
+            ne_voltage_FC4p2V: voltage of the negative electrode (anode) corresponding
+                to the full cell at 4.2V
+            ...
+            ne_voltage_FC2p7V: voltage of the negative electrode (anode) corresponding
+                to the full cell at 2.7V
+
+            ne_soc_FC4p2V: state of charge of the anode electrode corresponding
+                to the full cell at 4.2V
+            ...
+            ne_soc_FC2p7V: state of charge of the anode electrode corresponding
+                to the full cell at 2.7V
+
+            Q_fc: capacity of the full cecll within the full cell voltage limits
+            Q_pe: capacity of the cathode
+            Q_ne: capacity of the anode [Ahr]
+            Q_li
+        """
+        pe_minus_ne_zeroed = pd.DataFrame(pe_out_zeroed['Voltage_aligned'] - ne_out_zeroed['Voltage_aligned'],
+                                            columns=['Voltage_aligned'])
+        pe_minus_ne_zeroed['Q_aligned'] = pe_out_zeroed['Q_aligned']
+
+        electrode_info_df = pd.DataFrame(index=[0])
+
+        electrode_info_df['pe_voltage_FC4p2V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.2))].Voltage_aligned
+        electrode_info_df['pe_voltage_FC4p1V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.1))].Voltage_aligned
+        electrode_info_df['pe_voltage_FC4p0V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.0))].Voltage_aligned
+        electrode_info_df['pe_voltage_FC3p9V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.9))].Voltage_aligned
+        electrode_info_df['pe_voltage_FC3p8V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.8))].Voltage_aligned
+        electrode_info_df['pe_voltage_FC3p7V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.7))].Voltage_aligned
+        electrode_info_df['pe_voltage_FC3p6V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.6))].Voltage_aligned
+        electrode_info_df['pe_voltage_FC3p5V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.5))].Voltage_aligned
+        electrode_info_df['pe_voltage_FC3p4V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.4))].Voltage_aligned
+        electrode_info_df['pe_voltage_FC3p3V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.3))].Voltage_aligned
+        electrode_info_df['pe_voltage_FC3p2V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.2))].Voltage_aligned
+        electrode_info_df['pe_voltage_FC3p1V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.1))].Voltage_aligned
+        electrode_info_df['pe_voltage_FC3p0V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.0))].Voltage_aligned
+        electrode_info_df['pe_voltage_FC2p9V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.9))].Voltage_aligned
+        electrode_info_df['pe_voltage_FC2p8V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.8))].Voltage_aligned
+        electrode_info_df['pe_voltage_FC2p7V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.7))].Voltage_aligned
+
+        electrode_info_df['pe_soc_FC4p2V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.2))].Q_aligned - 
+                            np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 4.2V
+        electrode_info_df['pe_soc_FC4p1V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.1))].Q_aligned - 
+                            np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 4.1V
+        electrode_info_df['pe_soc_FC4p0V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.0))].Q_aligned - 
+                            np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 4.0V
+        electrode_info_df['pe_soc_FC3p9V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.9))].Q_aligned - 
+                            np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 3.9V
+        electrode_info_df['pe_soc_FC3p8V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.8))].Q_aligned - 
+                            np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 3.8V
+        electrode_info_df['pe_soc_FC3p7V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.7))].Q_aligned - 
+                            np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 3.7V
+        electrode_info_df['pe_soc_FC3p6V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.6))].Q_aligned - 
+                            np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 3.6V
+        electrode_info_df['pe_soc_FC3p5V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.5))].Q_aligned - 
+                            np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 3.5V    
+        electrode_info_df['pe_soc_FC3p4V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.4))].Q_aligned - 
+                            np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 3.4V
+        electrode_info_df['pe_soc_FC3p3V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.3))].Q_aligned - 
+                            np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 3.3V    
+        electrode_info_df['pe_soc_FC3p2V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.2))].Q_aligned - 
+                            np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 3.2V    
+        electrode_info_df['pe_soc_FC3p1V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.1))].Q_aligned - 
+                            np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 3.1V    
+        electrode_info_df['pe_soc_FC3p0V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.0))].Q_aligned - 
+                            np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 3.0V
+        electrode_info_df['pe_soc_FC2p9V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.9))].Q_aligned - 
+                            np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 2.9V    
+        electrode_info_df['pe_soc_FC2p8V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.8))].Q_aligned - 
+                            np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 2.8V        
+        electrode_info_df['pe_soc_FC2p7V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.7))].Q_aligned - 
+                            np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 2.7V
+
+        electrode_info_df['ne_voltage_FC4p2V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.2))].Voltage_aligned
+        electrode_info_df['ne_voltage_FC4p1V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.1))].Voltage_aligned
+        electrode_info_df['ne_voltage_FC4p0V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.0))].Voltage_aligned
+        electrode_info_df['ne_voltage_FC3p9V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.9))].Voltage_aligned
+        electrode_info_df['ne_voltage_FC3p8V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.8))].Voltage_aligned
+        electrode_info_df['ne_voltage_FC3p7V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.7))].Voltage_aligned
+        electrode_info_df['ne_voltage_FC3p6V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.6))].Voltage_aligned
+        electrode_info_df['ne_voltage_FC3p5V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.5))].Voltage_aligned
+        electrode_info_df['ne_voltage_FC3p4V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.4))].Voltage_aligned
+        electrode_info_df['ne_voltage_FC3p3V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.3))].Voltage_aligned
+        electrode_info_df['ne_voltage_FC3p2V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.2))].Voltage_aligned
+        electrode_info_df['ne_voltage_FC3p1V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.1))].Voltage_aligned
+        electrode_info_df['ne_voltage_FC3p0V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.0))].Voltage_aligned
+        electrode_info_df['ne_voltage_FC2p9V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.9))].Voltage_aligned
+        electrode_info_df['ne_voltage_FC2p8V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.8))].Voltage_aligned
+        electrode_info_df['ne_voltage_FC2p7V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.7))].Voltage_aligned
+
+        electrode_info_df['ne_soc_FC4p2V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.2))].Q_aligned - 
+                            np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 4.2V    
+        electrode_info_df['ne_soc_FC4p1V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.1))].Q_aligned - 
+                            np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 4.1V
+        electrode_info_df['ne_soc_FC4p0V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.0))].Q_aligned - 
+                            np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 4.0V
+        electrode_info_df['ne_soc_FC3p9V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.9))].Q_aligned - 
+                            np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 3.9V
+        electrode_info_df['ne_soc_FC3p8V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.8))].Q_aligned - 
+                            np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 3Q_aligned.8V
+        electrode_info_df['ne_soc_FC3p7V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.7))].Q_aligned - 
+                            np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 3.7V
+        electrode_info_df['ne_soc_FC3p6V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.6))].Q_aligned - 
+                            np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 3.6V
+        electrode_info_df['ne_soc_FC3p5V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.5))].Q_aligned - 
+                            np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 3.5V
+        electrode_info_df['ne_soc_FC3p4V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.4))].Q_aligned - 
+                            np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 3.4V
+        electrode_info_df['ne_soc_FC3p3V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.3))].Q_aligned - 
+                            np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 3.3V
+        electrode_info_df['ne_soc_FC3p2V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.2))].Q_aligned - 
+                            np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 3.2V
+        electrode_info_df['ne_soc_FC3p1V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.1))].Q_aligned - 
+                            np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 3.1V
+        electrode_info_df['ne_soc_FC3p0V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.0))].Q_aligned - 
+                            np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 3.0V
+        electrode_info_df['ne_soc_FC2p9V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.9))].Q_aligned - 
+                            np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 2.9V
+        electrode_info_df['ne_soc_FC2p8V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.8))].Q_aligned - 
+                            np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 2.8V
+        electrode_info_df['ne_soc_FC2p7V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.7))].Q_aligned - 
+                            np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
+                                np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
+                                np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
+                        ) # 2.7V
+
+        electrode_info_df['Q_fc'] = pe_minus_ne_zeroed.loc[
+            np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.20))].Q_aligned
+
+        electrode_info_df['Q_pe'] = np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])
+
+        electrode_info_df['Q_ne'] = np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])
+
+        electrode_info_df['Q_li'] = np.max(pe_minus_ne_zeroed['Q_aligned'].loc[~pe_minus_ne_zeroed.Voltage_aligned.isna()]) - np.min(pe_minus_ne_zeroed['Q_aligned'].loc[~pe_minus_ne_zeroed.Voltage_aligned.isna()])
+        
+
+        return electrode_info_df
+    
+    def _get_effective_electrode_rate_data(self,electrode_rate_dict=dict(),
+                                          Q_electrode=4.84,Q_fc_nom=4.84,
+                                          C_nom=-0.2,IR_coef=1.0):
+        """
+        Inputs:
+        electrode_rate_dict (dict): dictonary of rate data for an electrode. Keys are float representing C rate; entries are dataframe of rate data with columns Voltage_aligned and SOC_aligned. Sign of cathode rate keys should be same as full cell (positive for charge, negative for discharge). Sign of anode rate keys should be opposite of the full cell.
+        Q_electrode (float): capacity of the electrode in Ahr
+        Q_fc_nom (float): nominal capacity of full cell which was used to define the nominal C rate
+        C_nom (float): nominal current rate in full cell (negative for discharge)
+        IR_coef (float): scaling coefficient of overpotential term, used in fitting
+        
+        Outputs:
+        electrode_at_I_eff (DataFrame): electrode data modified for effective rate, based on the supplied dictionary of rates and an overpotential scaling term.
+        
+        """
+        
+        I_eff = C_nom / (Q_electrode/Q_fc_nom)
+        C_nom_sign = np.sign(list(electrode_rate_dict.keys())[0])
+        
+        R_at_SOC = []
+        for SOC in electrode_rate_dict[list(electrode_rate_dict.keys())[0]]['SOC_aligned']:
+
+            V_at_SOC_for_rates = []
+            for rate in electrode_rate_dict.keys():
+                V_at_SOC_for_rates = np.append(V_at_SOC_for_rates,
+                                               electrode_rate_dict[rate]['Voltage_aligned'].loc[
+                                                   electrode_rate_dict[rate]['SOC_aligned'] == SOC
+                                               ])
+            R_at_SOC = np.append(R_at_SOC,
+                np.polynomial.polynomial.Polynomial.fit(
+                list(electrode_rate_dict.keys()),
+                V_at_SOC_for_rates,deg=2).deriv(m=1)(I_eff)
+                                    )
+        electrode_at_I_eff = pd.DataFrame()
+        electrode_at_I_eff['SOC_aligned'] = electrode_rate_dict[
+            list(electrode_rate_dict.keys())[0]]['SOC_aligned']
+        
+        closest_rate_key = list(electrode_rate_dict.keys())[
+            np.argmin(np.abs(
+                np.array(list(electrode_rate_dict.keys())) - I_eff))]
+        electrode_at_I_eff['Voltage_aligned'] = electrode_rate_dict[
+            closest_rate_key]['Voltage_aligned'] + IR_coef*(I_eff - (C_nom*C_nom_sign)*R_at_SOC)
+        
+        return electrode_at_I_eff, R_at_SOC
+
+def blend_electrodes(electrode_1, electrode_2_pos, electrode_2_neg, x_2): ## this function needs revisited
+    """
+    Blends two electrode materials from their SOC-V profiles to form a blended electrode.
+    
+    Inputs:
+    electrode_1: Primary material in electrode, typically Gr. DataFrame supplied with SOC evenly spaced and voltage.
+    electrode_2: Secondary material in electrode, typically Si. DataFrame supplied with SOC evenly spaced and
+        voltage as an additional column.
+    x_2: Fraction of electrode_2 material's capacity (not mass). Supplied as scalar value.
+    
+    Outputs:
+    df_blended_soc_mod (Dataframe): blended electrode with SOC_aligned and Voltage_aligned columns
+    """
+    if electrode_2_pos.empty:
+        df_blended = electrode_1
+        return df_blended
+
+    if electrode_2_neg.empty:
+        electrode_2 = electrode_2_pos
+        x_2 = np.abs(x_2)
+    elif x_2 > 0:
+        electrode_2 = electrode_2_pos
+    else:
+        electrode_2 = electrode_2_neg
+        x_2 = np.abs(x_2)
+
+    electrode_1_interper = interp1d(electrode_1['Voltage_aligned'], electrode_1['SOC_aligned'], bounds_error=False,
+                                    fill_value='extrapolate')
+    electrode_2_interper = interp1d(electrode_2['Voltage_aligned'], electrode_2['SOC_aligned'], bounds_error=False,
+                                    fill_value='extrapolate')
+
+    voltage_vec = np.linspace(np.min((np.min(electrode_1['Voltage_aligned']),
+                                      np.min(electrode_2['Voltage_aligned']))),
+                              np.max((np.max(electrode_1['Voltage_aligned']),
+                                      np.max(electrode_2['Voltage_aligned']))),
+                              1001)
+    electrode_1_voltage_aligned = pd.DataFrame(electrode_1_interper(voltage_vec), columns=['SOC'])
+    electrode_2_voltage_aligned = pd.DataFrame(electrode_2_interper(voltage_vec), columns=['SOC'])
+    electrode_1_voltage_aligned['Voltage'] = voltage_vec
+    electrode_2_voltage_aligned['Voltage'] = voltage_vec
+
+    df_blend_voltage_aligned = pd.DataFrame(
+        (1 - x_2) * electrode_1_voltage_aligned['SOC'] + x_2 * electrode_2_voltage_aligned['SOC'], columns=['SOC'])
+    df_blend_voltage_aligned['Voltage'] = electrode_1_voltage_aligned.merge(electrode_2_voltage_aligned,
+                                                                            on='Voltage')['Voltage']
+
+    df_blended_interper = interp1d(df_blend_voltage_aligned['SOC'], df_blend_voltage_aligned['Voltage'],
+                                   bounds_error=False)
+    soc_vec = np.linspace(0, 100, 1001)
+
+    df_blended = pd.DataFrame(df_blended_interper(soc_vec), columns=['Voltage_aligned'])
+    df_blended['SOC_aligned'] = soc_vec
+
+    # Modify NE to fully span 100% SOC within its valid voltage window
+    df_blended_soc_mod_interper = interp1d(df_blended['SOC_aligned'].loc[~df_blended['Voltage_aligned'].isna()],
+                                           df_blended['Voltage_aligned'].loc[~df_blended['Voltage_aligned'].isna()],
+                                           bounds_error=False)
+    soc_vec = np.linspace(np.min(df_blended['SOC_aligned'].loc[~df_blended['Voltage_aligned'].isna()]),
+                          np.max(df_blended['SOC_aligned'].loc[~df_blended['Voltage_aligned'].isna()]),
+                          1001)
+    df_blended_soc_mod = pd.DataFrame(df_blended_soc_mod_interper(soc_vec), columns=['Voltage_aligned'])
+    df_blended_soc_mod['SOC_aligned'] = soc_vec / np.max(soc_vec) * 100
+    return df_blended_soc_mod
+
diff --git a/beep/features/intracell_losses.py b/beep/features/intracell/intracell_losses.py
similarity index 96%
rename from beep/features/intracell_losses.py
rename to beep/features/intracell/intracell_losses.py
index e8ce7746..0ac1da92 100644
--- a/beep/features/intracell_losses.py
+++ b/beep/features/intracell/intracell_losses.py
@@ -3,15 +3,15 @@
 import pandas as pd
 
 from beep import PROTOCOL_PARAMETERS_DIR
-from beep.features import featurizer_helpers
-from beep.features.base import BEEPFeaturizer
-from beep.features.intracell_analysis import IntracellAnalysis
+from beep.features import helper_functions
+from beep.features.featurizer import BEEPEarlyCyclesFeaturizer
+from beep.features.intracell.intracell_analysis import IntracellAnalysis
 
 
 DEFAULT_CELL_INFO_DIR = os.path.join(PROTOCOL_PARAMETERS_DIR, "intracell_info")
 
 
-class IntracellCycles(BEEPFeaturizer):
+class IntracellAllCycles(BEEPEarlyCyclesFeaturizer):
     """
     Object corresponding to the fitted material parameters of the cell. Material parameters
     are determined by using high resolution half cell data to fit full cell dQdV curves. Rows
@@ -52,7 +52,7 @@ def validate(self):
         Returns:
             bool: True/False indication of ability to proceed with feature generation
         """
-        val, msg = featurizer_helpers.check_diagnostic_validation(self.datapath)
+        val, msg = helper_functions.check_diagnostic_validation(self.datapath)
         if val:
             conditions = []
 
@@ -131,7 +131,7 @@ def create_features(self):
         self.features = degradation_df
 
 
-class IntracellFeatures(IntracellCycles):
+class IntracellAllCyclesFeatures(IntracellAllCycles):
     """
     Object corresponding to the fitted material parameters of the cell. Material parameters
     are determined by using high resolution half cell data to fit full cell dQdV curves. The
diff --git a/beep/features/intracell_losses_v2.py b/beep/features/intracell/intracell_lossesv2.py
similarity index 77%
rename from beep/features/intracell_losses_v2.py
rename to beep/features/intracell/intracell_lossesv2.py
index ab0413d1..fdad7202 100644
--- a/beep/features/intracell_losses_v2.py
+++ b/beep/features/intracell/intracell_lossesv2.py
@@ -1,17 +1,16 @@
 import os
-
+import numpy as np
 import pandas as pd
 
 from beep import PROTOCOL_PARAMETERS_DIR
-from beep.features import featurizer_helpers
-from beep.features.base import BEEPFeaturizer
-from beep.features.intracell_analysis_v2 import IntracellAnalysisV2
-
+from beep.features import helper_functions
+from beep.features.featurizer import BEEPPerCycleFeaturizer
+from beep.features.intracell.intracell_analysisv2 import IntracellAnalysisV2
 
 DEFAULT_CELL_INFO_DIR = os.path.join(PROTOCOL_PARAMETERS_DIR, "intracell_info")
 
 
-class IntracellCyclesV2(BEEPFeaturizer):
+class IntracellCyclesV2(BEEPPerCycleFeaturizer):
     """
     Object corresponding to the fitted material parameters of the cell. Material parameters
     are determined by using high resolution half cell data to fit full cell dQdV curves. Rows
@@ -24,19 +23,24 @@ class IntracellCyclesV2(BEEPFeaturizer):
     """
 
     DEFAULT_HYPERPARAMETERS = {
-        "diagnostic_cycle_type": 'rpt_0.2C',
-        "step_type": 0,
-        # Paths for anode files should be absolute
-        # Defaults are for the specified names in the current dir
-        "anode_file": os.path.join(
-            DEFAULT_CELL_INFO_DIR,
-            'anode_test.csv'
-        ),
-        "cathode_file": os.path.join(
-            DEFAULT_CELL_INFO_DIR,
-            'cathode_test.csv'
-        ),
-    }
+        'pe_pristine_files_dict':{},
+        'pe_pristine_usecols':['Ecell/V','Capacity/mA.h',
+                               'SOC_aligned','c_rate',
+                               'BVV_c_rate','Voltage_aligned'],
+        'ne_1_pristine_files_dict':{},
+        'ne_1_pristine_usecols':['Ecell/V','Capacity/mA.h',
+                                 'SOC_aligned','c_rate',
+                                 'BVV_c_rate','Voltage_aligned'],
+        'Q_fc_nom':4.84,
+        'C_nom':-0.2,
+        'cycle_type':'rpt_0.2C',
+        'step_type': 1, 
+        'error_type':'dVdQ',
+        'error_weighting':'dQdV',
+        'dvdq_bound':None,
+        'ne_2pos_file':None,
+        'ne_2neg_file':None
+    } 
 
     def validate(self):
         """
@@ -52,13 +56,13 @@ def validate(self):
         Returns:
             bool: True/False indication of ability to proceed with feature generation
         """
-        val, msg = featurizer_helpers.check_diagnostic_validation(self.datapath)
+        val, msg = helper_functions.check_diagnostic_validation(self.datapath)
         if val:
             conditions = []
 
             # Ensure overlap of cycle indices above threshold and matching cycle type
             eol_cycle_index_list = self.datapath.diagnostic_summary[
-                (self.datapath.diagnostic_summary.cycle_type == self.hyperparameters["diagnostic_cycle_type"]) &
+                (self.datapath.diagnostic_summary.cycle_type == self.hyperparameters["cycle_type"]) &
                 (self.datapath.diagnostic_summary.discharge_capacity > IntracellAnalysisV2.THRESHOLD)
                 ].cycle_index.to_list()
             if not eol_cycle_index_list:
@@ -93,10 +97,19 @@ def create_features(self):
              (pd.DataFrame) containing the cell material parameters as a function of cycle index
         """
         ia = IntracellAnalysisV2(
-            self.hyperparameters["cathode_file"],
-            self.hyperparameters["anode_file"],
-            cycle_type=self.hyperparameters["diagnostic_cycle_type"],
-            step_type=self.hyperparameters["step_type"]
+            pe_pristine_files_dict=self.hyperparameters["pe_pristine_files_dict"],
+            pe_pristine_usecols=self.hyperparameters["pe_pristine_usecols"],
+            ne_1_pristine_files_dict=self.hyperparameters["ne_1_pristine_files_dict"],
+            ne_1_pristine_usecols=self.hyperparameters["ne_1_pristine_usecols"],
+            Q_fc_nom=self.hyperparameters["Q_fc_nom"],
+            C_nom=self.hyperparameters["C_nom"],
+            cycle_type=self.hyperparameters["cycle_type"],
+            step_type=self.hyperparameters["step_type"],
+            error_type=self.hyperparameters["error_type"],
+            error_weighting=self.hyperparameters["error_weighting"],
+            dvdq_bound=self.hyperparameters["dvdq_bound"],
+            ne_2pos_file=self.hyperparameters["ne_2pos_file"],
+            ne_2neg_file=self.hyperparameters["ne_2neg_file"]
         )
 
         # (cell_init_aligned, cell_init_profile, PE_matched, NE_matched) = ia.intracell_wrapper_init(
@@ -111,15 +124,17 @@ def create_features(self):
         # initialize dicts before for loop
         dataset_dict_of_cell_degradation_path = dict()
         real_cell_dict_of_profiles = dict()
-        for i, cycle_index in enumerate(eol_cycle_index_list):
+        for diag_pos, cycle_index in enumerate(eol_cycle_index_list):
             loss_dict, profiles_dict = ia.intracell_values_wrapper_ah(cycle_index,
                                                                       self.datapath
                                                                       )
+            loss_dict[cycle_index] = np.append(diag_pos,loss_dict[cycle_index])
             dataset_dict_of_cell_degradation_path.update(loss_dict)
-            real_cell_dict_of_profiles.update(profiles_dict)
+#             real_cell_dict_of_profiles.update(profiles_dict)
 
         degradation_df = pd.DataFrame(dataset_dict_of_cell_degradation_path,
-                                      index=['rmse_error', 'LLI_opt', 'Q_pe_opt', 'Q_ne_opt', 'x_NE_2',
+                                      index=['diag_pos','rmse_error', 'LLI_opt', 'Q_pe_opt', 'Q_ne_opt', 'x_NE_2',
+                                             'IR_coef_pe_opt','IR_coef_ne_opt',
                                              'pe_voltage_FC4p2V', 'pe_voltage_FC4p1V', 'pe_voltage_FC4p0V',
                                              'pe_voltage_FC3p9V', 'pe_voltage_FC3p8V', 'pe_voltage_FC3p7V',
                                              'pe_voltage_FC3p6V', 'pe_voltage_FC3p5V', 'pe_voltage_FC3p4V',
@@ -143,6 +158,8 @@ def create_features(self):
                                              'ne_soc_FC3p2V', 'ne_soc_FC3p1V',
                                              'ne_soc_FC3p0V', 'ne_soc_FC2p9V', 'ne_soc_FC2p8V', 'ne_soc_FC2p7V',
                                              'Q_fc', 'Q_pe', 'Q_ne', 'Q_li']).T
+        degradation_df = degradation_df.reset_index().rename(columns={'index':'cycle_index'})
+        degradation_df['diag_pos'] = degradation_df['diag_pos'].astype(int)
         self.features = degradation_df
 
 
@@ -173,10 +190,17 @@ def create_features(self):
         """
 
         ia = IntracellAnalysisV2(
-            self.hyperparameters["cathode_file"],
-            self.hyperparameters["anode_file"],
-            cycle_type=self.hyperparameters["diagnostic_cycle_type"],
-            step_type=self.hyperparameters["step_type"]
+            pe_pristine_dict=self.hyperparameters["pe_pristine_dict"],
+            ne_1_pristine_dict=self.hyperparameters["ne_1_pristine_dict"],
+            Q_fc_nom=self.hyperparameters["Q_fc_nom"],
+            C_nom=self.hyperparameters["C_nom"],
+            cycle_type=self.hyperparameters["cycle_type"],
+            step_type=self.hyperparameters["step_type"],
+            error_type=self.hyperparameters["error_type"],
+            error_weighting=self.hyperparameters["error_weighting"],
+            dvdq_bound=self.hyperparameters["dvdq_bound"],
+            ne_2pos_file=self.hyperparameters["ne_2pos_file"],
+            ne_2neg_file=self.hyperparameters["ne_2neg_file"]
         )
 
         # (cell_init_aligned, cell_init_profile, PE_matched, NE_matched) = ia.intracell_wrapper_init(
@@ -228,4 +252,4 @@ def create_features(self):
         diag_1_names = ["diag_1_" + name for name in degradation_df.columns]
         values = {0: degradation_df.iloc[0].tolist() + degradation_df.iloc[1].tolist()}
         features_df = pd.DataFrame(values, index=diag_0_names+diag_1_names).T
-        self.features = features_df
+        self.features = features_df
\ No newline at end of file
diff --git a/beep/features/intracell_analysis_v2.py b/beep/features/intracell_analysis_v2.py
deleted file mode 100644
index a88b44ee..00000000
--- a/beep/features/intracell_analysis_v2.py
+++ /dev/null
@@ -1,1320 +0,0 @@
-import numpy as np
-import pandas as pd
-from scipy.interpolate import interp1d
-from scipy.spatial import distance
-from scipy.optimize import differential_evolution
-
-
-class IntracellAnalysisV2:
-    # IA constants
-    FC_UPPER_VOLTAGE = 4.20
-    FC_LOWER_VOLTAGE = 2.70
-    NE_UPPER_VOLTAGE = 0.01
-    NE_LOWER_VOLTAGE = 1.50
-    PE_UPPER_VOLTAGE = 4.30
-    PE_LOWER_VOLTAGE = 2.86
-    THRESHOLD = 4.84 * 0.0
-
-    def __init__(self,
-                 pe_pristine_file,
-                 ne_pristine_file,
-                 cycle_type='rpt_0.2C',
-                 step_type=0,
-                 error_type='V-Q',
-                 ne_2pos_file=None,
-                 ne_2neg_file=None
-                 ):
-        """
-        Invokes the cell electrode analysis class. This is a class designed to fit the cell and electrode
-        parameters in order to determine changes of electrodes within the full cell from only full cell cycling data.
-        Args:
-            pe_pristine_file (str): file name for the half cell data of the pristine (uncycled) positive
-                electrode
-            ne_pristine_file (str): file name for the half cell data of the pristine (uncycled) negative
-                electrode
-            cycle_type (str): type of diagnostic cycle for the fitting
-            step_type (int): charge or discharge (0 for charge, 1 for discharge)
-            error_type (str): defines which error metric is to be used
-            ne_2neg_file (str): file name of the data for the negative component of the anode
-            ne_2pos_file (str): file name of the data for the positive component of the anode
-        """
-        self.pe_pristine = pd.read_csv(pe_pristine_file, usecols=['SOC_aligned', 'Voltage_aligned'])
-        self.ne_1_pristine = pd.read_csv(ne_pristine_file, usecols=['SOC_aligned', 'Voltage_aligned'])
-
-        if ne_2neg_file and ne_2pos_file:
-            self.ne_2_pristine_pos = pd.read_csv(ne_2pos_file)
-            self.ne_2_pristine_neg = pd.read_csv(ne_2neg_file)
-        else:
-            self.ne_2_pristine_pos = pd.DataFrame()
-            self.ne_2_pristine_neg = pd.DataFrame()
-
-        if step_type == 0:
-            self.capacity_col = 'charge_capacity'
-        else:
-            self.capacity_col = 'discharge_capacity'
-
-        self.cycle_type = cycle_type
-        self.step_type = step_type
-        self.error_type = error_type
-
-    def process_beep_cycle_data_for_candidate_halfcell_analysis_ah(self,
-                                                                   cell_struct,
-                                                                   cycle_index):
-        """
-        Ingests BEEP structured cycling data and cycle_index and returns
-                a Dataframe of evenly spaced capacity with corresponding voltage.
-
-        Inputs:
-        cell_struct (MaccorDatapath): BEEP structured cycling data
-        cycle_index (int): cycle number at which to evaluate
-
-        Outputs:
-        real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned (evenly spaced)
-                and Voltage_aligned
-        """
-
-        # filter the data down to the diagnostic type of interest
-        diag_type_cycles = cell_struct.diagnostic_data.loc[cell_struct.diagnostic_data['cycle_type'] == self.cycle_type]
-        real_cell_candidate_charge_profile = diag_type_cycles.loc[
-            (diag_type_cycles.cycle_index == cycle_index)
-            & (diag_type_cycles.step_type == 0)  # step_type = 0 is charge, 1 is discharge
-            & (diag_type_cycles.voltage < self.FC_UPPER_VOLTAGE)
-            & (diag_type_cycles[self.capacity_col] > 0)][['voltage', 'charge_capacity']]
-
-        # renaming capacity,voltage column
-        real_cell_candidate_charge_profile['Q'] = real_cell_candidate_charge_profile['charge_capacity']
-
-        real_cell_candidate_charge_profile['Voltage'] = real_cell_candidate_charge_profile['voltage']
-        real_cell_candidate_charge_profile.drop('voltage', axis=1, inplace=True)
-
-        # interpolate voltage along evenly spaced capacity axis
-        q_vec = np.linspace(0, np.max(real_cell_candidate_charge_profile['Q']), 1001)
-
-        real_cell_candidate_charge_profile_aligned = pd.DataFrame()
-        real_cell_candidate_charge_profile_interper = interp1d(real_cell_candidate_charge_profile['Q'],
-                                                               real_cell_candidate_charge_profile['Voltage'],
-                                                               bounds_error=False,
-                                                               fill_value=(
-                                                               self.FC_LOWER_VOLTAGE, self.FC_UPPER_VOLTAGE))
-        real_cell_candidate_charge_profile_aligned['Voltage_aligned'] = real_cell_candidate_charge_profile_interper(
-            q_vec)
-
-        real_cell_candidate_charge_profile_aligned['Q_aligned'] = q_vec
-
-        return real_cell_candidate_charge_profile_aligned
-
-    def _impose_electrode_scale(self,
-                                pe_pristine=pd.DataFrame(),
-                                ne_1_pristine=pd.DataFrame(),
-                                ne_2_pristine_pos=pd.DataFrame(),
-                                ne_2_pristine_neg=pd.DataFrame(),
-                                lli=0.0, q_pe=0.0, q_ne=0.0, x_ne_2=0.0):
-        """
-        Scales the reference electrodes according to specified capacities and
-        offsets their capacities according to lli. Blends negative electrode materials.
-
-        Inputs:
-        pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
-                electrode
-        ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
-                electrode
-        ne_2_pos (Dataframe): half cell data for the positive component of the anode
-        ne_2_neg (Dataframe): half cell data for the negative component of the anode
-        lli (float): Loss of Lithium Inventory - capacity of the misalignment between
-                cathode and anode zero-capacity
-        q_pe (float): capacity of the positive electrode (cathode)
-        q_ne (float): capacity of the negative electrode (anode)
-        x_ne_2 (float): fraction of ne_2_pristine_pos or ne_2_pristine_neg
-                (positive or negative value, respectively) to ne_1_pristine
-
-        Outputs:
-        pe_degraded (Dataframe): positive electrode with imposed capacity
-                scale to emulate degradation
-        ne_degraded (Dataframe): negative electrode with imposed capacity
-                scale and capacity offset to emulate degradation
-        """
-        # Blend negative electrodes
-        ne_pristine = blend_electrodes(ne_1_pristine, ne_2_pristine_pos, ne_2_pristine_neg, x_ne_2)
-
-        # rescaling pristine electrodes to q_pe and q_ne
-        pe_q_scaled = pe_pristine.copy()
-        pe_q_scaled['Q_aligned'] = (pe_q_scaled['SOC_aligned'] / 100) * q_pe
-        ne_q_scaled = ne_pristine.copy()
-        ne_q_scaled['Q_aligned'] = (ne_q_scaled['SOC_aligned'] / 100) * q_ne
-
-        # translate pristine ne electrode with lli
-        ne_q_scaled['Q_aligned'] = ne_q_scaled['Q_aligned'] + lli
-
-        # Re-interpolate to align dataframes for differencing
-        lower_q = np.min((np.min(pe_q_scaled['Q_aligned']),
-                          np.min(ne_q_scaled['Q_aligned'])))
-        upper_q = np.max((np.max(pe_q_scaled['Q_aligned']),
-                          np.max(ne_q_scaled['Q_aligned'])))
-        q_vec = np.linspace(lower_q, upper_q, 1001)
-
-        # Actually aligning the electrode Q's
-        pe_pristine_interper = interp1d(pe_q_scaled['Q_aligned'],
-                                        pe_q_scaled['Voltage_aligned'], bounds_error=False)
-        pe_degraded = pe_q_scaled.copy()
-        pe_degraded['Q_aligned'] = q_vec
-        pe_degraded['Voltage_aligned'] = pe_pristine_interper(q_vec)
-
-        ne_pristine_interper = interp1d(ne_q_scaled['Q_aligned'],
-                                        ne_q_scaled['Voltage_aligned'], bounds_error=False)
-        ne_degraded = ne_q_scaled.copy()
-        ne_degraded['Q_aligned'] = q_vec
-        ne_degraded['Voltage_aligned'] = ne_pristine_interper(q_vec)
-
-        # Returning pe and ne degraded on an Ah basis
-        return pe_degraded, ne_degraded
-
-    def halfcell_degradation_matching_ah(self, x, *params):
-        """
-        Calls underlying functions to impose degradation through electrode
-        capacity scale and alignment through LLI. Modifies emulated full cell
-        data to be within full cell voltage range and calibrates (zeros) capacity
-        at the lowest permissible voltage. Interpolates real and emulated data onto
-        a common capacity axis.
-
-        Inputs:
-        x (list): [LLI, q_pe, q_ne, x_ne_2]
-        *params:
-                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
-                        electrode
-                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
-                        electrode
-                ne_2_pos (Dataframe): half cell data for the positive component of the anode
-                ne_2_neg (Dataframe): half cell data for the negative component of the anode
-                real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned
-                        (evenly spaced) and Voltage_aligned
-        Outputs:
-        pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled,
-                offset, and aligned along capacity
-        ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled,
-                offset, and aligned along capacity
-
-        df_real_aligned (Dataframe): capacity and voltage interpolated evenly across
-                capacity for the real cell data
-        emulated_full_cell_aligned (Dataframe): capacity and voltage interpolated evenly
-                across capacity for the emulated cell data
-        """
-
-        lli = x[0]
-        q_pe = x[1]
-        q_ne = x[2]
-        x_ne_2 = x[3]
-
-        (pe_pristine,
-         ne_1_pristine,
-         ne_2_pristine_pos,
-         ne_2_pristine_neg,
-         real_cell_candidate_charge_profile_aligned) = params
-
-        # output degraded ne and pe (on a AH basis, with electrode alignment
-        # (NaNs for voltage, when no capacity actually at the corresponding capacity index))
-        pe_out, ne_out = self._impose_electrode_scale(pe_pristine, ne_1_pristine,
-                                                      ne_2_pristine_pos, ne_2_pristine_neg,
-                                                      lli, q_pe,
-                                                      q_ne, x_ne_2)
-
-        # PE - NE = full cell voltage
-        emulated_full_cell_with_degradation = pd.DataFrame()
-        emulated_full_cell_with_degradation['Q_aligned'] = pe_out['Q_aligned'].copy()
-        emulated_full_cell_with_degradation['Voltage_aligned'] = pe_out['Voltage_aligned'] - ne_out['Voltage_aligned']
-
-        # Replace emulated full cell values outside of voltage range with NaN
-        emulated_full_cell_with_degradation['Voltage_aligned'].loc[
-            emulated_full_cell_with_degradation['Voltage_aligned'] < self.FC_LOWER_VOLTAGE] = np.nan
-        emulated_full_cell_with_degradation['Voltage_aligned'].loc[
-            emulated_full_cell_with_degradation['Voltage_aligned'] > self.FC_UPPER_VOLTAGE] = np.nan
-
-        # Center the emulated full cell and half cell curves onto the same Q at which the real (degraded)
-        # capacity measurement started (self.FC_LOWER_VOLTAGE)
-        emulated_full_cell_with_degradation_zeroed = pd.DataFrame()
-
-        emulated_full_cell_with_degradation_zeroed['Voltage_aligned'] = emulated_full_cell_with_degradation[
-            'Voltage_aligned'].copy()
-
-        zeroing_value = emulated_full_cell_with_degradation['Q_aligned'].loc[
-            np.nanargmin(emulated_full_cell_with_degradation['Voltage_aligned'])
-        ]
-
-        emulated_full_cell_with_degradation_zeroed['Q_aligned'] = \
-            (emulated_full_cell_with_degradation['Q_aligned'].copy() - zeroing_value)
-
-        pe_out_zeroed = pe_out.copy()
-        pe_out_zeroed['Q_aligned'] = pe_out['Q_aligned'] - zeroing_value
-
-        ne_out_zeroed = ne_out.copy()
-        ne_out_zeroed['Q_aligned'] = ne_out['Q_aligned'] - zeroing_value
-
-        # Interpolate full cell profiles across same Q range
-        min_q = np.min(
-            real_cell_candidate_charge_profile_aligned['Q_aligned'].loc[
-                ~real_cell_candidate_charge_profile_aligned['Voltage_aligned'].isna()])
-        max_q = np.max(
-            real_cell_candidate_charge_profile_aligned['Q_aligned'].loc[
-                ~real_cell_candidate_charge_profile_aligned['Voltage_aligned'].isna()])
-        emulated_interper = interp1d(emulated_full_cell_with_degradation_zeroed['Q_aligned'].loc[
-                                         ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()],
-                                     emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].loc[
-                                         ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()],
-                                     bounds_error=False)
-        real_interper = interp1d(
-            real_cell_candidate_charge_profile_aligned['Q_aligned'].loc[
-                ~real_cell_candidate_charge_profile_aligned['Voltage_aligned'].isna()],
-            real_cell_candidate_charge_profile_aligned['Voltage_aligned'].loc[
-                ~real_cell_candidate_charge_profile_aligned['Voltage_aligned'].isna()],
-            bounds_error=False)
-
-        q_vec = np.linspace(min_q, max_q, 1001)
-
-        emulated_aligned = pd.DataFrame()
-        emulated_aligned['Q_aligned'] = q_vec
-        emulated_aligned['Voltage_aligned'] = emulated_interper(q_vec)
-
-        real_aligned = pd.DataFrame()
-        real_aligned['Q_aligned'] = q_vec
-        real_aligned['Voltage_aligned'] = real_interper(q_vec)
-
-        return pe_out_zeroed, ne_out_zeroed, real_aligned, emulated_aligned
-
-    def get_dqdv_over_v_from_degradation_matching_ah(self, x, *params):
-        """
-        This function imposes degradation scaling ,then outputs the dqdv representation of the emulated cell data.
-
-        Inputs:
-        x (list): [LLI, q_pe, q_ne, x_ne_2]
-        *params:
-                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
-                        electrode
-                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
-                        electrode
-                ne_2_pos (Dataframe): half cell data for the positive component of the anode
-                ne_2_neg (Dataframe): half cell data for the negative component of the anode
-                real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned
-                        (evenly spaced) and Voltage_aligned
-
-        Outputs:
-        pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled,
-                offset, and aligned along capacity
-        ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled,
-                offset, and aligned along capacity
-        dq_dv_over_v_real (Dataframe): dqdv across voltage for the real cell data
-        dq_dv_over_v_emulated (Dataframe): dqdv across voltage for the emulated cell data
-        df_real_interped (Dataframe): capacity and voltage interpolated evenly across
-                capacity for the real cell data
-        emulated_full_cell_interped (Dataframe): capacity and voltage interpolated evenly
-                across capacity for the emulated cell data
-        """
-
-        pe_out_zeroed, ne_out_zeroed, df_real_interped, emulated_full_cell_interped = \
-            self.halfcell_degradation_matching_ah(x, *params)
-
-        # Calculate dqdv from full cell profiles
-        dq_dv_real = pd.DataFrame(np.gradient(df_real_interped['Q_aligned'], df_real_interped['Voltage_aligned']),
-                                  columns=['dQdV']).ewm(0.1).mean()
-        dq_dv_emulated = pd.DataFrame(
-            np.gradient(emulated_full_cell_interped['Q_aligned'], emulated_full_cell_interped['Voltage_aligned']),
-            columns=['dQdV']).ewm(0.1).mean()
-
-        # Include original data
-        dq_dv_real['Q_aligned'] = df_real_interped['Q_aligned']
-        dq_dv_real['Voltage_aligned'] = df_real_interped['Voltage_aligned']
-
-        dq_dv_emulated['Q_aligned'] = emulated_full_cell_interped['Q_aligned']
-        dq_dv_emulated['Voltage_aligned'] = emulated_full_cell_interped['Voltage_aligned']
-
-        # Interpolate dQdV and Q over V, aligns real and emulated over V
-        voltage_vec = np.linspace(self.FC_LOWER_VOLTAGE, self.FC_UPPER_VOLTAGE, 1001)
-
-        v_dq_dv_interper_real = interp1d(dq_dv_real['Voltage_aligned'].loc[~dq_dv_real['Voltage_aligned'].isna()],
-                                         dq_dv_real['dQdV'].loc[~dq_dv_real['Voltage_aligned'].isna()],
-                                         bounds_error=False, fill_value=0)
-        v_q_interper_real = interp1d(dq_dv_real['Voltage_aligned'].loc[~dq_dv_real['Voltage_aligned'].isna()],
-                                     dq_dv_real['Q_aligned'].loc[~dq_dv_real['Voltage_aligned'].isna()],
-                                     bounds_error=False, fill_value=(0, np.max(df_real_interped['Q_aligned'])))
-
-        v_dq_dv_interper_emulated = interp1d(dq_dv_emulated['Voltage_aligned'].loc[
-                                                 ~dq_dv_emulated['Voltage_aligned'].isna()],
-                                             dq_dv_emulated['dQdV'].loc[~dq_dv_emulated['Voltage_aligned'].isna()],
-                                             bounds_error=False, fill_value=0)
-        v_q_interper_emulated = interp1d(dq_dv_emulated['Voltage_aligned'].loc[
-                                             ~dq_dv_emulated['Voltage_aligned'].isna()],
-                                         dq_dv_emulated['Q_aligned'].loc[~dq_dv_emulated['Voltage_aligned'].isna()],
-                                         bounds_error=False, fill_value=(0, np.max(df_real_interped['Q_aligned'])))
-
-        dq_dv_over_v_real = pd.DataFrame(v_dq_dv_interper_real(voltage_vec), columns=['dQdV']).fillna(0)
-        dq_dv_over_v_real['Q_aligned'] = v_q_interper_real(voltage_vec)
-        dq_dv_over_v_real['Voltage_aligned'] = voltage_vec
-
-        dq_dv_over_v_emulated = pd.DataFrame(v_dq_dv_interper_emulated(voltage_vec), columns=['dQdV']).fillna(0)
-        dq_dv_over_v_emulated['Q_aligned'] = v_q_interper_emulated(voltage_vec)
-        dq_dv_over_v_emulated['Voltage_aligned'] = voltage_vec
-
-        return (pe_out_zeroed,
-                ne_out_zeroed,
-                dq_dv_over_v_real,
-                dq_dv_over_v_emulated,
-                df_real_interped,
-                emulated_full_cell_interped)
-
-    def get_dvdq_over_q_from_degradation_matching_ah(self, x, *params):
-        """
-        This function imposes degradation scaling ,then outputs the dVdQ representation of the emulated cell data.
-        Inputs:
-        x (list): [LLI, q_pe, q_ne, x_ne_2]
-        *params:
-                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
-                        electrode
-                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
-                        electrode
-                ne_2_pos (Dataframe): half cell data for the positive component of the anode
-                ne_2_neg (Dataframe): half cell data for the negative component of the anode
-                real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned
-                        (evenly spaced) and Voltage_aligned
-        Outputs:
-        pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled,
-                offset, and aligned along capacity
-        ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled,
-                offset, and aligned along capacity
-        dv_dq_real (Dataframe): dVdQ across capacity for the real cell data
-        dv_dq_emulated (Dataframe): dVdQ across capacity for the emulated cell data
-        df_real_interped (Dataframe): capacity and voltage interpolated evenly across
-                capacity for the real cell data
-        emulated_full_cell_interped (Dataframe): capacity and voltage interpolated evenly
-                across capacity for the emulated cell data
-        """
-
-        pe_out_zeroed, ne_out_zeroed, df_real_interped, emulated_full_cell_interped = \
-            self.halfcell_degradation_matching_ah(x, *params)
-
-        # Calculate dQdV from full cell profiles
-        dv_dq_real = pd.DataFrame(np.gradient(df_real_interped['Voltage_aligned'], df_real_interped['Q_aligned']),
-                                  columns=['dVdQ']).ewm(0.1).mean()
-        dv_dq_emulated = pd.DataFrame(
-            np.gradient(emulated_full_cell_interped['Voltage_aligned'], emulated_full_cell_interped['Q_aligned']),
-            columns=['dVdQ']).ewm(0.1).mean()
-
-        # Include original data
-        dv_dq_real['Q_aligned'] = df_real_interped['Q_aligned']
-        dv_dq_real['Voltage_aligned'] = df_real_interped['Voltage_aligned']
-
-        dv_dq_emulated['Q_aligned'] = emulated_full_cell_interped['Q_aligned']
-        dv_dq_emulated['Voltage_aligned'] = emulated_full_cell_interped['Voltage_aligned']
-
-        # Q interpolation not needed, as interpolated over Q by default
-
-        return (pe_out_zeroed,
-                ne_out_zeroed,
-                dv_dq_real,
-                dv_dq_emulated,
-                df_real_interped,
-                emulated_full_cell_interped)
-
-    def get_v_over_q_from_degradation_matching_ah(self, x, *params):
-        """
-        This function imposes degradation scaling ,then outputs the V-Q representation of the emulated cell data.
-        Inputs:
-        x (list): [LLI, q_pe, q_ne, x_ne_2]
-        *params:
-                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
-                        electrode
-                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
-                        electrode
-                ne_2_pos (Dataframe): half cell data for the positive component of the anode
-                ne_2_neg (Dataframe): half cell data for the negative component of the anode
-                real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned
-                        (evenly spaced) and Voltage_aligned
-        Outputs:
-        pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled,
-                offset, and aligned along capacity
-        ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled,
-                offset, and aligned along capacity
-        df_real_interped (Dataframe): capacity and voltage interpolated evenly across
-                capacity for the real cell data
-        emulated_full_cell_interped (Dataframe): capacity and voltage interpolated evenly
-                across capacity for the emulated cell data
-        """
-        (pe_out_zeroed, ne_out_zeroed, real_aligned, emulated_aligned) = \
-            self.halfcell_degradation_matching_ah(x, *params)
-
-        min_soc_full_cell = np.min(real_aligned.loc[~real_aligned.Voltage_aligned.isna()].Q_aligned)
-        max_soc_full_cell = np.max(real_aligned.loc[~real_aligned.Voltage_aligned.isna()].Q_aligned)
-
-        soc_vec_full_cell = np.linspace(min_soc_full_cell, max_soc_full_cell, 1001)
-
-        emulated_full_cell_interper = interp1d(
-            emulated_aligned.Q_aligned.loc[~real_aligned.Voltage_aligned.isna()],
-            emulated_aligned.Voltage_aligned.loc[~real_aligned.Voltage_aligned.isna()],
-            bounds_error=False)
-        real_full_cell_interper = interp1d(real_aligned.Q_aligned.loc[~real_aligned.Voltage_aligned.isna()],
-                                           real_aligned.Voltage_aligned.loc[~real_aligned.Voltage_aligned.isna()],
-                                           bounds_error=False)
-
-        # Interpolate the emulated full-cell profile
-        emulated_full_cell_interped = pd.DataFrame()
-        emulated_full_cell_interped['Q_aligned'] = soc_vec_full_cell
-        emulated_full_cell_interped['Voltage_aligned'] = emulated_full_cell_interper(soc_vec_full_cell)
-
-        # Interpolate the true full-cell profile
-        df_real_interped = emulated_full_cell_interped.copy()
-        df_real_interped['Q_aligned'] = soc_vec_full_cell
-        df_real_interped['Voltage_aligned'] = real_full_cell_interper(soc_vec_full_cell)
-        return pe_out_zeroed, ne_out_zeroed, df_real_interped, emulated_full_cell_interped
-
-    def get_v_over_q_from_degradation_matching_ah_no_real(self, x, *params):
-        """
-        This function imposes degradation scaling ,then outputs the V-Q representation of the
-        emulated cell data, in the absence of real cell data.
-
-        Inputs:
-        x (list): [LLI, q_pe, q_ne, x_ne_2]
-        *params:
-                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
-                        electrode
-                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
-                        electrode
-                ne_2_pos (Dataframe): half cell data for the positive component of the anode
-                ne_2_neg (Dataframe): half cell data for the negative component of the anode
-                real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned
-                        (evenly spaced) and Voltage_aligned
-
-        Outputs:
-        pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled,
-                offset, and aligned along capacity
-        ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled,
-                offset, and aligned along capacity
-        emulated_full_cell_interped (Dataframe): capacity and voltage interpolated evenly
-                across capacity for the emulated cell data
-
-        """
-        (pe_out_zeroed, ne_out_zeroed, emulated_aligned) = \
-            self.halfcell_degradation_matching_ah_no_real(x, *params)
-
-        min_q_full_cell = np.min(emulated_aligned.loc[~emulated_aligned.Voltage_aligned.isna()].Q_aligned)
-        max_q_full_cell = np.max(emulated_aligned.loc[~emulated_aligned.Voltage_aligned.isna()].Q_aligned)
-
-        q_vec_full_cell = np.linspace(min_q_full_cell, max_q_full_cell, 1001)
-
-        emulated_full_cell_interper = interp1d(
-            emulated_aligned.Q_aligned.loc[~emulated_aligned.Voltage_aligned.isna()],
-            emulated_aligned.Voltage_aligned.loc[~emulated_aligned.Voltage_aligned.isna()],
-            bounds_error=False)
-
-        # Interpolate the emulated full-cell profile
-        emulated_full_cell_interped = pd.DataFrame()
-        emulated_full_cell_interped['Q_aligned'] = q_vec_full_cell
-        emulated_full_cell_interped['Voltage_aligned'] = emulated_full_cell_interper(q_vec_full_cell)
-
-        return pe_out_zeroed, ne_out_zeroed, emulated_full_cell_interped
-
-    def halfcell_degradation_matching_ah_no_real(self, x, *params):
-        """
-        Calls underlying functions to impose degradation through electrode
-        capacity scale and alignment through LLI. Modifies emulated full cell
-        data to be within full cell voltage range and calibrates (zeros) capacity
-        at the lowest permissible voltage.
-
-        Inputs:
-        x (list): [LLI, q_pe, q_ne, x_ne_2]
-        *params:
-                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
-                        electrode
-                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
-                        electrode
-                ne_2_pos (Dataframe): half cell data for the positive component of the anode
-                ne_2_neg (Dataframe): half cell data for the negative component of the anode
-                real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned
-                        (evenly spaced) and Voltage_aligned
-
-        Outputs:
-        pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled,
-                offset, and aligned along capacity
-        ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled,
-                offset, and aligned along capacity
-        emulated_aligned (Dataframe): full cell data corresponding to the imposed degradation
-        """
-        lli = x[0]
-        q_pe = x[1]
-        q_ne = x[2]
-        x_ne_2 = x[3]
-
-        pe_pristine, ne_1_pristine, ne_2_pristine_pos, ne_2_pristine_neg = params
-
-        pe_out, ne_out = self._impose_electrode_scale(pe_pristine, ne_1_pristine,
-                                                      ne_2_pristine_pos, ne_2_pristine_neg,
-                                                      lli, q_pe,
-                                                      q_ne,
-                                                      x_ne_2)
-        # outputs degraded ne and pe (on a AH basis, with electrode alignment (NaNs for voltage, when no overlap))
-
-        emulated_full_cell_with_degradation = pd.DataFrame()
-        emulated_full_cell_with_degradation['Q_aligned'] = pe_out['Q_aligned'].copy()
-        emulated_full_cell_with_degradation['Voltage_aligned'] = pe_out['Voltage_aligned'] - ne_out['Voltage_aligned']
-
-        # Replace emulated full cell values outside of voltage range with NaN
-        emulated_full_cell_with_degradation['Voltage_aligned'].loc[
-            emulated_full_cell_with_degradation['Voltage_aligned'] < self.FC_LOWER_VOLTAGE] = np.nan
-        emulated_full_cell_with_degradation['Voltage_aligned'].loc[
-            emulated_full_cell_with_degradation['Voltage_aligned'] > self.FC_UPPER_VOLTAGE] = np.nan
-
-        # Center the emulated full cell and half cell curves onto the same Q at which the real (degraded)
-        # capacity measurement started (self.FC_LOWER_VOLTAGE)
-        emulated_full_cell_with_degradation_zeroed = pd.DataFrame()
-
-        emulated_full_cell_with_degradation_zeroed['Voltage_aligned'] = emulated_full_cell_with_degradation[
-            'Voltage_aligned']
-
-        zeroing_value = emulated_full_cell_with_degradation['Q_aligned'].loc[
-            np.nanargmin(emulated_full_cell_with_degradation['Voltage_aligned'])
-        ]
-
-        emulated_full_cell_with_degradation_zeroed['Q_aligned'] = \
-            (emulated_full_cell_with_degradation['Q_aligned'] - zeroing_value)
-
-        pe_out_zeroed = pe_out.copy()
-        pe_out_zeroed['Q_aligned'] = pe_out['Q_aligned'] - zeroing_value
-
-        ne_out_zeroed = ne_out.copy()
-        ne_out_zeroed['Q_aligned'] = ne_out['Q_aligned'] - zeroing_value
-
-        # Interpolate full profiles across same Q range
-        min_q = np.min(
-            emulated_full_cell_with_degradation_zeroed['Q_aligned'].loc[
-                ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()])
-        max_q = np.max(
-            emulated_full_cell_with_degradation_zeroed['Q_aligned'].loc[
-                ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()])
-
-        emulated_interper = interp1d(emulated_full_cell_with_degradation_zeroed['Q_aligned'].loc[
-                                         ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()],
-                                     emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].loc[
-                                         ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()],
-                                     bounds_error=False)
-
-        q_vec = np.linspace(min_q, max_q, 1001)
-
-        emulated_aligned = pd.DataFrame()
-        emulated_aligned['Q_aligned'] = q_vec
-        emulated_aligned['Voltage_aligned'] = emulated_interper(q_vec)
-
-        return pe_out_zeroed, ne_out_zeroed, emulated_aligned
-
-    def _get_error_from_degradation_matching_ah(self, x, *params):
-        """
-        Wrapper function which selects the correct error sub routine and returns its error value.
-
-        Inputs:
-        x (list): [LLI, q_pe, q_ne, x_ne_2]
-        *params:
-                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
-                        electrode
-                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
-                        electrode
-                ne_2_pos (Dataframe): half cell data for the positive component of the anode
-                ne_2_neg (Dataframe): half cell data for the negative component of the anode
-                real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned
-                        (evenly spaced) and Voltage_aligned
-
-        Outputs:
-            error value (float) - output of the specified error sub function
-        """
-        error_type = self.error_type
-        if error_type == 'V-Q':
-            return self._get_error_from_degradation_matching_v_q(x, *params)[0]
-        elif error_type == 'dVdQ':
-            return self._get_error_from_degradation_matching_dvdq(x, *params)[0]
-        elif error_type == 'dQdV':
-            return self._get_error_from_degradation_matching_dqdv(x, *params)[0]
-        else:
-            return self._get_error_from_degradation_matching_v_q(x, *params)[0]
-
-    def _get_error_from_degradation_matching_v_q(self, x, *params):
-        """
-        Error function returning the mean standardized Euclidean distance of each point of the real curve to
-                the closest value on the emulated curve in the V-Q representation.
-
-        Inputs:
-        x (list): [LLI, q_pe, q_ne, x_ne_2]
-        *params:
-                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
-                        electrode
-                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
-                        electrode
-                ne_2_pos (Dataframe): half cell data for the positive component of the anode
-                ne_2_neg (Dataframe): half cell data for the negative component of the anode
-                real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned
-                        (evenly spaced) and Voltage_aligned
-
-        Outputs:
-            error (float): output of the specified error sub function
-            error_vector (array): vector containingEuclidean distance of each point of the real curve to
-                the closest value on the emulated curve in the V-Q representation
-            xa (Dataframe): real full cell data used for error analysis
-            xb (Dataframe): emulated full cell  data used for error analysis
-        """
-
-        try:
-            (pe_out_zeroed, ne_out_zeroed, real_aligned, emulated_aligned
-             ) = self.get_v_over_q_from_degradation_matching_ah(x, *params)
-
-            xa = real_aligned.dropna()
-            xb = emulated_aligned.dropna()
-            error_matrix = distance.cdist(xa, xb, 'seuclidean')
-            error_vector = error_matrix.min(axis=1)
-            error = error_vector.mean()
-        except ValueError:
-            error = 100
-            return error, None, None, None
-        return error, error_vector, xa, xb
-
-        # Pairwise euclidean from premade dQdV
-
-    def _get_error_from_degradation_matching_dqdv(self, x, *params):
-        """
-        Error function returning the mean standardized Euclidean distance of each point of the real curve to
-                the closest value on the emulated curve in the dQdV representation.
-
-        Inputs:
-        x (list): [LLI, q_pe, q_ne, x_ne_2]
-        *params:
-                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
-                        electrode
-                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
-                        electrode
-                ne_2_pos (Dataframe): half cell data for the positive component of the anode
-                ne_2_neg (Dataframe): half cell data for the negative component of the anode
-                real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned
-                        (evenly spaced) and Voltage_aligned
-
-        Outputs:
-            error (float): output of the specified error sub function
-            error_vector (array): vector containing Euclidean distance of each point of the real curve to
-                the closest value on the emulated curve in the dQdV representation
-            xa (Dataframe): real full cell data used for error analysis
-            xb (Dataframe): emulated full cell  data used for error analysis
-        """
-
-        try:
-            # Call dQdV generating function
-            (pe_out_zeroed,
-             ne_out_zeroed,
-             dqdv_over_v_real,
-             dqdv_over_v_emulated,
-             df_real_interped,
-             emulated_full_cell_interped) = self.get_dqdv_over_v_from_degradation_matching_ah(x, *params)
-
-            xa = dqdv_over_v_real[['Voltage_aligned', 'dQdV']].dropna()
-            xb = dqdv_over_v_emulated[['Voltage_aligned', 'dQdV']].dropna()
-            error_matrix = distance.cdist(xa, xb, 'seuclidean')
-            error_vector = error_matrix.min(axis=1)
-            error = error_vector.mean()
-
-        except ValueError:
-            error = 100
-            return error, None, None, None
-        return error, error_vector, xa, xb
-
-    def _get_error_from_degradation_matching_dvdq(self, x, *params):
-        """
-        Error function returning the mean standardized Euclidean distance of each point of the real curve to
-                the closest value on the emulated curve in the dVdQ representation.
-
-        Inputs:
-        x (list): [LLI, q_pe, q_ne, x_ne_2]
-        *params:
-                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
-                        electrode
-                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
-                        electrode
-                ne_2_pos (Dataframe): half cell data for the positive component of the anode
-                ne_2_neg (Dataframe): half cell data for the negative component of the anode
-                real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned
-                        (evenly spaced) and Voltage_aligned
-
-        Outputs:
-            error (float): output of the specified error sub function
-            error_vector (array): vector containing Euclidean distance of each point of the real curve to
-                the closest value on the emulated curve in the dVdQ representation
-            xa (Dataframe): real full cell data used for error analysis
-            xb (Dataframe): emulated full cell  data used for error analysis
-        """
-
-        try:
-            (pe_out_zeroed,
-             ne_out_zeroed,
-             dvdq_over_q_real,
-             dvdq_over_q_emulated,
-             df_real_interped,
-             emulated_full_cell_interped) = self.get_dvdq_over_q_from_degradation_matching_ah(x, *params)
-
-            xa = dvdq_over_q_real[['Q_aligned', 'dVdQ']].dropna()
-            xb = dvdq_over_q_emulated[['Q_aligned', 'dVdQ']].dropna()
-
-            # down-select to values with capacity more than 0.5 Ahr to eliminate high-slope region of dVdQ
-            xa = xa.loc[(xa.Q_aligned > 0.5)]
-            xb = xb.loc[(xb.Q_aligned > 0.5)]
-
-            error_matrix = distance.cdist(xa, xb, 'seuclidean')
-            error_vector = error_matrix.min(axis=1)
-            error = error_vector.mean()
-
-        except ValueError:
-            error = 100
-            return error, None, None, None
-        return error, error_vector, xa, xb
-
-    def _get_error_from_synthetic_fitting_ah(self, x, *params):
-        """
-        Wrapper function which selects the correct error sub routine and returns its error value.
-        This function is specific to fitting synthetic data rather than real cycling data.
-
-        Inputs:
-        x (list): [LLI, q_pe, q_ne, x_ne_2]
-        *params:
-                pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive
-                        electrode
-                ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative
-                        electrode
-                ne_2_pos (Dataframe): half cell data for the positive component of the anode
-                ne_2_neg (Dataframe): half cell data for the negative component of the anode
-                real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned
-                        (evenly spaced) and Voltage_aligned
-
-        Outputs:
-            error value (float) - output of the specified error sub function
-        """
-
-        error_type = self.error_type
-
-        try:
-            if error_type == 'V-Q':
-                return self._get_error_from_degradation_matching_v_q(x, *params)[0]
-            elif error_type == 'dVdQ':
-                return self._get_error_from_degradation_matching_dvdq(x, *params)[0]
-            elif error_type == 'dQdV':
-                return self._get_error_from_degradation_matching_dvdq(x, *params)[0]
-            else:
-                return self._get_error_from_degradation_matching_v_q(x, *params)[0]
-        except RuntimeError:
-            print("Can't return error")
-            return 100
-
-    def intracell_values_wrapper_ah(self,
-                                    cycle_index,
-                                    cell_struct,
-                                    degradation_bounds=None
-                                    ):
-        """
-        Wrapper function to solve capacity sizing and offset of reference electrodes to real full cell cycle data.
-
-        Inputs:
-        cycle_index (int): the index of the cycle of interest of the structured real cycling data
-        cell_struct (MaccorDatapath): BEEP structured cycling data
-
-        Outputs:
-        loss_dict (dict): dictionary with key of cycle index and entry of a list of
-                error, lli_opt, q_pe_opt, q_ne_opt, x_ne_2, Q_li
-        profiles_dict (dict): dictionary with key of cycle index and entry of a dictionary
-                containing various key/entry pairs of resulting from the fitting
-        """
-        if degradation_bounds is None:
-            degradation_bounds = ((0, 3),  # LLI
-                                  (2.5, 6.5),  # q_pe
-                                  (2.5, 6.5),  # q_ne
-                                  (1, 1),  # (-1,1) x_ne_2
-                                  )
-
-        real_cell_candidate_charge_profile_aligned = self.process_beep_cycle_data_for_candidate_halfcell_analysis_ah(
-            cell_struct,
-            cycle_index)
-
-        degradation_optimization_result = differential_evolution(self._get_error_from_degradation_matching_ah,
-                                                                 degradation_bounds,
-                                                                 args=(self.pe_pristine,
-                                                                       self.ne_1_pristine,
-                                                                       self.ne_2_pristine_pos,
-                                                                       self.ne_2_pristine_neg,
-                                                                       real_cell_candidate_charge_profile_aligned
-                                                                       ),
-                                                                 strategy='best1bin', maxiter=100000,
-                                                                 popsize=15, tol=0.001, mutation=0.5,
-                                                                 recombination=0.7,
-                                                                 seed=1,
-                                                                 callback=None, disp=False, polish=True,
-                                                                 init='latinhypercube',
-                                                                 atol=0, updating='deferred', workers=-1,
-                                                                 constraints=()
-                                                                 )
-        #         print(degradation_optimization_result.x) #BVV
-
-        (pe_out_zeroed,
-         ne_out_zeroed,
-         dqdv_over_v_real,
-         dqdv_over_v_emulated,
-         df_real_interped,
-         emulated_full_cell_interped) = self.get_dqdv_over_v_from_degradation_matching_ah(
-            degradation_optimization_result.x,
-            self.pe_pristine,
-            self.ne_1_pristine,
-            self.ne_2_pristine_pos,
-            self.ne_2_pristine_neg,
-            real_cell_candidate_charge_profile_aligned)
-        #
-        electrode_info_df = get_electrode_info_ah(pe_out_zeroed, ne_out_zeroed)
-        #
-        error = degradation_optimization_result.fun
-        lli_opt = degradation_optimization_result.x[0]
-        q_pe_opt = degradation_optimization_result.x[1]
-        q_ne_opt = degradation_optimization_result.x[2]
-        x_ne_2 = degradation_optimization_result.x[3]
-
-        loss_dict = {cycle_index: np.append([error, lli_opt, q_pe_opt, q_ne_opt,
-                                             x_ne_2],
-                                            electrode_info_df.iloc[-1].values)
-                     }
-
-        profiles_per_cycle_dict = {
-           'NE_zeroed': ne_out_zeroed,
-           'PE_zeroed': pe_out_zeroed,
-           'dQdV_over_v_real': dqdv_over_v_real,
-           'dQdV_over_v_emulated': dqdv_over_v_emulated,
-           'df_real_interped': df_real_interped,
-           'emulated_full_cell_interped': emulated_full_cell_interped,
-           'real_cell_candidate_charge_profile_aligned': real_cell_candidate_charge_profile_aligned
-                                   }
-
-        profiles_dict = {cycle_index: profiles_per_cycle_dict}
-
-        return loss_dict, profiles_dict
-
-    def solve_emulated_degradation(self,
-                                   forward_simulated_profile,
-                                   degradation_bounds=None
-                                   ):
-
-        """
-
-
-        """
-
-        if degradation_bounds is None:
-            degradation_bounds = ((0, 3),  # LLI
-                                  (2.5, 6.5),  # q_pe
-                                  (2.5, 6.5),  # q_ne
-                                  (1, 1),  # (-1,1) x_ne_2
-                                  )
-
-        degradation_optimization_result = differential_evolution(self._get_error_from_synthetic_fitting_ah,
-                                                                 degradation_bounds,
-                                                                 args=(self.pe_pristine,
-                                                                       self.ne_1_pristine,
-                                                                       self.ne_2_pristine_pos,
-                                                                       self.ne_2_pristine_neg,
-                                                                       forward_simulated_profile,
-                                                                       ),
-                                                                 strategy='best1bin', maxiter=100000,
-                                                                 popsize=15, tol=0.001, mutation=0.5,
-                                                                 recombination=0.7,
-                                                                 seed=1,
-                                                                 callback=None, disp=False, polish=True,
-                                                                 init='latinhypercube',
-                                                                 atol=0, updating='deferred', workers=-1,
-                                                                 constraints=()
-                                                                 )
-
-        return degradation_optimization_result
-
-
-# TODO revisit this function
-def blend_electrodes(electrode_1, electrode_2_pos, electrode_2_neg, x_2):
-    """
-    Blends two electrode materials from their SOC-V profiles to form a blended electrode.
-
-    Inputs:
-    electrode_1: Primary material in electrode, typically Gr. DataFrame supplied with SOC evenly spaced and voltage.
-    electrode_2: Secondary material in electrode, typically Si. DataFrame supplied with SOC evenly spaced and
-        voltage as an additional column.
-    x_2: Fraction of electrode_2 material's capacity (not mass). Supplied as scalar value.
-
-    Outputs:
-    df_blended_soc_mod (Dataframe): blended electrode with SOC_aligned and Voltage_aligned columns
-    """
-    if electrode_2_pos.empty:
-        df_blended = electrode_1
-        return df_blended
-
-    if electrode_2_neg.empty:
-        electrode_2 = electrode_2_pos
-        x_2 = np.abs(x_2)
-    elif x_2 > 0:
-        electrode_2 = electrode_2_pos
-    else:
-        electrode_2 = electrode_2_neg
-        x_2 = np.abs(x_2)
-
-    electrode_1_interper = interp1d(electrode_1['Voltage_aligned'], electrode_1['SOC_aligned'], bounds_error=False,
-                                    fill_value='extrapolate')
-    electrode_2_interper = interp1d(electrode_2['Voltage_aligned'], electrode_2['SOC_aligned'], bounds_error=False,
-                                    fill_value='extrapolate')
-
-    voltage_vec = np.linspace(np.min((np.min(electrode_1['Voltage_aligned']),
-                                      np.min(electrode_2['Voltage_aligned']))),
-                              np.max((np.max(electrode_1['Voltage_aligned']),
-                                      np.max(electrode_2['Voltage_aligned']))),
-                              1001)
-    electrode_1_voltage_aligned = pd.DataFrame(electrode_1_interper(voltage_vec), columns=['SOC'])
-    electrode_2_voltage_aligned = pd.DataFrame(electrode_2_interper(voltage_vec), columns=['SOC'])
-    electrode_1_voltage_aligned['Voltage'] = voltage_vec
-    electrode_2_voltage_aligned['Voltage'] = voltage_vec
-
-    df_blend_voltage_aligned = pd.DataFrame(
-        (1 - x_2) * electrode_1_voltage_aligned['SOC'] + x_2 * electrode_2_voltage_aligned['SOC'], columns=['SOC'])
-    df_blend_voltage_aligned['Voltage'] = electrode_1_voltage_aligned.merge(electrode_2_voltage_aligned,
-                                                                            on='Voltage')['Voltage']
-
-    df_blended_interper = interp1d(df_blend_voltage_aligned['SOC'], df_blend_voltage_aligned['Voltage'],
-                                   bounds_error=False)
-    soc_vec = np.linspace(0, 100, 1001)
-
-    df_blended = pd.DataFrame(df_blended_interper(soc_vec), columns=['Voltage_aligned'])
-    df_blended['SOC_aligned'] = soc_vec
-
-    # Modify NE to fully span 100% SOC within its valid voltage window
-    df_blended_soc_mod_interper = interp1d(df_blended['SOC_aligned'].loc[~df_blended['Voltage_aligned'].isna()],
-                                           df_blended['Voltage_aligned'].loc[~df_blended['Voltage_aligned'].isna()],
-                                           bounds_error=False)
-    soc_vec = np.linspace(np.min(df_blended['SOC_aligned'].loc[~df_blended['Voltage_aligned'].isna()]),
-                          np.max(df_blended['SOC_aligned'].loc[~df_blended['Voltage_aligned'].isna()]),
-                          1001)
-    df_blended_soc_mod = pd.DataFrame(df_blended_soc_mod_interper(soc_vec), columns=['Voltage_aligned'])
-    df_blended_soc_mod['SOC_aligned'] = soc_vec / np.max(soc_vec) * 100
-    return df_blended_soc_mod
-
-
-def get_electrode_info_ah(pe_out_zeroed, ne_out_zeroed):
-    """
-    Calculates a variety of half-cell metrics at various positions in the full-cell profile.
-
-    Inputs:
-    pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled,
-            offset, and aligned along capacity
-    ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled,
-            offset, and aligned along capacity
-
-    Outputs:
-    electrode_info_df (Dataframe): dataframe containing a variety of half-cell metrics
-        at various positions in the emulated full-cell profile.
-
-        pe_voltage_FC4p2V: voltage of the positive electrode (catahode) corresponding
-            to the full cell at 4.2V
-        ...
-        pe_voltage_FC2p7V: voltage of the positive electrode (catahode) corresponding
-            to the full cell at 2.7V
-
-        pe_soc_FC4p2V: state of charge of the positive electrode corresponding
-            to the full cell at 4.2V
-        ...
-        pe_soc_FC2p7V: state of charge of the positive electrode corresponding
-            to the full cell at 2.7V
-
-        ne_voltage_FC4p2V: voltage of the negative electrode (anode) corresponding
-            to the full cell at 4.2V
-        ...
-        ne_voltage_FC2p7V: voltage of the negative electrode (anode) corresponding
-            to the full cell at 2.7V
-
-        ne_soc_FC4p2V: state of charge of the anode electrode corresponding
-            to the full cell at 4.2V
-        ...
-        ne_soc_FC2p7V: state of charge of the anode electrode corresponding
-            to the full cell at 2.7V
-
-        Q_fc: capacity of the full cecll within the full cell voltage limits
-        q_pe: capacity of the cathode
-        q_ne: capacity of the anode [Ahr]
-        Q_li
-    """
-    pe_minus_ne_zeroed = pd.DataFrame(pe_out_zeroed['Voltage_aligned'] - ne_out_zeroed['Voltage_aligned'],
-                                      columns=['Voltage_aligned'])
-    pe_minus_ne_zeroed['Q_aligned'] = pe_out_zeroed['Q_aligned']
-
-    electrode_info_df = pd.DataFrame(index=[0])
-
-    electrode_info_df['pe_voltage_FC4p2V'] = pe_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.2))].Voltage_aligned
-    electrode_info_df['pe_voltage_FC4p1V'] = pe_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.1))].Voltage_aligned
-    electrode_info_df['pe_voltage_FC4p0V'] = pe_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.0))].Voltage_aligned
-    electrode_info_df['pe_voltage_FC3p9V'] = pe_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.9))].Voltage_aligned
-    electrode_info_df['pe_voltage_FC3p8V'] = pe_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.8))].Voltage_aligned
-    electrode_info_df['pe_voltage_FC3p7V'] = pe_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.7))].Voltage_aligned
-    electrode_info_df['pe_voltage_FC3p6V'] = pe_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.6))].Voltage_aligned
-    electrode_info_df['pe_voltage_FC3p5V'] = pe_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.5))].Voltage_aligned
-    electrode_info_df['pe_voltage_FC3p4V'] = pe_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.4))].Voltage_aligned
-    electrode_info_df['pe_voltage_FC3p3V'] = pe_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.3))].Voltage_aligned
-    electrode_info_df['pe_voltage_FC3p2V'] = pe_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.2))].Voltage_aligned
-    electrode_info_df['pe_voltage_FC3p1V'] = pe_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.1))].Voltage_aligned
-    electrode_info_df['pe_voltage_FC3p0V'] = pe_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.0))].Voltage_aligned
-    electrode_info_df['pe_voltage_FC2p9V'] = pe_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.9))].Voltage_aligned
-    electrode_info_df['pe_voltage_FC2p8V'] = pe_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.8))].Voltage_aligned
-    electrode_info_df['pe_voltage_FC2p7V'] = pe_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.7))].Voltage_aligned
-
-    electrode_info_df['pe_soc_FC4p2V'] = (
-                (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.2))].Q_aligned -
-                 np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 4.2V
-    electrode_info_df['pe_soc_FC4p1V'] = (
-                (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.1))].Q_aligned -
-                 np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 4.1V
-    electrode_info_df['pe_soc_FC4p0V'] = (
-                (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.0))].Q_aligned -
-                 np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 4.0V
-    electrode_info_df['pe_soc_FC3p9V'] = (
-                (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.9))].Q_aligned -
-                 np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 3.9V
-    electrode_info_df['pe_soc_FC3p8V'] = (
-                (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.8))].Q_aligned -
-                 np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 3.8V
-    electrode_info_df['pe_soc_FC3p7V'] = (
-                (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.7))].Q_aligned -
-                 np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 3.7V
-    electrode_info_df['pe_soc_FC3p6V'] = (
-                (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.6))].Q_aligned -
-                 np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 3.6V
-    electrode_info_df['pe_soc_FC3p5V'] = (
-                (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.5))].Q_aligned -
-                 np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 3.5V
-    electrode_info_df['pe_soc_FC3p4V'] = (
-                (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.4))].Q_aligned -
-                 np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 3.4V
-    electrode_info_df['pe_soc_FC3p3V'] = (
-                (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.3))].Q_aligned -
-                 np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 3.3V
-    electrode_info_df['pe_soc_FC3p2V'] = (
-                (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.2))].Q_aligned -
-                 np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 3.2V
-    electrode_info_df['pe_soc_FC3p1V'] = (
-                (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.1))].Q_aligned -
-                 np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 3.1V
-    electrode_info_df['pe_soc_FC3p0V'] = (
-                (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.0))].Q_aligned -
-                 np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 3.0V
-    electrode_info_df['pe_soc_FC2p9V'] = (
-                (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.9))].Q_aligned -
-                 np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 2.9V
-    electrode_info_df['pe_soc_FC2p8V'] = (
-                (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.8))].Q_aligned -
-                 np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 2.8V
-    electrode_info_df['pe_soc_FC2p7V'] = (
-                (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.7))].Q_aligned -
-                 np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 2.7V
-
-    electrode_info_df['ne_voltage_FC4p2V'] = ne_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.2))].Voltage_aligned
-    electrode_info_df['ne_voltage_FC4p1V'] = ne_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.1))].Voltage_aligned
-    electrode_info_df['ne_voltage_FC4p0V'] = ne_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.0))].Voltage_aligned
-    electrode_info_df['ne_voltage_FC3p9V'] = ne_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.9))].Voltage_aligned
-    electrode_info_df['ne_voltage_FC3p8V'] = ne_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.8))].Voltage_aligned
-    electrode_info_df['ne_voltage_FC3p7V'] = ne_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.7))].Voltage_aligned
-    electrode_info_df['ne_voltage_FC3p6V'] = ne_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.6))].Voltage_aligned
-    electrode_info_df['ne_voltage_FC3p5V'] = ne_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.5))].Voltage_aligned
-    electrode_info_df['ne_voltage_FC3p4V'] = ne_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.4))].Voltage_aligned
-    electrode_info_df['ne_voltage_FC3p3V'] = ne_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.3))].Voltage_aligned
-    electrode_info_df['ne_voltage_FC3p2V'] = ne_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.2))].Voltage_aligned
-    electrode_info_df['ne_voltage_FC3p1V'] = ne_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.1))].Voltage_aligned
-    electrode_info_df['ne_voltage_FC3p0V'] = ne_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.0))].Voltage_aligned
-    electrode_info_df['ne_voltage_FC2p9V'] = ne_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.9))].Voltage_aligned
-    electrode_info_df['ne_voltage_FC2p8V'] = ne_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.8))].Voltage_aligned
-    electrode_info_df['ne_voltage_FC2p7V'] = ne_out_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.7))].Voltage_aligned
-
-    electrode_info_df['ne_soc_FC4p2V'] = (
-                (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.2))].Q_aligned -
-                 np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 4.2V
-    electrode_info_df['ne_soc_FC4p1V'] = (
-                (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.1))].Q_aligned -
-                 np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 4.1V
-    electrode_info_df['ne_soc_FC4p0V'] = (
-                (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.0))].Q_aligned -
-                 np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 4.0V
-    electrode_info_df['ne_soc_FC3p9V'] = (
-                (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.9))].Q_aligned -
-                 np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 3.9V
-    electrode_info_df['ne_soc_FC3p8V'] = (
-                (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.8))].Q_aligned -
-                 np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 3Q_aligned.8V
-    electrode_info_df['ne_soc_FC3p7V'] = (
-                (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.7))].Q_aligned -
-                 np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 3.7V
-    electrode_info_df['ne_soc_FC3p6V'] = (
-                (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.6))].Q_aligned -
-                 np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 3.6V
-    electrode_info_df['ne_soc_FC3p5V'] = (
-                (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.5))].Q_aligned -
-                 np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 3.5V
-    electrode_info_df['ne_soc_FC3p4V'] = (
-                (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.4))].Q_aligned -
-                 np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 3.4V
-    electrode_info_df['ne_soc_FC3p3V'] = (
-                (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.3))].Q_aligned -
-                 np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 3.3V
-    electrode_info_df['ne_soc_FC3p2V'] = (
-                (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.2))].Q_aligned -
-                 np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 3.2V
-    electrode_info_df['ne_soc_FC3p1V'] = (
-                (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.1))].Q_aligned -
-                 np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 3.1V
-    electrode_info_df['ne_soc_FC3p0V'] = (
-                (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.0))].Q_aligned -
-                 np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 3.0V
-    electrode_info_df['ne_soc_FC2p9V'] = (
-                (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.9))].Q_aligned -
-                 np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 2.9V
-    electrode_info_df['ne_soc_FC2p8V'] = (
-                (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.8))].Q_aligned -
-                 np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 2.8V
-    electrode_info_df['ne_soc_FC2p7V'] = (
-                (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.7))].Q_aligned -
-                 np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / (
-                        np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) -
-                        np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]))
-                )  # 2.7V
-
-    electrode_info_df['Q_fc'] = pe_minus_ne_zeroed.loc[
-        np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.20))].Q_aligned
-
-    electrode_info_df['Q_pe'] = np.max(
-        pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - np.min(
-        pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])
-
-    electrode_info_df['Q_ne'] = np.max(
-        ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - np.min(
-        ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])
-
-    electrode_info_df['Q_li'] = np.max(
-        pe_minus_ne_zeroed['Q_aligned'].loc[~pe_minus_ne_zeroed.Voltage_aligned.isna()]) - np.min(
-        pe_minus_ne_zeroed['Q_aligned'].loc[~pe_minus_ne_zeroed.Voltage_aligned.isna()])
-
-    return electrode_info_df
diff --git a/beep/features/matrix.py b/beep/features/matrix.py
new file mode 100644
index 00000000..872c93e2
--- /dev/null
+++ b/beep/features/matrix.py
@@ -0,0 +1,490 @@
+
+import copy
+import hashlib
+from typing import List, Union
+
+import pandas as pd
+from monty.json import MSONable, MontyDecoder
+from monty.serialization import loadfn, dumpfn
+
+from beep.features.featurizer import BEEPFeaturizer, BEEPPerCycleFeaturizer, BEEPEarlyCyclesFeaturizer
+
+
+class BEEPFeatureMatrixError(BaseException):
+    """ Raise when there is a BEEP-specific problem with a dataset"""
+    pass
+
+
+class BEEPFeatureMatrix(MSONable):
+    """
+    Create an array composed of BEEPFeaturizer objects.
+
+    The array may either be:
+
+    PER-CYCLER-RUN, using BEEPEarlyCyclesFeaturizer.
+    One feature vector per cycler file, resulting in an array w. dimenions:
+        (n battery cycler files) x (k features)
+
+    OR:
+
+    PER-CYCLE, using BEEPPerCycleFeaturizer.
+    One feature vector per cycle, resulting in an array w. dimensions:
+        (n total cycles) x (k features)
+
+    Sets of featurizer objects must exclusively belong to EITHER of these
+    two paradigms (base classes), but may not be mixed.
+
+    So a set of featurizrs may be -per-cycler-file OR per-cycle, but not
+    both.
+
+    Args:
+        beepfeaturizers ([BEEPFeaturizer]): A list of BEEPFeaturizer objects,
+            either ALL BEEPEarlyCyclesFeaturizer child objects OR ALL
+            BEEPPerCycleFeaturizer child objects.
+
+    """
+
+    OP_DELIMITER = "::"
+
+    def __init__(
+            self,
+            beepfeaturizers: List[Union[BEEPPerCycleFeaturizer, BEEPEarlyCyclesFeaturizer]]
+    ):
+
+        if beepfeaturizers:
+            bfs_types_per_cycle = [bf.PER_CYCLE for bf in beepfeaturizers]
+
+            # the array should be either all True or all False
+            if all(bfs_types_per_cycle):
+                self.per_cycle = True
+            elif not any(bfs_types_per_cycle):
+                self.per_cycle = False
+            else:
+                raise TypeError(
+                    f"Featurizer types are mixed!\n"
+                    f"BEEPFeatureMatrix can only use EITHER a set of ALL "
+                    f"BEEPAllCyclesFeaturizers OR a set of ALL "
+                    f"BEEPPerCycleFeaturizers.")
+            dfs_by_file = {bf.paths.get("structured", "no file found"): [] for bf in beepfeaturizers}
+
+            unique_features = {}
+            for i, bf in enumerate(beepfeaturizers):
+
+                if bf.features is None:
+                    raise BEEPFeatureMatrixError(
+                        f"BEEPFeaturizer {bf} has not created features")
+                else:
+                    bfcn = bf.__class__.__name__
+
+                    fname = bf.paths.get("structured", None)
+                    if not fname:
+                        raise BEEPFeatureMatrixError(
+                            "Cannot join features automatically as no linking can be done "
+                            "based on original structured filename."
+                        )
+
+
+                    # Ensure no per-cycle featurizer is missing required special columns for merge
+                    if self.per_cycle:
+                        missing_special_columns = set()
+                        for sf in BEEPPerCycleFeaturizer.SPECIAL_COLUMNS:
+                            if sf not in bf.features.columns:
+                                missing_special_columns.add(sf)
+                        if missing_special_columns:
+                            raise BEEPFeatureMatrixError(
+                                f"Per cycle featurizer object missing special columns: {missing_special_columns}"
+                            )
+
+                    # Check for any possible feature collisions using identical featurizers
+                    # on identical files
+
+                    # sort params for this featurizer obj by key
+                    params = sorted(list(bf.hyperparameters.items()), key=lambda x: x[0])
+
+                    # Prevent identical features from identical input files
+                    # create a unique operation string for the application of this featurizer
+                    # on a specific file, this op string will be the same as long as
+                    # the featurizer class name, hyperparameters, and class are the same
+
+                    param_str = "-".join([f"{k}:{v}" for k, v in params])
+                    param_hash = hashlib.sha256(
+                        param_str.encode("utf-8")).hexdigest()
+
+                    # Get an id for this featurizer operation (including hyperparameters)
+                    # regardless of the file it is applied on
+                    feature_op_id = f"{bfcn}{self.OP_DELIMITER}{param_hash}"
+
+                    # Get an id for this featurizer operation (including hyperparameters)
+                    # on THIS SPECIFIC file.
+                    file_feature_op_id = f"{fname}{self.OP_DELIMITER}{bfcn}{self.OP_DELIMITER}{param_hash}"
+
+                    # Get a unique id for every feature generated by a specific
+                    # featurizer on a specific file.
+                    this_file_feature_columns_ids = \
+                        [
+                            f"{file_feature_op_id}{self.OP_DELIMITER}{c}" for c
+                            in bf.features.columns
+                        ]
+
+                    # Check to make sure there are no duplicates of the exact same feature for
+                    # the exact same featurizer with the exact same hyperparameters on the exact
+                    # same file.
+                    collisions = {c: f for c, f in unique_features.items() if
+                                  c in this_file_feature_columns_ids}
+                    if collisions:
+                        raise BEEPFeatureMatrixError(
+                            f"Multiple features generated with identical classes and identical hyperparameters"
+                            f" attempted to be joined into same dataset; \n"
+                            f"{bfcn} features collide with existing: \n{collisions}"
+                        )
+                    for c in this_file_feature_columns_ids:
+                        unique_features[c] = bfcn
+
+                    # Create consistent scheme for naming features regardless of file
+                    # Only rename non-special column names
+                    df = copy.deepcopy(bf.features)
+                    special_column_names = BEEPPerCycleFeaturizer.SPECIAL_COLUMNS if self.per_cycle else set()
+                    consistent_column_names = []
+
+                    for c in df.columns:
+                        if c in special_column_names:
+                            consistent_column_names.append(c)
+                        else:
+                            consistent_column_names.append(f"{c}{self.OP_DELIMITER}{feature_op_id}")
+                    df.columns = consistent_column_names
+
+                    # ensure cycle_index and diag_pos are integers
+                    if self.per_cycle:
+                        for col in BEEPPerCycleFeaturizer.SPECIAL_COLUMNS:
+                            df[col] = df[col].astype(int)
+                        df["filename"] = [fname] * df.shape[0]
+
+                    else:
+                        df.index = [fname] * df.shape[0]
+                        df.index.rename("filename", inplace=True)
+
+                    dfs_by_file[fname].append(df)
+
+            blocks = []
+            self.dfs_by_file = dfs_by_file
+            indexing_cols = ["filename"] + list(BEEPPerCycleFeaturizer.SPECIAL_COLUMNS)
+
+            # concat dfs by file across columns
+            for filename, dfs in dfs_by_file.items():
+                if self.per_cycle:
+                    rows = dfs[0]
+                    for df in dfs[1:]:
+                        rows = pd.merge(
+                            rows,
+                            df,
+                            how="outer",
+                            on=indexing_cols
+                        )
+                    rows = rows.reset_index().sort_values(["cycle_index"])
+                    feature_cols = sorted([f for f in rows.columns if f not in indexing_cols])
+                    rows = rows[indexing_cols + feature_cols]
+
+                else:
+                    rows = pd.concat(dfs, axis=1)
+                    rows = rows[sorted(rows.columns)]
+
+                blocks.append(rows)
+
+            # concat all dfs for all files across rows
+            self.matrix = pd.concat(blocks, axis=0)
+
+        else:
+            self.matrix = None
+
+        self.featurizers = beepfeaturizers
+
+    def as_dict(self):
+        """Serialize a BEEPFeatureMatrix as a dictionary.
+
+        Must not be loaded from legacy.
+
+        Returns:
+            (dict): corresponding to dictionary for serialization.
+
+        """
+
+        return {
+            "@module": self.__class__.__module__,
+            "@class": self.__class__.__name__,
+
+            # Core parts of BEEPFeaturizer
+            "featurizers": [f.as_dict() for f in self.featurizers],
+            "matrix": self.matrix.to_dict("list"),
+        }
+
+    @classmethod
+    def from_dict(cls, d):
+        """Create a BEEPFeatureMatrix object from a dictionary.
+
+        Args:
+            d (dict): dictionary represenation.
+
+        Returns:
+            beep.structure.ProcessedCyclerRun: deserialized ProcessedCyclerRun.
+        """
+        # no need for original datapaths, as their ref paths should
+        # be in the subobjects
+        featurizers = [MontyDecoder().process_decoded(f) for f in
+                       d["featurizers"]]
+        return cls(featurizers)
+
+    @classmethod
+    def from_json_file(cls, filename):
+        """Load a structured run previously saved to file.
+
+        .json.gz files are supported.
+
+        Loads a BEEPFeatureMatrix from json.
+
+        Can be used in combination with files serialized with BEEPFeatures.to_json_file.
+
+        Args:
+            filename (str, Pathlike): a json file from a structured run, serialzed with to_json_file.
+
+        Returns:
+            None
+        """
+        return loadfn(filename)
+
+    def to_json_file(self, filename):
+        """Save a BEEPFeatureMatrix to disk as a json.
+
+        .json.gz files are supported.
+
+        Not named from_json to avoid conflict with MSONable.from_json(*)
+
+        Args:
+            filename (str, Pathlike): The filename to save the file to.
+            omit_raw (bool): If True, saves only structured (NOT RAW) data.
+                More efficient for saving/writing to disk.
+
+        Returns:
+            None
+        """
+        d = self.as_dict()
+        dumpfn(d, filename)
+
+
+# class BEEPCycleFeatureMatrix(MSONable):
+#     """
+#     Create an ((n battery cycler files) x (j cycles)) x (k features)  array composed of
+#     m BEEPFeaturizer objects.
+#
+#     Args:
+#         beepfeaturizers ([BEEPFeaturizer]): A list of BEEPFeaturizer objects
+#
+#     """
+#
+#     OP_DELIMITER = "::"
+#
+#     def __init__(self, beepfeaturizers: List[BEEPFeaturizer]):
+#
+#         if beepfeaturizers:
+#             # initialize emtpy dict of file names
+#             dfs_by_file = {os.path.basename(
+#                 bf.paths.get("structured", "no file found")
+#             )[0:-19]: pd.DataFrame(
+#                 columns=['filename', 'cycle_index', 'diag_pos']
+#                 ) for bf in beepfeaturizers}
+#             # big_df_rows = {bf.__class__.__name__: [] for bf in beepfeaturizers}
+#             unique_features = {}
+#             for i, bf in enumerate(beepfeaturizers):
+#                 if bf.features is None:
+#                     raise BEEPFeatureMatrixError(
+#                         f"BEEPFeaturizer {bf} has not created features")
+#
+#                 #                 elif bf.features.shape[0] != 1:
+#                 #                     raise BEEPFeatureMatrixError(f"BEEPFeaturizer {bf} features are not 1-dimensional.")
+#                 else:
+#                     bfcn = bf.__class__.__name__
+#
+#                     #                     fname = bf.paths.get("structured", None)
+#                     fname = os.path.basename(bf.paths['structured'])[0:-19]
+#                     if not fname:
+#                         raise BEEPFeatureMatrixError(
+#                             "Cannot join features automatically as no linking can be done "
+#                             "based on original structured filename."
+#                         )
+#
+#                     # Check for any possible feature collisions using identical featurizers
+#                     # on identical files
+#
+#                     # sort params for this featurizer obj by key
+#                     params = sorted(list(bf.hyperparameters.items()),
+#                                     key=lambda x: x[0])
+#
+#                     # Prevent identical features from identical input files
+#                     # create a unique operation string for the application of this featurizer
+#                     # on a specific file, this op string will be the same as long as
+#                     # the featurizer class name, hyperparameters, and class are the same
+#
+#                     param_str = "-".join([f"{k}:{v}" for k, v in params])
+#                     param_hash = hashlib.sha256(
+#                         param_str.encode("utf-8")).hexdigest()
+#
+#                     # Get an id for this featurizer operation (including hyperparameters)
+#                     # regardless of the file it is applied on
+#                     feature_op_id = f"{bfcn}{self.OP_DELIMITER}{param_hash}"
+#
+#                     # Get an id for this featurizer operation (including hyperparameters)
+#                     # on THIS SPECIFIC file.
+#                     file_feature_op_id = f"{fname}{self.OP_DELIMITER}{bfcn}{self.OP_DELIMITER}{param_hash}"
+#
+#                     # Get a unique id for every feature generated by a specific
+#                     # featurizer on a specific file.
+#                     this_file_feature_columns_ids = \
+#                         [
+#                             f"{file_feature_op_id}{self.OP_DELIMITER}{c}" for c
+#                             in bf.features.columns
+#                         ]
+#
+#                     # Check to make sure there are no duplicates of the exact same feature for
+#                     # the exact same featurizer with the exact same hyperparameters on the exact
+#                     # same file.
+#                     collisions = {c: f for c, f in unique_features.items() if
+#                                   c in this_file_feature_columns_ids}
+#                     if collisions:
+#                         raise BEEPFeatureMatrixError(
+#                             f"Multiple features generated with identical classes and identical hyperparameters"
+#                             f" attempted to be joined into same dataset; \n"
+#                             f"{bfcn} features collide with existing: \n{collisions}"
+#                         )
+#                     for c in this_file_feature_columns_ids:
+#                         unique_features[c] = bfcn
+#
+#                     # Create consistent scheme for naming features regardless of file
+#                     df = copy.deepcopy(bf.features)
+#                     consistent_column_names = [
+#                         f"{c}{self.OP_DELIMITER}{feature_op_id}" for c in
+#                         df.columns]
+#                     df.columns = consistent_column_names
+#
+#                     #                     df.index = [fname] * df.shape[0]
+#                     #                     df.index.rename("filename", inplace=True)
+#
+#                     # create filename column to merge on
+#                     df['filename'] = os.path.basename(bf.paths['structured'])[
+#                                      0:-19]
+#
+#                     #                     df = df.reset_index(drop=True)
+#
+#                     # remove hash from cycle_index and diag_pos column
+#                     cycle_index_col = [col for col in df.columns if
+#                                        'cycle_index' in col]
+#                     df.rename(columns={cycle_index_col[0]: 'cycle_index'},
+#                               inplace=True)
+#
+#                     # remove hash from diag_pos column
+#                     diag_pos_col = [col for col in df.columns if
+#                                     'diag_pos' in col]
+#                     df.rename(columns={diag_pos_col[0]: 'diag_pos'},
+#                               inplace=True)
+#
+#                     # ensure cycle_index and diag_pos are integers
+#                     df['cycle_index'] = df['cycle_index'].astype(int)
+#                     df['diag_pos'] = df['diag_pos'].astype(int)
+#
+#                     # append each BEEPFeaturizer df to the corresponding cell dict entry
+#                     #                     dfs_by_file[fname].append(df)
+#                     dfs_by_file[fname] = dfs_by_file[fname].merge(
+#                         df, how='outer',
+#                         on=['filename', 'cycle_index', 'diag_pos']).sort_values(
+#                         'cycle_index').reset_index(drop=True)
+#             #                     dfs_by_file[fname] = pd.concat(
+#             #                         [dfs_by_file[fname],df],
+#             #                         axis=1,join='outer',ignore_index=True,
+#             #                         keys=['filename'])
+#             #                     self.dfs_by_file = dfs_by_file
+#             #                     self.df = df
+#             #             return None
+#
+#             rows = []
+#             self.matrix = pd.DataFrame()
+#             for filename, dfs in dfs_by_file.items():
+#                 #                 row = pd.concat([row,dfs], axis=1)
+#                 #                 row = row[sorted(row.columns)]
+#                 #                 rows.append(row)
+#                 self.matrix = pd.concat([self.matrix, dfs], axis=0,
+#                                         ignore_index=True,
+#                                         join='outer')  # , keys=['filename']
+#
+#         else:
+#             self.matrix = None
+#
+#         self.featurizers = beepfeaturizers
+#
+#     def as_dict(self):
+#         """Serialize a BEEPDatapath as a dictionary.
+#
+#         Must not be loaded from legacy.
+#
+#         Returns:
+#             (dict): corresponding to dictionary for serialization.
+#
+#         """
+#
+#         return {
+#             "@module": self.__class__.__module__,
+#             "@class": self.__class__.__name__,
+#
+#             # Core parts of BEEPFeaturizer
+#             "featurizers": [f.as_dict() for f in self.featurizers],
+#             "matrix": self.matrix.to_dict("list"),
+#         }
+#
+#     @classmethod
+#     def from_dict(cls, d):
+#         """Create a BEEPDatapath object from a dictionary.
+#
+#         Args:
+#             d (dict): dictionary represenation.
+#
+#         Returns:
+#             beep.structure.ProcessedCyclerRun: deserialized ProcessedCyclerRun.
+#         """
+#         # no need for original datapaths, as their ref paths should
+#         # be in the subobjects
+#         featurizers = [MontyDecoder().process_decoded(f) for f in
+#                        d["featurizers"]]
+#         return cls(featurizers)
+#
+#     @classmethod
+#     def from_json_file(cls, filename):
+#         """Load a structured run previously saved to file.
+#
+#         .json.gz files are supported.
+#
+#         Loads a BEEPFeatureMatrix from json.
+#
+#         Can be used in combination with files serialized with BEEPFeatures.to_json_file.
+#
+#         Args:
+#             filename (str, Pathlike): a json file from a structured run, serialzed with to_json_file.
+#
+#         Returns:
+#             None
+#         """
+#         return loadfn(filename)
+#
+#     def to_json_file(self, filename):
+#         """Save a BEEPFeatureMatrix to disk as a json.
+#
+#         .json.gz files are supported.
+#
+#         Not named from_json to avoid conflict with MSONable.from_json(*)
+#
+#         Args:
+#             filename (str, Pathlike): The filename to save the file to.
+#             omit_raw (bool): If True, saves only structured (NOT RAW) data.
+#                 More efficient for saving/writing to disk.
+#
+#         Returns:
+#             None
+#         """
+#         d = self.as_dict()
+#         dumpfn(d, filename)
\ No newline at end of file
diff --git a/beep/features/per_cycle/__init__.py b/beep/features/per_cycle/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/beep/features/per_cycle/diagnostic.py b/beep/features/per_cycle/diagnostic.py
new file mode 100644
index 00000000..f4a1b0ef
--- /dev/null
+++ b/beep/features/per_cycle/diagnostic.py
@@ -0,0 +1,177 @@
+
+import pandas as pd
+
+from beep import PROTOCOL_PARAMETERS_DIR
+from beep.features import helper_functions
+from functools import reduce
+
+from beep.features.featurizer import BEEPPerCycleFeaturizer
+
+
+class DiagnosticFeaturesPerCycle(BEEPPerCycleFeaturizer):
+    """
+    This class stores fractional levels of degradation in discharge capacity and discharge energy
+    relative to the first cycle at each diagnostic cycle, grouped by diagnostic cycle type.
+
+        name (str): predictor object name.
+        X (pandas.DataFrame): features in DataFrame format.
+        metadata (dict): information about the conditions, data
+            and code used to produce features
+
+    Hyperparameters:
+        parameters_dir (str): Full path to directory of parameters to analyse the
+            diagnostic cycles
+    """
+    DEFAULT_HYPERPARAMETERS = {
+        "parameters_dir": PROTOCOL_PARAMETERS_DIR,
+        "nominal_capacity": 4.84,
+
+    }
+
+    def validate(self):
+        """
+        This function determines if the input data has the necessary attributes for
+        creation of this feature class. It should test for all of the possible reasons
+        that feature generation would fail for this particular input data.
+
+        Args:
+            processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run
+            params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object
+            gets featurized. These could be filters for column or row operations
+        Returns:
+            bool: True/False indication of ability to proceed with feature generation
+        """
+        return helper_functions.check_diagnostic_validation(self.datapath)
+
+    def create_features(self):
+        """
+        Generates diagnostic-property features from processed cycler run, including values for n*x method
+        Args:
+            self.datapath (beep.structure.ProcessedCyclerRun): data from cycler run
+            params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object
+                gets featurized. These could be filters for column or row operations
+            parameters_path (str): Root directory storing project parameter files.
+
+        Returns:
+            pd.DataFrame: cycle_index, RPT discharge capacities and energies, aging cycle discharge capacity and energy,
+                            equivalent full cycles of aging cycle discharge, cumulative discharge throughput.
+            for each diagnostic cycle of the cell
+        """
+
+        parameters_path = self.hyperparameters["parameters_dir"]
+
+        # RPT discharge capacities
+        data_rpt_02C = self.datapath.diagnostic_data.loc[
+            self.datapath.diagnostic_data.cycle_type == 'rpt_0.2C']
+        Q_rpt_02C = data_rpt_02C.groupby('cycle_index')[
+            ['discharge_capacity', 'discharge_energy']].max().reset_index(
+            drop=False)
+        Q_rpt_02C.rename(
+            columns={'discharge_capacity': 'rpt_0.2C_discharge_capacity',
+                     'discharge_energy': 'rpt_0.2C_discharge_energy'},
+            inplace=True)
+        Q_rpt_02C = Q_rpt_02C.reset_index(drop=False).rename(
+            columns={'index': 'diag_pos'})
+
+        rpt_02C_cycles = data_rpt_02C.cycle_index.unique()  # for referencing last regular cycle before diagnostic
+
+        data_rpt_1C = self.datapath.diagnostic_data.loc[
+            self.datapath.diagnostic_data.cycle_type == 'rpt_1C']
+        Q_rpt_1C = data_rpt_1C.groupby('cycle_index')[
+            ['discharge_capacity', 'discharge_energy']].max().reset_index(
+            drop=False)
+        Q_rpt_1C.rename(
+            columns={'discharge_capacity': 'rpt_1C_discharge_capacity',
+                     'discharge_energy': 'rpt_1C_discharge_energy'},
+            inplace=True)
+        Q_rpt_1C = Q_rpt_1C.reset_index(drop=False).rename(
+            columns={'index': 'diag_pos'})
+
+        data_rpt_2C = self.datapath.diagnostic_data.loc[
+            self.datapath.diagnostic_data.cycle_type == 'rpt_2C']
+        Q_rpt_2C = data_rpt_2C.groupby('cycle_index')[
+            ['discharge_capacity', 'discharge_energy']].max().reset_index(
+            drop=False)
+        Q_rpt_2C.rename(
+            columns={'discharge_capacity': 'rpt_2C_discharge_capacity',
+                     'discharge_energy': 'rpt_2C_discharge_energy'},
+            inplace=True)
+        Q_rpt_2C = Q_rpt_2C.reset_index(drop=False).rename(
+            columns={'index': 'diag_pos'})
+
+        # cumuative discharge throughput
+        aging_df = self.datapath.structured_summary[
+            ['cycle_index', 'charge_throughput', 'energy_throughput',
+             'energy_efficiency', 'charge_duration', 'CV_time', 'CV_current',
+             'energy_efficiency']]
+        aging_df = aging_df.loc[aging_df.cycle_index.isin(rpt_02C_cycles - 3)]
+
+        cumulative_discharge_throughput = aging_df[
+            ['cycle_index', 'charge_throughput']].rename(
+            columns={'charge_throughput': 'discharge_throughput'}).reset_index(
+            drop=True)
+        cumulative_discharge_throughput = cumulative_discharge_throughput.reset_index(
+            drop=False).rename(columns={'index': 'diag_pos'})
+
+        cumulative_energy_throughput = aging_df[
+            ['cycle_index', 'energy_throughput']].reset_index(drop=True)
+        cumulative_energy_throughput = cumulative_energy_throughput.reset_index(
+            drop=False).rename(columns={'index': 'diag_pos'})
+
+        equivalent_full_cycles = cumulative_discharge_throughput.copy()
+        equivalent_full_cycles.rename(
+            columns={'discharge_throughput': 'equivalent_full_cycles'},
+            inplace=True)
+        equivalent_full_cycles['equivalent_full_cycles'] = \
+        equivalent_full_cycles['equivalent_full_cycles'] / self.hyperparameters[
+            'nominal_capacity']
+
+        # Q_aging_pre_diag - discharge capacity of aging cycle before diagnostic
+        Q_aging_pre_diag = self.datapath.structured_data.groupby('cycle_index')[
+            'discharge_capacity'].max().loc[rpt_02C_cycles[1:] - 3].reset_index(
+            drop=False)  # ignore first diagnostic, adjust cycle index to Q_aging_pre_diag
+        Q_aging_pre_diag.rename(
+            columns={'discharge_capacity': 'Q_aging_pre_diag'}, inplace=True)
+        Q_aging_pre_diag = Q_aging_pre_diag.reset_index(
+            drop=False).rename(columns={'index': 'diag_pos'})
+        Q_aging_pre_diag['diag_pos'] = Q_aging_pre_diag[
+                                           'diag_pos'] + 1  # since, first diag is ignored, add one to diag_pos
+
+        # Q_aging_post_diag - discharge capacity of aging cycle after diagnostic
+        Q_aging_post_diag = \
+        self.datapath.structured_data.groupby('cycle_index')[
+            'discharge_capacity'].max().loc[rpt_02C_cycles + 3].reset_index(
+            drop=False)  # does not ignore first diag since Q_aging exists after first diag
+        Q_aging_post_diag.rename(
+            columns={'discharge_capacity': 'Q_aging_post_diag'}, inplace=True)
+        Q_aging_post_diag = Q_aging_post_diag.reset_index(
+            drop=False).rename(columns={'index': 'diag_pos'})
+
+        # Diagnostic time
+        diagnostic_time = data_rpt_02C.groupby('cycle_index')[
+            'test_time'].min().reset_index(drop=False).rename(
+            columns={'test_time': 'diagnostic_time'})
+        diagnostic_time = diagnostic_time.reset_index(
+            drop=False).rename(columns={'index': 'diag_pos'})
+
+        # Combine dataframes
+        df_list = [Q_rpt_02C, Q_rpt_1C, Q_rpt_2C,
+                   cumulative_discharge_throughput,
+                   cumulative_energy_throughput,
+                   equivalent_full_cycles,
+                   Q_aging_pre_diag,
+                   Q_aging_post_diag,
+                   diagnostic_time]
+
+        for df in df_list:
+            df['cycle_index'] = df['cycle_index'].copy().astype(int)
+            df['diag_pos'] = df['diag_pos'].copy().astype(int)
+
+        cycle_features = reduce(
+            lambda x, y: pd.merge(x, y, on=['cycle_index', 'diag_pos'],
+                                  how='outer'), df_list)
+        self.features = cycle_features.sort_values('cycle_index').reset_index(
+            drop=True)
+
+
+
diff --git a/beep/features/per_cycle/hppc.py b/beep/features/per_cycle/hppc.py
new file mode 100644
index 00000000..d95f4c3c
--- /dev/null
+++ b/beep/features/per_cycle/hppc.py
@@ -0,0 +1,80 @@
+import pandas as pd
+
+from beep import PROTOCOL_PARAMETERS_DIR
+from beep.features import helper_functions
+
+from beep.features.featurizer import BEEPPerCycleFeaturizer
+
+
+
+class HPPCResistanceVoltagePerCycle(BEEPPerCycleFeaturizer):
+    DEFAULT_HYPERPARAMETERS = {
+        "test_time_filter_sec": 1000000,
+        "cycle_index_filter": 6,
+        "soc_window": 8,
+        "parameters_path": PROTOCOL_PARAMETERS_DIR
+    }
+
+    def validate(self):
+        val, msg = helper_functions.check_diagnostic_validation(self.datapath)
+        if val:
+            conditions = []
+            conditions.append(
+                any(
+                    [
+                        "hppc" in x
+                        for x in
+                        self.datapath.diagnostic_summary.cycle_type.unique()
+                    ]
+                )
+            )
+            if all(conditions):
+                return True, None
+            else:
+                return False, "HPPC conditions not met for this cycler run"
+        else:
+            return val, msg
+
+    def create_features(self):
+        # Filter out low cycle numbers at the end of the test, corresponding to the "final" diagnostic
+        self.datapath.diagnostic_data = self.datapath.diagnostic_data[
+            ~((self.datapath.diagnostic_data.test_time > self.hyperparameters[
+                'test_time_filter_sec']) &
+              (self.datapath.diagnostic_data.cycle_index < self.hyperparameters[
+                  'cycle_index_filter']))
+        ]
+        self.datapath.diagnostic_data = self.datapath.diagnostic_data.groupby(
+            ["cycle_index", "step_index", "step_index_counter"]
+        ).filter(lambda x: ~x["test_time"].isnull().all())
+
+        # Only hppc_resistance_features are able to be calculated without error.
+        # Xiao Cui should be pulled in to understand the issue with the others features.
+
+        # diffusion features
+        #         diffusion_features = featurizer_helpers.get_diffusion_cycle_features(
+        #             self.datapath,
+        #         )
+
+        # hppc resistance features
+        hppc_resistance_features = helper_functions.get_hppc_resistance_cycle_features(
+            self.datapath,
+        )
+
+        # the variance of ocv features
+        #         hppc_ocv_features = featurizer_helpers.get_hppc_ocv_cycle_features(
+        #             self.datapath,
+        #         )
+
+        # the v_diff features
+        #         v_diff = featurizer_helpers.get_v_diff_cycle_features(
+        #             self.datapath,
+        #             self.hyperparameters["soc_window"],
+        #             self.hyperparameters["parameters_path"]
+        #         )
+
+        # merge everything together as a final result dataframe
+        self.features = pd.concat(
+            [hppc_resistance_features,
+             # hppc_ocv_features,
+             # v_diff, #diffusion_features
+             ], axis=1)
diff --git a/beep/features/per_cycle/protocol.py b/beep/features/per_cycle/protocol.py
new file mode 100644
index 00000000..63c78926
--- /dev/null
+++ b/beep/features/per_cycle/protocol.py
@@ -0,0 +1,64 @@
+
+from beep import PROTOCOL_PARAMETERS_DIR
+from beep.features import helper_functions
+from beep.utils.parameters_lookup import get_protocol_parameters
+
+from beep.features.featurizer import BEEPPerCycleFeaturizer
+
+
+class CyclingProtocolPerCycle(BEEPPerCycleFeaturizer):
+    """
+    This class stores information about the charging protocol used
+        name (str): predictor object name.
+        X (pandas.DataFrame): features in DataFrame format.
+        metadata (dict): information about the conditions, data
+            and code used to produce features
+    Hyperparameters:
+        parameters_dir (str): Full path to directory of charging protocol parameters
+        quantities ([str]): list of parameters to return
+    """
+    DEFAULT_HYPERPARAMETERS = {
+        "parameters_dir": PROTOCOL_PARAMETERS_DIR,
+        "quantities": ["charge_constant_current_1", "charge_constant_current_2",
+                       "charge_cutoff_voltage", "charge_constant_voltage_time",
+                       "discharge_constant_current",
+                       "discharge_cutoff_voltage"],
+    }
+
+    def validate(self):
+        """
+        This function determines if the input data has the necessary attributes for
+        creation of this feature class. It should test for all of the possible reasons
+        that feature generation would fail for this particular input data.
+        Args:
+            processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run
+            params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object
+            gets featurized. These could be filters for column or row operations
+        Returns:
+            bool: True/False indication of ability to proceed with feature generation
+        """
+        if not (
+                'raw' in self.datapath.paths.keys() or 'structured' in self.datapath.paths.keys()):
+            message = "datapath paths not set, unable to fetch charging protocol"
+            return False, message
+        else:
+            return helper_functions.check_diagnostic_validation(self.datapath)
+
+    def create_features(self):
+        """
+        Fetches charging protocol features
+        """
+
+        parameters_path = self.hyperparameters["parameters_dir"]
+        file_path = self.datapath.paths[
+            'raw'] if 'raw' in self.datapath.paths.keys() else \
+        self.datapath.paths['structured']
+
+        parameters, _ = get_protocol_parameters(file_path, parameters_path)
+
+        parameters = parameters[self.hyperparameters["quantities"]]
+        parameters['cycle_index'] = int(
+            0)  # create a cycle index column for merging with other featurizers
+        parameters['diag_pos'] = int(
+            0)  # create a diag_pos column for merging with other featurizers
+        self.features = parameters
\ No newline at end of file
diff --git a/beep/features/tests/test_features.py b/beep/features/tests/test_features.py
index 59171edb..9423e331 100644
--- a/beep/features/tests/test_features.py
+++ b/beep/features/tests/test_features.py
@@ -29,7 +29,7 @@
 
 from beep.structure.maccor import MaccorDatapath
 from beep.structure.cli import auto_load_processed, auto_load
-from beep.features import featurizer_helpers
+from beep.features import helper_functions
 from beep.utils import parameters_lookup
 from monty.serialization import dumpfn, loadfn
 from monty.tempfile import ScratchDir
@@ -270,9 +270,9 @@ def test_get_fractional_quantity_remaining_nx(self):
         structured_datapath.structured_summary = structured_datapath.structured_summary[
             ~structured_datapath.structured_summary.cycle_index.isin(structured_datapath.diagnostic_summary.cycle_index)]
 
-        sum_diag = featurizer_helpers.get_fractional_quantity_remaining_nx(structured_datapath,
-                                                                           metric="discharge_energy",
-                                                                           diagnostic_cycle_type="hppc")
+        sum_diag = helper_functions.get_fractional_quantity_remaining_nx(structured_datapath,
+                                                                         metric="discharge_energy",
+                                                                         diagnostic_cycle_type="hppc")
         # print(sum_diag["normalized_regular_throughput"])
         self.assertEqual(len(sum_diag.index), 16)
         self.assertEqual(sum_diag.cycle_index.max(), 1507)
@@ -284,9 +284,9 @@ def test_get_fractional_quantity_remaining_nx(self):
         self.assertEqual(sum_diag['diagnostic_interval'].iloc[0], 100)
         self.assertEqual(sum_diag['epoch_time'].iloc[0], 1576641695)
 
-        sum_diag = featurizer_helpers.get_fractional_quantity_remaining_nx(structured_datapath,
-                                                                           metric="discharge_energy",
-                                                                           diagnostic_cycle_type="rpt_1C")
+        sum_diag = helper_functions.get_fractional_quantity_remaining_nx(structured_datapath,
+                                                                         metric="discharge_energy",
+                                                                         diagnostic_cycle_type="rpt_1C")
         self.assertEqual(len(sum_diag.index), 16)
         self.assertEqual(sum_diag.cycle_index.max(), 1509)
         self.assertEqual(np.around(sum_diag["initial_regular_throughput"].iloc[0], 3), np.around(237.001769, 3))
@@ -301,9 +301,9 @@ def test_get_fractional_quantity_remaining_nx(self):
         )
         structured_datapath = auto_load_processed(processed_cycler_run_path_2)
 
-        sum_diag = featurizer_helpers.get_fractional_quantity_remaining_nx(structured_datapath,
-                                                                           metric="discharge_energy",
-                                                                           diagnostic_cycle_type="hppc")
+        sum_diag = helper_functions.get_fractional_quantity_remaining_nx(structured_datapath,
+                                                                         metric="discharge_energy",
+                                                                         diagnostic_cycle_type="hppc")
         self.assertEqual(len(sum_diag.index), 3)
         self.assertEqual(sum_diag.cycle_index.max(), 242)
         self.assertEqual(np.around(sum_diag["initial_regular_throughput"].iloc[0], 3), np.around(331.428, 3))
@@ -329,7 +329,7 @@ def test_get_v_diff(self):
         with ScratchDir("."):
             # processed_cycler_run_path_1
             structured_datapath = auto_load_processed(processed_cycler_run_path_1)
-            v_vars_df = featurizer_helpers.get_v_diff(structured_datapath, 1, 8)
+            v_vars_df = helper_functions.get_v_diff(structured_datapath, 1, 8)
             # print(v_vars_df)
             self.assertEqual(np.round(v_vars_df.iloc[0]['var_v_diff'], decimals=8),
                              np.round(0.00472705, decimals=8))
@@ -344,7 +344,7 @@ def test_get_v_diff(self):
 
             # processed_cycler_run_path_2
             structured_datapath = auto_load_processed(processed_cycler_run_path_2)
-            v_vars_df = featurizer_helpers.get_v_diff(structured_datapath, 1, 8)
+            v_vars_df = helper_functions.get_v_diff(structured_datapath, 1, 8)
             # print(v_vars_df)
             self.assertEqual(np.round(v_vars_df.iloc[0]['var_v_diff'], decimals=8),
                              np.round(2.664e-05, decimals=8))
@@ -359,7 +359,7 @@ def test_get_v_diff(self):
 
             # processed_cycler_run_path_3
             structured_datapath = auto_load_processed(processed_cycler_run_path_3)
-            v_vars_df = featurizer_helpers.get_v_diff(structured_datapath, 1, 8)
+            v_vars_df = helper_functions.get_v_diff(structured_datapath, 1, 8)
             # print(v_vars_df)
             self.assertEqual(np.round(v_vars_df.iloc[0]['var_v_diff'], decimals=8),
                              np.round(4.82e-06, decimals=8))
@@ -374,7 +374,7 @@ def test_get_v_diff(self):
 
             # processed_cycler_run_path_4
             structured_datapath = auto_load_processed(processed_cycler_run_path_4)
-            v_vars_df = featurizer_helpers.get_v_diff(structured_datapath, 1, 8)
+            v_vars_df = helper_functions.get_v_diff(structured_datapath, 1, 8)
             # print(v_vars_df)
             self.assertEqual(np.round(v_vars_df.iloc[0]['var_v_diff'], decimals=8),
                              np.round(9.71e-06, decimals=8))
@@ -392,7 +392,7 @@ def test_get_hppc_ocv(self):
             TEST_FILE_DIR, "PreDiag_000240_000227_truncated_structure.json"
         )
         structured_datapath = auto_load_processed(structured_datapath_loc)
-        hppc_ocv_features = featurizer_helpers.get_hppc_ocv(structured_datapath, 1)
+        hppc_ocv_features = helper_functions.get_hppc_ocv(structured_datapath, 1)
         self.assertAlmostEqual(hppc_ocv_features['var_ocv'].iloc[0], 0.000016, 6)
         self.assertAlmostEqual(hppc_ocv_features['min_ocv'].iloc[0], -0.001291, 6)
         self.assertAlmostEqual(hppc_ocv_features['mean_ocv'].iloc[0], 0.002221, 6)
@@ -425,9 +425,9 @@ def test_get_step_index(self):
                                             parameter_row["capacity_nominal"].iloc[0], 2)
                     # print(step, median_crate, duration)
 
-        step_ind = featurizer_helpers.get_step_index(structured_datapath,
-                                                     cycle_type="hppc",
-                                                     diag_pos=0)
+        step_ind = helper_functions.get_step_index(structured_datapath,
+                                                   cycle_type="hppc",
+                                                   diag_pos=0)
         self.assertEqual(len(step_ind.values()), 6)
         print([step_ind["hppc_long_rest"],
                step_ind["hppc_discharge_pulse"],
@@ -443,9 +443,9 @@ def test_get_step_index(self):
             'hppc_charge_pulse': 14,
             'hppc_discharge_to_next_soc': 15
         })
-        step_ind = featurizer_helpers.get_step_index(structured_datapath,
-                                                     cycle_type="hppc",
-                                                     diag_pos=1)
+        step_ind = helper_functions.get_step_index(structured_datapath,
+                                                   cycle_type="hppc",
+                                                   diag_pos=1)
         self.assertEqual(len(step_ind.values()), 6)
         self.assertEqual(step_ind, {
             'hppc_charge_to_soc': 41,
@@ -465,9 +465,9 @@ def test_get_step_index_2(self):
         _, protocol_name = os.path.split(structured_datapath.metadata.protocol)
         parameter_row, _ = parameters_lookup.get_protocol_parameters(protocol_name, parameters_path=parameters_path)
 
-        step_ind = featurizer_helpers.get_step_index(structured_datapath,
-                                                     cycle_type="hppc",
-                                                     diag_pos=0)
+        step_ind = helper_functions.get_step_index(structured_datapath,
+                                                   cycle_type="hppc",
+                                                   diag_pos=0)
         self.assertEqual(len(step_ind.values()), 7)
 
         self.assertEqual(step_ind, {
@@ -479,9 +479,9 @@ def test_get_step_index_2(self):
             'hppc_discharge_to_next_soc': 15,
             'hppc_final_discharge': 17
         })
-        step_ind = featurizer_helpers.get_step_index(structured_datapath,
-                                                     cycle_type="hppc",
-                                                     diag_pos=1)
+        step_ind = helper_functions.get_step_index(structured_datapath,
+                                                   cycle_type="hppc",
+                                                   diag_pos=1)
         self.assertEqual(len(step_ind.values()), 7)
         self.assertEqual(step_ind, {
             'hppc_charge_to_soc': 41,
@@ -492,24 +492,24 @@ def test_get_step_index_2(self):
             'hppc_discharge_to_next_soc': 47,
             'hppc_final_discharge': 49
         })
-        step_ind = featurizer_helpers.get_step_index(structured_datapath, cycle_type="reset", diag_pos=0)
+        step_ind = helper_functions.get_step_index(structured_datapath, cycle_type="reset", diag_pos=0)
         self.assertEqual(step_ind, {'reset_charge': 5, 'reset_discharge': 6})
-        step_ind = featurizer_helpers.get_step_index(structured_datapath, cycle_type="reset", diag_pos=1)
+        step_ind = helper_functions.get_step_index(structured_datapath, cycle_type="reset", diag_pos=1)
         self.assertEqual(step_ind, {'reset_charge': 38, 'reset_discharge': 39})
 
-        step_ind = featurizer_helpers.get_step_index(structured_datapath, cycle_type="rpt_0.2C", diag_pos=0)
+        step_ind = helper_functions.get_step_index(structured_datapath, cycle_type="rpt_0.2C", diag_pos=0)
         self.assertEqual(step_ind, {'rpt_0.2C_charge': 19, 'rpt_0.2C_discharge': 20})
-        step_ind = featurizer_helpers.get_step_index(structured_datapath, cycle_type="rpt_0.2C", diag_pos=1)
+        step_ind = helper_functions.get_step_index(structured_datapath, cycle_type="rpt_0.2C", diag_pos=1)
         self.assertEqual(step_ind, {'rpt_0.2C_charge': 51, 'rpt_0.2C_discharge': 52})
 
-        step_ind = featurizer_helpers.get_step_index(structured_datapath, cycle_type="rpt_1C", diag_pos=0)
+        step_ind = helper_functions.get_step_index(structured_datapath, cycle_type="rpt_1C", diag_pos=0)
         self.assertEqual(step_ind, {'rpt_1C_charge': 22, 'rpt_1C_discharge': 23})
-        step_ind = featurizer_helpers.get_step_index(structured_datapath, cycle_type="rpt_1C", diag_pos=1)
+        step_ind = helper_functions.get_step_index(structured_datapath, cycle_type="rpt_1C", diag_pos=1)
         self.assertEqual(step_ind, {'rpt_1C_charge': 54, 'rpt_1C_discharge': 55})
 
-        step_ind = featurizer_helpers.get_step_index(structured_datapath, cycle_type="rpt_2C", diag_pos=0)
+        step_ind = helper_functions.get_step_index(structured_datapath, cycle_type="rpt_2C", diag_pos=0)
         self.assertEqual(step_ind, {'rpt_2C_charge': 25, 'rpt_2C_discharge': 26})
-        step_ind = featurizer_helpers.get_step_index(structured_datapath, cycle_type="rpt_2C", diag_pos=1)
+        step_ind = helper_functions.get_step_index(structured_datapath, cycle_type="rpt_2C", diag_pos=1)
         self.assertEqual(step_ind, {'rpt_2C_charge': 57, 'rpt_2C_discharge': 58})
 
     def test_get_step_index_3(self):
@@ -517,7 +517,7 @@ def test_get_step_index_3(self):
             TEST_FILE_DIR, "PredictionDiagnostics_000136_00002D_truncated_structure.json"
         )
         structured_datapath = auto_load_processed(structured_datapath_loc)
-        step_ind = featurizer_helpers.get_step_index(structured_datapath, cycle_type="hppc", diag_pos=0)
+        step_ind = helper_functions.get_step_index(structured_datapath, cycle_type="hppc", diag_pos=0)
         self.assertEqual(len(step_ind.values()), 6)
 
     def test_get_diffusion_coeff(self):
@@ -526,7 +526,7 @@ def test_get_diffusion_coeff(self):
                 TEST_FILE_DIR, "PreDiag_000240_000227_truncated_structure.json"
             )
             structured_datapath = auto_load_processed(structured_datapath_loc)
-            diffusion_df = featurizer_helpers.get_diffusion_coeff(structured_datapath, 1)
+            diffusion_df = helper_functions.get_diffusion_coeff(structured_datapath, 1)
             print(np.round(diffusion_df.iloc[0].to_list(), 3))
             self.assertEqual(np.round(diffusion_df.iloc[0].to_list(), 3)[0], -0.016)
             self.assertEqual(np.round(diffusion_df.iloc[0].to_list(), 3)[5], -0.011)