diff --git a/beep/cmd.py b/beep/cmd.py index d6569056..f96cf476 100644 --- a/beep/cmd.py +++ b/beep/cmd.py @@ -42,12 +42,13 @@ from beep.structure.base import BEEPDatapath from beep.structure.cli import auto_load, auto_load_processed from beep.structure.validate import BEEPValidationError -from beep.features.base import ( +from beep.features.featurizer import ( BEEPFeaturizer, BEEPFeaturizationError, - BEEPFeatureMatrix, ) -from beep.features.core import ( +from beep.features.matrix import BEEPFeatureMatrix + +from beep.features.early_cycles import ( HPPCResistanceVoltageFeatures, DeltaQFastCharge, TrajectoryFastCharge, diff --git a/beep/features/base.py b/beep/features/base.py deleted file mode 100644 index b70a596f..00000000 --- a/beep/features/base.py +++ /dev/null @@ -1,363 +0,0 @@ -# Copyright [2020] [Toyota Research Institute] -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" - -For creating features and organizing them into datasets. - -""" -import os -import copy -import abc -import json -import hashlib -from typing import Union, Tuple, List - -import pandas as pd -from monty.io import zopen -from monty.json import MSONable, MontyDecoder -from monty.serialization import loadfn, dumpfn - -from beep.structure.base import BEEPDatapath - - -class BEEPFeaturizationError(BaseException): - """Raise when a featurization-specific error occurs""" - pass - - -class BEEPFeatureMatrixError(BaseException): - """ Raise when there is a BEEP-specific problem with a dataset""" - pass - - -class BEEPFeaturizer(MSONable, abc.ABC): - """ - Base class for all beep feature generation. - - From a structured battery file representing many cycles of one cell, - (AKA a structured datapath), produce a feature vector. - - Works for generating both - - Vectors X to use as training vectors - - Vectors or scalars y to use as ML targets - (as problems may have multiple metrics to predict) - - """ - - DEFAULT_HYPERPARAMETERS = {} - - def __init__(self, structured_datapath: Union[BEEPDatapath, None], hyperparameters: Union[dict, None] = None): - # If all required hyperparameters are specified, use those - # If some subset of required hyperparameters are specified, throw error - # If no hyperparameters are specified, use defaults - if hyperparameters: - if all(k in hyperparameters for k in self.DEFAULT_HYPERPARAMETERS): - self.hyperparameters = hyperparameters - else: - raise BEEPFeaturizationError( - f"Features cannot be created with incomplete set of " - f"hyperparameters {hyperparameters.keys()} < " - f"{self.DEFAULT_HYPERPARAMETERS.keys()}!") - else: - self.hyperparameters = self.DEFAULT_HYPERPARAMETERS - - if not (structured_datapath is None or structured_datapath.is_structured): - raise BEEPFeaturizationError("BEEPDatapath input is not structured!") - self.datapath = structured_datapath - - self.features = None - - # In case these features are loaded from file - # Allow attrs which can hold relevant metadata without having - # to reload the original datapath - self.paths = self.datapath.paths if self.datapath else {} - self.metadata = self.datapath.metadata.raw if self.datapath else {} - self.linked_semiunique_id = self.datapath.semiunique_id if self.datapath else None - - @abc.abstractmethod - def validate(self) -> Tuple[bool, Union[str, None]]: - """ - Validate a featurizer on it's ingested datapath. - - Returns: - (bool, str/None): The validation result and it's message. - - """ - raise NotImplementedError - - @abc.abstractmethod - def create_features(self) -> None: - """ - Should assign a dataframe to self.features. - - Returns: - None - """ - raise NotImplementedError - - def as_dict(self): - """Serialize a BEEPDatapath as a dictionary. - - Must not be loaded from legacy. - - Returns: - (dict): corresponding to dictionary for serialization. - - """ - - if self.features is None: - raise BEEPFeaturizationError("Cannot serialize features which have not been generated.") - - features = self.features.to_dict("list") - - return { - "@module": self.__class__.__module__, - "@class": self.__class__.__name__, - - # Core parts of BEEPFeaturizer - "features": features, - "hyperparameters": self.hyperparameters, - "paths": self.paths, - "metadata": self.metadata, - "linked_datapath_semiunique_id": self.linked_semiunique_id - } - - @classmethod - def from_dict(cls, d): - """Create a BEEPDatapath object from a dictionary. - - Args: - d (dict): dictionary represenation. - - Returns: - beep.structure.ProcessedCyclerRun: deserialized ProcessedCyclerRun. - """ - - # no need for original datapath - bf = cls(structured_datapath=None, hyperparameters=d["hyperparameters"]) - bf.features = pd.DataFrame(d["features"]) - bf.paths = d["paths"] - bf.metadata = d["metadata"] - bf.linked_semiunique_id = d["linked_datapath_semiunique_id"] - return bf - - @classmethod - def from_json_file(cls, filename): - """Load a structured run previously saved to file. - - .json.gz files are supported. - - Loads a BEEPFeaturizer from json. - - Can be used in combination with files serialized with BEEPFeatures.to_json_file. - - Args: - filename (str, Pathlike): a json file from a structured run, serialzed with to_json_file. - - Returns: - None - """ - with zopen(filename, "r") as f: - d = json.load(f) - - # Add this structured file path to the paths dict - paths = d.get("paths", {}) - paths["features"] = os.path.abspath(filename) - d["paths"] = paths - return cls.from_dict(d) - - def to_json_file(self, filename): - """Save a BEEPFeatures to disk as a json. - - .json.gz files are supported. - - Not named from_json to avoid conflict with MSONable.from_json(*) - - Args: - filename (str, Pathlike): The filename to save the file to. - omit_raw (bool): If True, saves only structured (NOT RAW) data. - More efficient for saving/writing to disk. - - Returns: - None - """ - d = self.as_dict() - dumpfn(d, filename) - - -class BEEPFeatureMatrix(MSONable): - """ - Create an (n battery cycler files) x (k features) array composed of - m BEEPFeaturizer objects. - - Args: - beepfeaturizers ([BEEPFeaturizer]): A list of BEEPFeaturizer objects - - """ - - OP_DELIMITER = "::" - - def __init__(self, beepfeaturizers: List[BEEPFeaturizer]): - - if beepfeaturizers: - dfs_by_file = {bf.paths.get("structured", "no file found"): [] for bf in beepfeaturizers} - # big_df_rows = {bf.__class__.__name__: [] for bf in beepfeaturizers} - unique_features = {} - for i, bf in enumerate(beepfeaturizers): - if bf.features is None: - raise BEEPFeatureMatrixError(f"BEEPFeaturizer {bf} has not created features") - elif bf.features.shape[0] != 1: - raise BEEPFeatureMatrixError(f"BEEPFeaturizer {bf} features are not 1-dimensional.") - else: - bfcn = bf.__class__.__name__ - - fname = bf.paths.get("structured", None) - if not fname: - raise BEEPFeatureMatrixError( - "Cannot join features automatically as no linking can be done " - "based on original structured filename." - ) - - # Check for any possible feature collisions using identical featurizers - # on identical files - - # sort params for this featurizer obj by key - params = sorted(list(bf.hyperparameters.items()), key=lambda x: x[0]) - - # Prevent identical features from identical input files - # create a unique operation string for the application of this featurizer - # on a specific file, this op string will be the same as long as - # the featurizer class name, hyperparameters, and class are the same - - param_str = "-".join([f"{k}:{v}" for k, v in params]) - param_hash = hashlib.sha256(param_str.encode("utf-8")).hexdigest() - - # Get an id for this featurizer operation (including hyperparameters) - # regardless of the file it is applied on - feature_op_id = f"{bfcn}{self.OP_DELIMITER}{param_hash}" - - # Get an id for this featurizer operation (including hyperparameters) - # on THIS SPECIFIC file. - file_feature_op_id = f"{fname}{self.OP_DELIMITER}{bfcn}{self.OP_DELIMITER}{param_hash}" - - # Get a unique id for every feature generated by a specific - # featurizer on a specific file. - this_file_feature_columns_ids = \ - [ - f"{file_feature_op_id}{self.OP_DELIMITER}{c}" for c in bf.features.columns - ] - - # Check to make sure there are no duplicates of the exact same feature for - # the exact same featurizer with the exact same hyperparameters on the exact - # same file. - collisions = {c: f for c, f in unique_features.items() if c in this_file_feature_columns_ids} - if collisions: - raise BEEPFeatureMatrixError( - f"Multiple features generated with identical classes and identical hyperparameters" - f" attempted to be joined into same dataset; \n" - f"{bfcn} features collide with existing: \n{collisions}" - ) - for c in this_file_feature_columns_ids: - unique_features[c] = bfcn - - # Create consistent scheme for naming features regardless of file - df = copy.deepcopy(bf.features) - consistent_column_names = [f"{c}{self.OP_DELIMITER}{feature_op_id}" for c in df.columns] - df.columns = consistent_column_names - - df.index = [fname] * df.shape[0] - df.index.rename("filename", inplace=True) - dfs_by_file[fname].append(df) - - rows = [] - for filename, dfs in dfs_by_file.items(): - row = pd.concat(dfs, axis=1) - row = row[sorted(row.columns)] - rows.append(row) - self.matrix = pd.concat(rows, axis=0) - - else: - self.matrix = None - - self.featurizers = beepfeaturizers - - def as_dict(self): - """Serialize a BEEPDatapath as a dictionary. - - Must not be loaded from legacy. - - Returns: - (dict): corresponding to dictionary for serialization. - - """ - - return { - "@module": self.__class__.__module__, - "@class": self.__class__.__name__, - - # Core parts of BEEPFeaturizer - "featurizers": [f.as_dict() for f in self.featurizers], - "matrix": self.matrix.to_dict("list"), - } - - @classmethod - def from_dict(cls, d): - """Create a BEEPDatapath object from a dictionary. - - Args: - d (dict): dictionary represenation. - - Returns: - beep.structure.ProcessedCyclerRun: deserialized ProcessedCyclerRun. - """ - # no need for original datapaths, as their ref paths should - # be in the subobjects - featurizers = [MontyDecoder().process_decoded(f) for f in d["featurizers"]] - return cls(featurizers) - - @classmethod - def from_json_file(cls, filename): - """Load a structured run previously saved to file. - - .json.gz files are supported. - - Loads a BEEPFeatureMatrix from json. - - Can be used in combination with files serialized with BEEPFeatures.to_json_file. - - Args: - filename (str, Pathlike): a json file from a structured run, serialzed with to_json_file. - - Returns: - None - """ - return loadfn(filename) - - def to_json_file(self, filename): - """Save a BEEPFeatureMatrix to disk as a json. - - .json.gz files are supported. - - Not named from_json to avoid conflict with MSONable.from_json(*) - - Args: - filename (str, Pathlike): The filename to save the file to. - omit_raw (bool): If True, saves only structured (NOT RAW) data. - More efficient for saving/writing to disk. - - Returns: - None - """ - d = self.as_dict() - dumpfn(d, filename) diff --git a/beep/features/core.py b/beep/features/core.py deleted file mode 100644 index 878b0987..00000000 --- a/beep/features/core.py +++ /dev/null @@ -1,909 +0,0 @@ -import numpy as np -import pandas as pd -from scipy.stats import skew, kurtosis -from scipy.interpolate import interp1d - -from beep import PROTOCOL_PARAMETERS_DIR -from beep.features import featurizer_helpers -from beep.features.base import BEEPFeaturizer, BEEPFeaturizationError - - -class HPPCResistanceVoltageFeatures(BEEPFeaturizer): - DEFAULT_HYPERPARAMETERS = { - "test_time_filter_sec": 1000000, - "cycle_index_filter": 6, - "diag_pos": 1, - "soc_window": 8, - "parameters_path": PROTOCOL_PARAMETERS_DIR - } - - def validate(self): - val, msg = featurizer_helpers.check_diagnostic_validation(self.datapath) - if val: - conditions = [] - conditions.append( - any( - [ - "hppc" in x - for x in - self.datapath.diagnostic_summary.cycle_type.unique() - ] - ) - ) - if all(conditions): - return True, None - else: - return False, "HPPC conditions not met for this cycler run" - else: - return val, msg - - def create_features(self): - # Filter out low cycle numbers at the end of the test, corresponding to the "final" diagnostic - self.datapath.diagnostic_data = self.datapath.diagnostic_data[ - ~((self.datapath.diagnostic_data.test_time > self.hyperparameters[ - 'test_time_filter_sec']) & - (self.datapath.diagnostic_data.cycle_index < self.hyperparameters[ - 'cycle_index_filter'])) - ] - self.datapath.diagnostic_data = self.datapath.diagnostic_data.groupby( - ["cycle_index", "step_index", "step_index_counter"] - ).filter(lambda x: ~x["test_time"].isnull().all()) - - # diffusion features - diffusion_features = featurizer_helpers.get_diffusion_features( - self.datapath, self.hyperparameters["diag_pos"] - ) - - hppc_r = pd.DataFrame() - # the 9 by 6 dataframe - df_dr = featurizer_helpers.get_dr_df( - self.datapath, self.hyperparameters["diag_pos"] - ) - # transform this dataframe to be 1 by 54 - columns = df_dr.columns - for column in columns: - for r in range(len(df_dr[column])): - name = column + str(r) - hppc_r[name] = [df_dr[column][r]] - - # the variance of ocv features - hppc_ocv = featurizer_helpers.get_hppc_ocv( - self.datapath, - self.hyperparameters["diag_pos"], - parameters_path=self.hyperparameters["parameters_path"] - ) - - # the v_diff features - v_diff = featurizer_helpers.get_v_diff( - self.datapath, - self.hyperparameters["diag_pos"], - self.hyperparameters["soc_window"], - self.hyperparameters["parameters_path"] - ) - - # merge everything together as a final result dataframe - self.features = pd.concat( - [hppc_r, hppc_ocv, v_diff, diffusion_features], axis=1) - - -class CycleSummaryStats(BEEPFeaturizer): - DEFAULT_HYPERPARAMETERS = { - "cycle_comp_num": [10, 100], - "statistics": ["var", "min", "mean", "skew", "kurtosis", "abs", - "square"] - } - - def validate(self): - """ - This function determines if the input data has the necessary attributes for - creation of this feature class. It should test for all of the possible reasons - that feature generation would fail for this particular input data. - - Args: - processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run - params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object - gets featurized. These could be filters for column or row operations - Returns: - bool: True/False indication of ability to proceed with feature generation - """ - - # TODO: not sure this is necessary - # Check for data in each of the selected cycles - index_1, index_2 = self.hyperparameters['cycle_comp_num'] - cycle_1 = self.datapath.structured_data[ - self.datapath.structured_data.cycle_index == index_1] - cycle_2 = self.datapath.structured_data[ - self.datapath.structured_data.cycle_index == index_2] - if len(cycle_1) == 0 or len(cycle_2) == 0: - return False, "Length of one or more comparison cycles is zero" - - # TODO: check whether this is good - # Check for relevant data - required_columns = [ - 'charge_capacity', - 'discharge_capacity', - 'charge_energy', - 'discharge_energy', - ] - pcycler_run_columns = self.datapath.structured_data.columns - if not all( - [column in pcycler_run_columns for column in required_columns]): - return False, f"Required column not present in all structured data " \ - f"(must have all of: {required_columns})" - - return True, None - - def create_features(self): - """ - Generate features listed in early prediction manuscript using both diagnostic and regular cycles - - Args: - processed_cycler_run (beep.structure.ProcessedCyclerRun) - params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object - gets featurized. These could be filters for column or row operations - parameters_path (str): Root directory storing project parameter files. - - Returns: - X (pd.DataFrame): Dataframe containing the feature - """ - - # TODO: extend this dataframe and uncomment energy features when - # structuring is refactored - X = pd.DataFrame(np.zeros((1, 28))) - - reg_cycle_comp_num = self.hyperparameters.get("cycle_comp_num") - cycle_comp_1 = self.datapath.structured_data[ - self.datapath.structured_data.cycle_index == reg_cycle_comp_num[1] - ] - cycle_comp_0 = self.datapath.structured_data[ - self.datapath.structured_data.cycle_index == reg_cycle_comp_num[0] - ] - Qc100_1 = cycle_comp_1[ - cycle_comp_1.step_type == "charge"].charge_capacity - Qc10_1 = cycle_comp_0[ - cycle_comp_0.step_type == "charge"].charge_capacity - QcDiff = Qc100_1.values - Qc10_1.values - QcDiff = QcDiff[~np.isnan(QcDiff)] - - X.loc[0, 0:6] = self.get_summary_statistics(QcDiff) - - Qd100_1 = cycle_comp_1[ - cycle_comp_1.step_type == "discharge"].discharge_capacity - Qd10_1 = cycle_comp_0[ - cycle_comp_0.step_type == "discharge"].discharge_capacity - QdDiff = Qd100_1.values - Qd10_1.values - QdDiff = QdDiff[~np.isnan(QdDiff)] - - X.loc[0, 7:13] = self.get_summary_statistics(QdDiff) - - # # Charging Energy features - Ec100_1 = cycle_comp_1[cycle_comp_1.step_type == "charge"].charge_energy - Ec10_1 = cycle_comp_0[cycle_comp_0.step_type == "charge"].charge_energy - EcDiff = Ec100_1.values - Ec10_1.values - EcDiff = EcDiff[~np.isnan(EcDiff)] - - X.loc[0, 14:20] = self.get_summary_statistics(EcDiff) - - # # Discharging Energy features - Ed100_1 = cycle_comp_1[ - cycle_comp_1.step_type == "charge"].discharge_energy - Ed10_1 = cycle_comp_0[ - cycle_comp_0.step_type == "charge"].discharge_energy - EdDiff = Ed100_1.values - Ed10_1.values - EdDiff = EdDiff[~np.isnan(EdDiff)] - - X.loc[0, 21:27] = self.get_summary_statistics(EdDiff) - - quantities = [ - "charging_capacity", - "discharging_capacity", - "charging_energy", - "discharging_energy", - ] - - X.columns = [y + "_" + x for x in quantities for y in - self.hyperparameters["statistics"]] - - self.features = X - - def get_summary_statistics(self, array): - """ - Static method for getting values corresponding - to standard 7 operations that many beep features - use, i.e. log of absolute value of each of - variance, min, mean, skew, kurtosis, the sum of - the absolute values and the sum of squares - - Args: - array (list, np.ndarray): array of values to get - standard operation values for, e.g. cycle - discharging capacity, QcDiff, etc. - - Returns: - [float]: list of features - - """ - - stats_names = self.hyperparameters["statistics"] - supported_stats = self.DEFAULT_HYPERPARAMETERS["statistics"] - - if any(s not in supported_stats for s in stats_names): - raise ValueError( - f"Unsupported statistics in {stats_names}: supported statistics are {supported_stats}") - - stats = [] - - if "var" in stats_names: - stats.append(np.log10(np.absolute(np.var(array)))) - if "min" in stats_names: - stats.append(np.log10(np.absolute(min(array)))) - if "mean" in stats_names: - stats.append(np.log10(np.absolute(np.mean(array)))) - if "skew" in stats_names: - stats.append(np.log10(np.absolute(skew(array)))) - if "kurtosis" in stats_names: - stats.append(np.log10( - np.absolute(kurtosis(array, fisher=False, bias=False)))) - if "abs" in stats_names: - stats.append(np.log10(np.sum(np.absolute(array)))) - if "square" in stats_names: - stats.append(np.log10(np.sum(np.square(array)))) - - return np.asarray(stats) - - -class DiagnosticSummaryStats(CycleSummaryStats): - """ - Object corresponding to summary statistics from a diagnostic cycle of - specific type. Includes constructors to create the features, object names - and metadata attributes in the object. Inherits from RegularCycleSummaryStats - to reuse standard feature generation - - name (str): predictor object name. - X (pandas.DataFrame): features in DataFrame format. - metadata (dict): information about the conditions, data - and code used to produce features - """ - DEFAULT_HYPERPARAMETERS = { - "test_time_filter_sec": 1000000, - "cycle_index_filter": 6, - "diagnostic_cycle_type": 'rpt_0.2C', - "diag_pos_list": [0, 1], - "statistics": ["var", "min", "mean", "skew", "kurtosis", "abs", - "square"], - "parameters_path": PROTOCOL_PARAMETERS_DIR - } - - def validate(self): - """ - This function determines if the input data has the necessary attributes for - creation of this feature class. It should test for all of the possible reasons - that feature generation would fail for this particular input data. - - Args: - processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run - params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object - gets featurized. These could be filters for column or row operations - - Returns: - bool: True/False indication of ability to proceed with feature generation - """ - val, msg = featurizer_helpers.check_diagnostic_validation(self.datapath) - if val: - df = self.datapath.diagnostic_summary - df = df[ - df.cycle_type == self.hyperparameters["diagnostic_cycle_type"]] - if df.cycle_index.nunique() >= max( - self.hyperparameters["diag_pos_list"]) + 1: - return True, None - else: - return False, "Diagnostic cycles insufficient for featurization" - else: - return val, msg - - def get_summary_diff( - self, - pos=None, - cycle_types=("rpt_0.2C", "rpt_1C", "rpt_2C"), - metrics=( - "discharge_capacity", "discharge_energy", "charge_capacity", - "charge_energy") - ): - """ - Helper function to calculate difference between summary values in the diagnostic cycles - - Args: - processed_cycler_run (beep.structure.ProcessedCyclerRun) - pos (list): position of the diagnostics to use in the calculation - cycle_types (list): calculate difference for these diagnostic types - metrics (str): Calculate difference for these metrics - - Returns: - values (list): List of difference values to insert into the dataframe - names (list): List of column headers to use in the creation of the dataframe - """ - pos = self.hyperparameters["diag_pos_list"] if not pos else pos - - values = [] - names = [] - for cycle_type in cycle_types: - diag_type_summary = self.datapath.diagnostic_summary[ - self.datapath.diagnostic_summary.cycle_type == cycle_type] - for metric in metrics: - diff = (diag_type_summary.iloc[pos[1]][metric] - - diag_type_summary.iloc[pos[0]][metric]) \ - / diag_type_summary.iloc[pos[0]][metric] - values.append(diff) - names.append("diag_sum_diff_" + str(pos[0]) + "_" + str( - pos[1]) + "_" + cycle_type + metric) - return values, names - - def create_features(self): - """ - Generate features listed in early prediction manuscript using both diagnostic and regular cycles - - Args: - self.datapathn (beep.structure.ProcessedCyclerRun) - self.hyperparameters (dict): dictionary of parameters governing how the ProcessedCyclerRun object - gets featurized. These could be filters for column or row operations - parameters_path (str): Root directory storing project parameter files. - - Returns: - X (pd.DataFrame): Dataframe containing the feature - """ - # Filter out "final" diagnostic cycles that have been appended to the end of the file with the wrong - # cycle number(test time is monotonic) - self.datapath.diagnostic_data = self.datapath.diagnostic_data[ - ~((self.datapath.diagnostic_data.test_time > self.hyperparameters[ - 'test_time_filter_sec']) & - (self.datapath.diagnostic_data.cycle_index < self.hyperparameters[ - 'cycle_index_filter'])) - ] - self.datapath.diagnostic_data = self.datapath.diagnostic_data.groupby( - ["cycle_index", "step_index", "step_index_counter"] - ).filter(lambda x: ~x["test_time"].isnull().all()) - - diag_intrp = self.datapath.diagnostic_data - - X = pd.DataFrame(np.zeros((1, 54))) - - # Calculate the cycles and the steps for the selected diagnostics - cycles = diag_intrp.cycle_index[diag_intrp.cycle_type == - self.hyperparameters[ - "diagnostic_cycle_type"]].unique() - step_dict_0 = featurizer_helpers.get_step_index( - self.datapath, - cycle_type=self.hyperparameters["diagnostic_cycle_type"], - diag_pos=self.hyperparameters["diag_pos_list"][0], - parameters_path=self.hyperparameters["parameters_path"] - ) - step_dict_1 = featurizer_helpers.get_step_index( - self.datapath, - cycle_type=self.hyperparameters["diagnostic_cycle_type"], - diag_pos=self.hyperparameters["diag_pos_list"][1], - parameters_path=self.hyperparameters["parameters_path"] - ) - - # Create masks for each position in the data - mask_pos_0_charge = ((diag_intrp.cycle_index == cycles[ - self.hyperparameters["diag_pos_list"][0]]) & - (diag_intrp.step_index == step_dict_0[ - self.hyperparameters[ - "diagnostic_cycle_type"] + '_charge'])) - mask_pos_1_charge = ((diag_intrp.cycle_index == cycles[ - self.hyperparameters["diag_pos_list"][1]]) & - (diag_intrp.step_index == step_dict_1[ - self.hyperparameters[ - "diagnostic_cycle_type"] + '_charge'])) - mask_pos_0_discharge = ((diag_intrp.cycle_index == cycles[ - self.hyperparameters["diag_pos_list"][0]]) & - (diag_intrp.step_index == - step_dict_0[self.hyperparameters[ - "diagnostic_cycle_type"] + '_discharge'])) - mask_pos_1_discharge = ((diag_intrp.cycle_index == cycles[ - self.hyperparameters["diag_pos_list"][1]]) & - (diag_intrp.step_index == - step_dict_1[self.hyperparameters[ - "diagnostic_cycle_type"] + '_discharge'])) - - # Charging Capacity features - Qc_1 = diag_intrp.charge_capacity[mask_pos_1_charge] - Qc_0 = diag_intrp.charge_capacity[mask_pos_0_charge] - QcDiff = Qc_1.values - Qc_0.values - QcDiff = QcDiff[~np.isnan(QcDiff)] - - X.loc[0, 0:6] = self.get_summary_statistics(QcDiff) - - # Discharging Capacity features - Qd_1 = diag_intrp.discharge_capacity[mask_pos_1_discharge] - Qd_0 = diag_intrp.discharge_capacity[mask_pos_0_discharge] - QdDiff = Qd_1.values - Qd_0.values - QdDiff = QdDiff[~np.isnan(QdDiff)] - - X.loc[0, 7:13] = self.get_summary_statistics(QdDiff) - - # Charging Energy features - Ec_1 = diag_intrp.charge_energy[mask_pos_1_charge] - Ec_0 = diag_intrp.charge_energy[mask_pos_0_charge] - EcDiff = Ec_1.values - Ec_0.values - EcDiff = EcDiff[~np.isnan(EcDiff)] - - X.loc[0, 14:20] = self.get_summary_statistics(EcDiff) - - # Discharging Energy features - Ed_1 = diag_intrp.discharge_energy[mask_pos_1_discharge] - Ed_0 = diag_intrp.discharge_energy[mask_pos_0_discharge] - EdDiff = Ed_1.values - Ed_0.values - EdDiff = EdDiff[~np.isnan(EdDiff)] - - X.loc[0, 21:27] = self.get_summary_statistics(EdDiff) - - # Charging dQdV features - dQdVc_1 = diag_intrp.charge_dQdV[mask_pos_1_charge] - dQdVc_0 = diag_intrp.charge_dQdV[mask_pos_0_charge] - dQdVcDiff = dQdVc_1.values - dQdVc_0.values - dQdVcDiff = dQdVcDiff[~np.isnan(dQdVcDiff)] - - X.loc[0, 28:34] = self.get_summary_statistics(dQdVcDiff) - - # Discharging Capacity features - dQdVd_1 = diag_intrp.discharge_dQdV[mask_pos_1_discharge] - dQdVd_0 = diag_intrp.discharge_dQdV[mask_pos_0_discharge] - dQdVdDiff = dQdVd_1.values - dQdVd_0.values - dQdVdDiff = dQdVdDiff[~np.isnan(dQdVdDiff)] - - X.loc[0, 35:41] = self.get_summary_statistics(dQdVdDiff) - - X.loc[0, 42:53], names = self.get_summary_diff( - self.hyperparameters["diag_pos_list"] - ) - - quantities = [ - "charging_capacity", - "discharging_capacity", - "charging_energy", - "discharging_energy", - "charging_dQdV", - "discharging_dQdV", - ] - - X.columns = [y + "_" + x for x in quantities for y in - self.hyperparameters["statistics"]] + names - self.features = X - - -class DeltaQFastCharge(BEEPFeaturizer): - """ - Object corresponding to feature object. Includes constructors - to create the features, object names and metadata attributes in the - object - name (str): predictor object name. - X (pandas.DataFrame): features in DataFrame format. - metadata (dict): information about the conditions, data - and code used to produce features - """ - DEFAULT_HYPERPARAMETERS = { - "init_pred_cycle": 10, - "mid_pred_cycle": 91, - "final_pred_cycle": 100, - "n_nominal_cycles": 40 - } - - def validate(self): - """ - This function determines if the input data has the necessary attributes for - creation of this feature class. It should test for all of the possible reasons - that feature generation would fail for this particular input data. - - Args: - self.datapath (beep.structure.ProcessedCyclerRun): data from cycler run - params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object - gets featurized. These could be filters for column or row operations - Returns: - bool: True/False indication of ability to proceed with feature generation - """ - - if not self.datapath.structured_summary.index.max() > self.hyperparameters["final_pred_cycle"]: - return False, "Structured summary index max is less than final pred cycle" - elif not self.datapath.structured_summary.index.min() <= self.hyperparameters["init_pred_cycle"]: - return False, "Structured summary index min is more than initial pred cycle" - elif "cycle_index" not in self.datapath.structured_summary.columns: - return False, "Structured summary missing critical data: 'cycle_index'" - elif "cycle_index" not in self.datapath.structured_data.columns: - return False, "Structured data missing critical data: 'cycle_index'" - elif not self.hyperparameters["mid_pred_cycle"] > 10: - return False, "Middle pred. cycle less than threshold value of 10" - elif not self.hyperparameters["final_pred_cycle"] > self.hyperparameters["mid_pred_cycle"]: - return False, "Final pred cycle less than middle pred cycle" - else: - return True, None - - def create_features(self): - """ - Generate features listed in early prediction manuscript, primarily related to the - so called delta Q feature - Args: - processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run - self.hyperparameters (dict): dictionary of parameters governing how the ProcessedCyclerRun object - gets featurized. These could be filters for column or row operations - parameters_path (str): Root directory storing project parameter files. - - Returns: - pd.DataFrame: features indicative of degradation, derived from the input data - """ - i_final = self.hyperparameters[ - "final_pred_cycle"] - 1 # python indexing - i_mid = self.hyperparameters["mid_pred_cycle"] - 1 - - summary = self.datapath.structured_summary - self.hyperparameters[ - "n_nominal_cycles" - ] = 40 # For nominal capacity, use median discharge capacity of first n cycles - - if "step_type" in self.datapath.structured_data.columns: - interpolated_df = self.datapath.structured_data[ - self.datapath.structured_data.step_type == "discharge" - ] - else: - interpolated_df = self.datapath.structured_data - X = pd.DataFrame(np.zeros((1, 20))) - labels = [] - # Discharge capacity, cycle 2 = Q(n=2) - X[0] = summary.discharge_capacity.iloc[1] - labels.append("discharge_capacity_cycle_2") - - # Max discharge capacity - discharge capacity, cycle 2 = max_n(Q(n)) - Q(n=2) - X[1] = max( - summary.discharge_capacity.iloc[np.arange(i_final + 1)] - - summary.discharge_capacity.iloc[1] - ) - labels.append("max_discharge_capacity_difference") - - # Discharge capacity, cycle 100 = Q(n=100) - X[2] = summary.discharge_capacity.iloc[i_final] - labels.append("discharge_capacity_cycle_100") - - # Feature representing time-temperature integral over cycles 2 to 100 - X[3] = np.nansum( - summary.time_temperature_integrated.iloc[np.arange(i_final + 1)]) - labels.append("integrated_time_temperature_cycles_1:100") - - # Mean of charge times of first 5 cycles - X[4] = np.nanmean(summary.charge_duration.iloc[1:6]) - labels.append("charge_time_cycles_1:5") - - # Descriptors based on capacity loss between cycles 10 and 100. - Qd_final = interpolated_df.discharge_capacity[ - interpolated_df.cycle_index == i_final - ] - Qd_10 = interpolated_df.discharge_capacity[ - interpolated_df.cycle_index == 9] - - Qd_diff = Qd_final.values - Qd_10.values - - # If DeltaQ(V) is not an empty array, compute summary stats, else initialize with np.nan - # Cells discharged rapidly over a narrow voltage window run into have no interpolated discharge steps - if len(Qd_diff): - X[5] = np.log10(np.abs(np.nanmin(Qd_diff))) # Minimum - X[6] = np.log10(np.abs(np.nanmean(Qd_diff))) # Mean - X[7] = np.log10(np.abs(np.nanvar(Qd_diff))) # Variance - X[8] = np.log10(np.abs(skew(Qd_diff))) # Skewness - X[9] = np.log10(np.abs(kurtosis(Qd_diff))) # Kurtosis - X[10] = np.log10(np.abs(Qd_diff[0])) # First difference - else: - X[5:11] = np.nan - - labels.append("abs_min_discharge_capacity_difference_cycles_2:100") - labels.append("abs_mean_discharge_capacity_difference_cycles_2:100") - labels.append("abs_variance_discharge_capacity_difference_cycles_2:100") - labels.append("abs_skew_discharge_capacity_difference_cycles_2:100") - labels.append("abs_kurtosis_discharge_capacity_difference_cycles_2:100") - labels.append("abs_first_discharge_capacity_difference_cycles_2:100") - - X[11] = np.max(summary.temperature_maximum.iloc[ - list(range(1, i_final + 1))]) # Max T - labels.append("max_temperature_cycles_1:100") - - X[12] = np.min(summary.temperature_minimum.iloc[ - list(range(1, i_final + 1))]) # Min T - labels.append("min_temperature_cycles_1:100") - - # Slope and intercept of linear fit to discharge capacity as a fn of cycle #, cycles 2 to 100 - - X[13], X[14] = np.polyfit( - list(range(1, i_final + 1)), - summary.discharge_capacity.iloc[list(range(1, i_final + 1))], - 1, - ) - - labels.append("slope_discharge_capacity_cycle_number_2:100") - labels.append("intercept_discharge_capacity_cycle_number_2:100") - - # Slope and intercept of linear fit to discharge capacity as a fn of cycle #, cycles 91 to 100 - X[15], X[16] = np.polyfit( - list(range(i_mid, i_final + 1)), - summary.discharge_capacity.iloc[list(range(i_mid, i_final + 1))], - 1, - ) - labels.append("slope_discharge_capacity_cycle_number_91:100") - labels.append("intercept_discharge_capacity_cycle_number_91:100") - - IR_trend = summary.dc_internal_resistance.iloc[ - list(range(1, i_final + 1))] - if any(v == 0 for v in IR_trend): - IR_trend[IR_trend == 0] = np.nan - - # Internal resistance minimum - X[17] = np.nanmin(IR_trend) - labels.append("min_internal_resistance_cycles_2:100") - - # Internal resistance at cycle 2 - X[18] = summary.dc_internal_resistance.iloc[1] - labels.append("internal_resistance_cycle_2") - - # Internal resistance at cycle 100 - cycle 2 - X[19] = ( - summary.dc_internal_resistance.iloc[i_final] - - summary.dc_internal_resistance.iloc[1] - ) - labels.append("internal_resistance_difference_cycles_2:100") - - # Nominal capacity - end = self.hyperparameters["n_nominal_cycles"] - X[20] = np.median(summary.discharge_capacity.iloc[0: end]) - labels.append("nominal_capacity_by_median") - - X.columns = labels - self.features = X - - -class TrajectoryFastCharge(DeltaQFastCharge): - """ - Object corresponding to cycle numbers at which the capacity drops below - specific percentages of the initial capacity. Computed on the discharge - portion of the regular fast charge cycles. - - """ - - DEFAULT_HYPERPARAMETERS = { - "thresh_max_cap": 0.98, - "thresh_min_cap": 0.78, - "interval_cap": 0.03 - } - - def validate(self): - """ - This function determines if the input data has the necessary attributes for - creation of this feature class. It should test for all of the possible reasons - that feature generation would fail for this particular input data. - - Args: - processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run - params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object - gets featurized. These could be filters for column or row operations - Returns: - bool: True/False indication of ability to proceed with feature generation - """ - cap = self.datapath.structured_summary.discharge_capacity - cap_ratio = cap.min() / cap.max() - max_cap = self.hyperparameters["thresh_max_cap"] - if not cap_ratio < max_cap: - return False, f"thresh_max_cap hyperparameter exceeded: {cap_ratio} !< {max_cap}" - else: - return True, None - - def create_features(self): - """ - Calculate the outcomes from the input data. In particular, the number of cycles - where we expect to reach certain thresholds of capacity loss - Args: - processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run - params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object - gets featurized. These could be filters for column or row operations - parameters_path (str): Root directory storing project parameter files. - - Returns: - pd.DataFrame: cycles at which capacity/energy degradation exceeds thresholds - """ - y = self.datapath.capacities_to_cycles( - self.hyperparameters["thresh_max_cap"], - self.hyperparameters["thresh_min_cap"], - self.hyperparameters["interval_cap"], - ) - self.features = y - - -class DiagnosticProperties(BEEPFeaturizer): - """ - This class stores fractional levels of degradation in discharge capacity and discharge energy - relative to the first cycle at each diagnostic cycle, grouped by diagnostic cycle type. - - name (str): predictor object name. - X (pandas.DataFrame): features in DataFrame format. - metadata (dict): information about the conditions, data - and code used to produce features - - Hyperparameters: - parameters_dir (str): Full path to directory of parameters to analyse the - diagnostic cycles - quantities ([str]): Quantities to extract/get fractional metrics for - diagnostic cycles - cycle_type (str): Type of diagnostic cycle being used to measure the - fractional metric - metric (str): The metric being used for fractional capacity - interpolation_axes (list): List of column names to use for - x_axis interpolation (distance to threshold) - threshold (float): Value for the fractional metric to be considered above - or below threshold - filter_kinks (float): If set, cutoff value for the second derivative of - the fractional metric (cells with an abrupt change in degradation - rate might have something else going on). Typical value might be 0.04 - extrapolate_threshold (bool): Should threshold crossing point be - extrapolated for cells that have not yet reached the threshold - (warning: this uses a linear extrapolation from the last two - diagnostic cycles) - """ - DEFAULT_HYPERPARAMETERS = { - "parameters_dir": PROTOCOL_PARAMETERS_DIR, - "quantities": ['discharge_energy', 'discharge_capacity'], - "threshold": 0.8, - "metric": "discharge_energy", - "filter_kinks": None, - "interpolation_axes": ["normalized_regular_throughput", "cycle_index"], - "cycle_type": "rpt_1C", - "extrapolate_threshold": True - } - - def validate(self): - """ - This function determines if the input data has the necessary attributes for - creation of this feature class. It should test for all of the possible reasons - that feature generation would fail for this particular input data. - - Args: - processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run - params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object - gets featurized. These could be filters for column or row operations - Returns: - bool: True/False indication of ability to proceed with feature generation - """ - return featurizer_helpers.check_diagnostic_validation(self.datapath) - - def create_features(self): - """ - Generates diagnostic-property features from processed cycler run, including values for n*x method - Args: - self.datapath (beep.structure.ProcessedCyclerRun): data from cycler run - params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object - gets featurized. These could be filters for column or row operations - parameters_path (str): Root directory storing project parameter files. - - Returns: - pd.DataFrame: with "cycle_index", "fractional_metric", "x", "n", "cycle_type" and "metric" columns, rows - for each diagnostic cycle of the cell - """ - - parameters_path = self.hyperparameters["parameters_dir"] - - cycle_types = self.datapath.diagnostic_summary.cycle_type.unique() - X = pd.DataFrame() - for quantity in self.hyperparameters["quantities"]: - for cycle_type in cycle_types: - summary_diag_cycle_type = featurizer_helpers.get_fractional_quantity_remaining_nx( - self.datapath, quantity, cycle_type, - parameters_path=parameters_path - ) - - summary_diag_cycle_type.loc[:, "cycle_type"] = cycle_type - summary_diag_cycle_type.loc[:, "metric"] = quantity - X = X.append(summary_diag_cycle_type) - - X_condensed = self.get_threshold_targets(X) - self.features = X_condensed - - def get_threshold_targets(self, df): - """ - Apply a threshold via interpolation for determining various - metrics (e.g., discharge energy) from diagnostic cycles. - - Args: - df (pd.DataFrame): A dataframe of diagnostic cycle data - for a single battery cycler run. - - Returns: - (pd.DataFrame): Contains a vector for interpolated/intercept - data for determining threshold. - - """ - cycle_type = self.hyperparameters["cycle_type"] - metric = self.hyperparameters["metric"] - interpolation_axes = self.hyperparameters["interpolation_axes"] - threshold = self.hyperparameters["threshold"] - filter_kinks = self.hyperparameters["filter_kinks"] - extrapolate_threshold = self.hyperparameters["extrapolate_threshold"] - - if filter_kinks: - if np.any(df['fractional_metric'].diff().diff() < filter_kinks): - last_good_cycle = df[ - df['fractional_metric'].diff().diff() < filter_kinks]['cycle_index'].min() - df = df[df['cycle_index'] < last_good_cycle] - - x_axes = [] - for type in interpolation_axes: - x_axes.append(df[type]) - y_interpolation_axis = df['fractional_metric'] - - # Logic around how to deal with cells that have not crossed threshold - if df['fractional_metric'].min() > threshold and \ - not extrapolate_threshold: - BEEPFeaturizationError( - "DiagnosticProperties data has not crossed threshold " - "and extrapolation inaccurate" - ) - elif df['fractional_metric'].min() > threshold and \ - extrapolate_threshold: - fill_value = "extrapolate" - bounds_error = False - x_linspaces = [] - for x_axis in x_axes: - y1 = y_interpolation_axis.iloc[-2] - y2 = y_interpolation_axis.iloc[-1] - x1 = x_axis.iloc[-2] - x2 = x_axis.iloc[-1] - x_thresh_extrap = (threshold - 0.1 - y1) * (x2 - x1) / ( - y2 - y1) + x1 - x_linspaces.append( - np.linspace(x_axis.min(), x_thresh_extrap, num=1000) - ) - else: - fill_value = np.nan - bounds_error = True - x_linspaces = [] - for x_axis in x_axes: - x_linspaces.append( - np.linspace(x_axis.min(), x_axis.max(), num=1000)) - - f_axis = [] - for x_axis in x_axes: - f_axis.append( - interp1d( - x_axis, - y_interpolation_axis, - kind='linear', - bounds_error=bounds_error, - fill_value=fill_value - ) - ) - - x_to_threshold = [] - for indx, x_linspace in enumerate(x_linspaces): - crossing_array = abs(f_axis[indx](x_linspace) - threshold) - x_to_threshold.append(x_linspace[np.argmin(crossing_array)]) - - if ~(x_to_threshold[0] > 0) or ~(x_to_threshold[1] > 0): - raise BEEPFeaturizationError( - "DiagnosticProperties data does not have a positive value " - "to threshold" - ) - - if "normalized_regular_throughput" in interpolation_axes: - real_throughput_to_threshold = x_to_threshold[ - interpolation_axes.index( - "normalized_regular_throughput")] * \ - df[ - 'initial_regular_throughput'].values[ - 0] - x_to_threshold.append(real_throughput_to_threshold) - interpolation_axes = interpolation_axes + ["real_regular_throughput"] - - threshold_dict = { - 'initial_regular_throughput': - df['initial_regular_throughput'].values[0], - } - - for indx, x_axis in enumerate(interpolation_axes): - threshold_dict[ - cycle_type + metric + str(threshold) + '_' + x_axis] = [ - x_to_threshold[indx]] - - return pd.DataFrame(threshold_dict) diff --git a/beep/features/early_cycles/__init__.py b/beep/features/early_cycles/__init__.py new file mode 100644 index 00000000..520baf44 --- /dev/null +++ b/beep/features/early_cycles/__init__.py @@ -0,0 +1,14 @@ +""" +Features for predicting cycling characteristics from early cycles. + +Typically these features are formed by looking at two or more early cycles, +assessing the differences between features of these cycles, then relating +that to the degradation characteristics of the battery. These features +can then be used with any cycler file with few numbers of cycles to predict +the total number of cycles before reaching certain degradation thresholds. +""" + +from beep.features.early_cycles.delta_q import DeltaQFastCharge +from beep.features.early_cycles.hppc import HPPCResistanceVoltage +from beep.features.early_cycles.summary import DiagnosticSummaryStats, CycleSummaryStats +from beep.features.early_cycles.targets import TrajectoryFastCharge, DiagnosticProperties \ No newline at end of file diff --git a/beep/features/early_cycles/delta_q.py b/beep/features/early_cycles/delta_q.py new file mode 100644 index 00000000..64feaac9 --- /dev/null +++ b/beep/features/early_cycles/delta_q.py @@ -0,0 +1,183 @@ +import numpy as np +import pandas as pd +from scipy.stats import skew, kurtosis + +from beep.features.featurizer import BEEPEarlyCyclesFeaturizer + + +class DeltaQFastCharge(BEEPEarlyCyclesFeaturizer): + """ + Object corresponding to feature object. Includes constructors + to create the features, object names and metadata attributes in the + object + name (str): predictor object name. + X (pandas.DataFrame): features in DataFrame format. + metadata (dict): information about the conditions, data + and code used to produce features + """ + DEFAULT_HYPERPARAMETERS = { + "init_pred_cycle": 10, + "mid_pred_cycle": 91, + "final_pred_cycle": 100, + "n_nominal_cycles": 40 + } + + def validate(self): + """ + This function determines if the input data has the necessary attributes for + creation of this feature class. It should test for all of the possible reasons + that feature generation would fail for this particular input data. + + Args: + self.datapath (beep.structure.ProcessedCyclerRun): data from cycler run + params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object + gets featurized. These could be filters for column or row operations + Returns: + bool: True/False indication of ability to proceed with feature generation + """ + + if not self.datapath.structured_summary.index.max() > \ + self.hyperparameters["final_pred_cycle"]: + return False, "Structured summary index max is less than final pred cycle" + elif not self.datapath.structured_summary.index.min() <= \ + self.hyperparameters["init_pred_cycle"]: + return False, "Structured summary index min is more than initial pred cycle" + elif "cycle_index" not in self.datapath.structured_summary.columns: + return False, "Structured summary missing critical data: 'cycle_index'" + elif "cycle_index" not in self.datapath.structured_data.columns: + return False, "Structured data missing critical data: 'cycle_index'" + elif not self.hyperparameters["mid_pred_cycle"] > 10: + return False, "Middle pred. cycle less than threshold value of 10" + elif not self.hyperparameters["final_pred_cycle"] > \ + self.hyperparameters["mid_pred_cycle"]: + return False, "Final pred cycle less than middle pred cycle" + else: + return True, None + + def create_features(self): + """ + """ + i_final = self.hyperparameters[ + "final_pred_cycle"] - 1 # python indexing + i_mid = self.hyperparameters["mid_pred_cycle"] - 1 + + summary = self.datapath.structured_summary + self.hyperparameters[ + "n_nominal_cycles" + ] = 40 # For nominal capacity, use median discharge capacity of first n cycles + + if "step_type" in self.datapath.structured_data.columns: + interpolated_df = self.datapath.structured_data[ + self.datapath.structured_data.step_type == "discharge" + ] + else: + interpolated_df = self.datapath.structured_data + X = pd.DataFrame(np.zeros((1, 20))) + labels = [] + # Discharge capacity, cycle 2 = Q(n=2) + X[0] = summary.discharge_capacity.iloc[1] + labels.append("discharge_capacity_cycle_2") + + # Max discharge capacity - discharge capacity, cycle 2 = max_n(Q(n)) - Q(n=2) + X[1] = max( + summary.discharge_capacity.iloc[np.arange(i_final + 1)] + - summary.discharge_capacity.iloc[1] + ) + labels.append("max_discharge_capacity_difference") + + # Discharge capacity, cycle 100 = Q(n=100) + X[2] = summary.discharge_capacity.iloc[i_final] + labels.append("discharge_capacity_cycle_100") + + # Feature representing time-temperature integral over cycles 2 to 100 + X[3] = np.nansum( + summary.time_temperature_integrated.iloc[np.arange(i_final + 1)]) + labels.append("integrated_time_temperature_cycles_1:100") + + # Mean of charge times of first 5 cycles + X[4] = np.nanmean(summary.charge_duration.iloc[1:6]) + labels.append("charge_time_cycles_1:5") + + # Descriptors based on capacity loss between cycles 10 and 100. + Qd_final = interpolated_df.discharge_capacity[ + interpolated_df.cycle_index == i_final + ] + Qd_10 = interpolated_df.discharge_capacity[ + interpolated_df.cycle_index == 9] + + Qd_diff = Qd_final.values - Qd_10.values + + # If DeltaQ(V) is not an empty array, compute summary stats, else initialize with np.nan + # Cells discharged rapidly over a narrow voltage window run into have no interpolated discharge steps + if len(Qd_diff): + X[5] = np.log10(np.abs(np.nanmin(Qd_diff))) # Minimum + X[6] = np.log10(np.abs(np.nanmean(Qd_diff))) # Mean + X[7] = np.log10(np.abs(np.nanvar(Qd_diff))) # Variance + X[8] = np.log10(np.abs(skew(Qd_diff))) # Skewness + X[9] = np.log10(np.abs(kurtosis(Qd_diff))) # Kurtosis + X[10] = np.log10(np.abs(Qd_diff[0])) # First difference + else: + X[5:11] = np.nan + + labels.append("abs_min_discharge_capacity_difference_cycles_2:100") + labels.append("abs_mean_discharge_capacity_difference_cycles_2:100") + labels.append("abs_variance_discharge_capacity_difference_cycles_2:100") + labels.append("abs_skew_discharge_capacity_difference_cycles_2:100") + labels.append("abs_kurtosis_discharge_capacity_difference_cycles_2:100") + labels.append("abs_first_discharge_capacity_difference_cycles_2:100") + + X[11] = np.max(summary.temperature_maximum.iloc[ + list(range(1, i_final + 1))]) # Max T + labels.append("max_temperature_cycles_1:100") + + X[12] = np.min(summary.temperature_minimum.iloc[ + list(range(1, i_final + 1))]) # Min T + labels.append("min_temperature_cycles_1:100") + + # Slope and intercept of linear fit to discharge capacity as a fn of cycle #, cycles 2 to 100 + + X[13], X[14] = np.polyfit( + list(range(1, i_final + 1)), + summary.discharge_capacity.iloc[list(range(1, i_final + 1))], + 1, + ) + + labels.append("slope_discharge_capacity_cycle_number_2:100") + labels.append("intercept_discharge_capacity_cycle_number_2:100") + + # Slope and intercept of linear fit to discharge capacity as a fn of cycle #, cycles 91 to 100 + X[15], X[16] = np.polyfit( + list(range(i_mid, i_final + 1)), + summary.discharge_capacity.iloc[list(range(i_mid, i_final + 1))], + 1, + ) + labels.append("slope_discharge_capacity_cycle_number_91:100") + labels.append("intercept_discharge_capacity_cycle_number_91:100") + + IR_trend = summary.dc_internal_resistance.iloc[ + list(range(1, i_final + 1))] + if any(v == 0 for v in IR_trend): + IR_trend[IR_trend == 0] = np.nan + + # Internal resistance minimum + X[17] = np.nanmin(IR_trend) + labels.append("min_internal_resistance_cycles_2:100") + + # Internal resistance at cycle 2 + X[18] = summary.dc_internal_resistance.iloc[1] + labels.append("internal_resistance_cycle_2") + + # Internal resistance at cycle 100 - cycle 2 + X[19] = ( + summary.dc_internal_resistance.iloc[i_final] - + summary.dc_internal_resistance.iloc[1] + ) + labels.append("internal_resistance_difference_cycles_2:100") + + # Nominal capacity + end = self.hyperparameters["n_nominal_cycles"] + X[20] = np.median(summary.discharge_capacity.iloc[0: end]) + labels.append("nominal_capacity_by_median") + + X.columns = labels + self.features = X \ No newline at end of file diff --git a/beep/features/early_cycles/hppc.py b/beep/features/early_cycles/hppc.py new file mode 100644 index 00000000..23c864f9 --- /dev/null +++ b/beep/features/early_cycles/hppc.py @@ -0,0 +1,83 @@ +import pandas as pd + +from beep import PROTOCOL_PARAMETERS_DIR +from beep.features import helper_functions +from beep.features.featurizer import BEEPEarlyCyclesFeaturizer + + +class HPPCResistanceVoltageFeatures(BEEPEarlyCyclesFeaturizer): + DEFAULT_HYPERPARAMETERS = { + "test_time_filter_sec": 1000000, + "cycle_index_filter": 6, + "diag_pos": 1, + "soc_window": 8, + "parameters_path": PROTOCOL_PARAMETERS_DIR + } + + def validate(self): + val, msg = helper_functions.check_diagnostic_validation(self.datapath) + if val: + conditions = [] + conditions.append( + any( + [ + "hppc" in x + for x in + self.datapath.diagnostic_summary.cycle_type.unique() + ] + ) + ) + if all(conditions): + return True, None + else: + return False, "HPPC conditions not met for this cycler run" + else: + return val, msg + + def create_features(self): + # Filter out low cycle numbers at the end of the test, corresponding to the "final" diagnostic + self.datapath.diagnostic_data = self.datapath.diagnostic_data[ + ~((self.datapath.diagnostic_data.test_time > self.hyperparameters[ + 'test_time_filter_sec']) & + (self.datapath.diagnostic_data.cycle_index < self.hyperparameters[ + 'cycle_index_filter'])) + ] + self.datapath.diagnostic_data = self.datapath.diagnostic_data.groupby( + ["cycle_index", "step_index", "step_index_counter"] + ).filter(lambda x: ~x["test_time"].isnull().all()) + + # diffusion features + diffusion_features = helper_functions.get_diffusion_features( + self.datapath, + ) + + hppc_r = pd.DataFrame() + # the 9 by 6 dataframe + df_dr = helper_functions.get_dr_df( + self.datapath, self.hyperparameters["diag_pos"] + ) + # transform this dataframe to be 1 by 54 + columns = df_dr.columns + for column in columns: + for r in range(len(df_dr[column])): + name = column + str(r) + hppc_r[name] = [df_dr[column][r]] + + # the variance of ocv features + hppc_ocv = helper_functions.get_hppc_ocv( + self.datapath, + self.hyperparameters["diag_pos"], + parameters_path=self.hyperparameters["parameters_path"] + ) + + # the v_diff features + v_diff = helper_functions.get_v_diff( + self.datapath, + self.hyperparameters["diag_pos"], + self.hyperparameters["soc_window"], + self.hyperparameters["parameters_path"] + ) + + # merge everything together as a final result dataframe + self.features = pd.concat( + [hppc_r, hppc_ocv, v_diff, diffusion_features], axis=1) \ No newline at end of file diff --git a/beep/features/early_cycles/summary.py b/beep/features/early_cycles/summary.py new file mode 100644 index 00000000..e17ceea6 --- /dev/null +++ b/beep/features/early_cycles/summary.py @@ -0,0 +1,393 @@ +import numpy as np +import pandas as pd +from scipy.stats import skew, kurtosis + +from beep.features import helper_functions +from beep import PROTOCOL_PARAMETERS_DIR +from beep.features.featurizer import BEEPEarlyCyclesFeaturizer + + +class CycleSummaryStats(BEEPEarlyCyclesFeaturizer): + DEFAULT_HYPERPARAMETERS = { + "cycle_comp_num": [10, 100], + "statistics": ["var", "min", "mean", "skew", "kurtosis", "abs", + "square"] + } + + def validate(self): + """ + This function determines if the input data has the necessary attributes for + creation of this feature class. It should test for all of the possible reasons + that feature generation would fail for this particular input data. + + Args: + processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run + params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object + gets featurized. These could be filters for column or row operations + Returns: + bool: True/False indication of ability to proceed with feature generation + """ + + # TODO: not sure this is necessary + # Check for data in each of the selected cycles + index_1, index_2 = self.hyperparameters['cycle_comp_num'] + cycle_1 = self.datapath.structured_data[ + self.datapath.structured_data.cycle_index == index_1] + cycle_2 = self.datapath.structured_data[ + self.datapath.structured_data.cycle_index == index_2] + if len(cycle_1) == 0 or len(cycle_2) == 0: + return False, "Length of one or more comparison cycles is zero" + + # TODO: check whether this is good + # Check for relevant data + required_columns = [ + 'charge_capacity', + 'discharge_capacity', + 'charge_energy', + 'discharge_energy', + ] + pcycler_run_columns = self.datapath.structured_data.columns + if not all( + [column in pcycler_run_columns for column in required_columns]): + return False, f"Required column not present in all structured data " \ + f"(must have all of: {required_columns})" + + return True, None + + def create_features(self): + """ + Generate features listed in early prediction manuscript using both diagnostic and regular cycles + + Args: + processed_cycler_run (beep.structure.ProcessedCyclerRun) + params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object + gets featurized. These could be filters for column or row operations + parameters_path (str): Root directory storing project parameter files. + + Returns: + X (pd.DataFrame): Dataframe containing the feature + """ + + # TODO: extend this dataframe and uncomment energy features when + # structuring is refactored + X = pd.DataFrame(np.zeros((1, 28))) + + reg_cycle_comp_num = self.hyperparameters.get("cycle_comp_num") + cycle_comp_1 = self.datapath.structured_data[ + self.datapath.structured_data.cycle_index == reg_cycle_comp_num[1] + ] + cycle_comp_0 = self.datapath.structured_data[ + self.datapath.structured_data.cycle_index == reg_cycle_comp_num[0] + ] + Qc100_1 = cycle_comp_1[ + cycle_comp_1.step_type == "charge"].charge_capacity + Qc10_1 = cycle_comp_0[ + cycle_comp_0.step_type == "charge"].charge_capacity + QcDiff = Qc100_1.values - Qc10_1.values + QcDiff = QcDiff[~np.isnan(QcDiff)] + + X.loc[0, 0:6] = self.get_summary_statistics(QcDiff) + + Qd100_1 = cycle_comp_1[ + cycle_comp_1.step_type == "discharge"].discharge_capacity + Qd10_1 = cycle_comp_0[ + cycle_comp_0.step_type == "discharge"].discharge_capacity + QdDiff = Qd100_1.values - Qd10_1.values + QdDiff = QdDiff[~np.isnan(QdDiff)] + + X.loc[0, 7:13] = self.get_summary_statistics(QdDiff) + + # # Charging Energy features + Ec100_1 = cycle_comp_1[cycle_comp_1.step_type == "charge"].charge_energy + Ec10_1 = cycle_comp_0[cycle_comp_0.step_type == "charge"].charge_energy + EcDiff = Ec100_1.values - Ec10_1.values + EcDiff = EcDiff[~np.isnan(EcDiff)] + + X.loc[0, 14:20] = self.get_summary_statistics(EcDiff) + + # # Discharging Energy features + Ed100_1 = cycle_comp_1[ + cycle_comp_1.step_type == "charge"].discharge_energy + Ed10_1 = cycle_comp_0[ + cycle_comp_0.step_type == "charge"].discharge_energy + EdDiff = Ed100_1.values - Ed10_1.values + EdDiff = EdDiff[~np.isnan(EdDiff)] + + X.loc[0, 21:27] = self.get_summary_statistics(EdDiff) + + quantities = [ + "charging_capacity", + "discharging_capacity", + "charging_energy", + "discharging_energy", + ] + + X.columns = [y + "_" + x for x in quantities for y in + self.hyperparameters["statistics"]] + + self.features = X + + def get_summary_statistics(self, array): + """ + Static method for getting values corresponding + to standard 7 operations that many beep features + use, i.e. log of absolute value of each of + variance, min, mean, skew, kurtosis, the sum of + the absolute values and the sum of squares + + Args: + array (list, np.ndarray): array of values to get + standard operation values for, e.g. cycle + discharging capacity, QcDiff, etc. + + Returns: + [float]: list of features + + """ + + stats_names = self.hyperparameters["statistics"] + supported_stats = self.DEFAULT_HYPERPARAMETERS["statistics"] + + if any(s not in supported_stats for s in stats_names): + raise ValueError( + f"Unsupported statistics in {stats_names}: supported statistics are {supported_stats}") + + stats = [] + + if "var" in stats_names: + stats.append(np.log10(np.absolute(np.var(array)))) + if "min" in stats_names: + stats.append(np.log10(np.absolute(min(array)))) + if "mean" in stats_names: + stats.append(np.log10(np.absolute(np.mean(array)))) + if "skew" in stats_names: + stats.append(np.log10(np.absolute(skew(array)))) + if "kurtosis" in stats_names: + stats.append(np.log10( + np.absolute(kurtosis(array, fisher=False, bias=False)))) + if "abs" in stats_names: + stats.append(np.log10(np.sum(np.absolute(array)))) + if "square" in stats_names: + stats.append(np.log10(np.sum(np.square(array)))) + + return np.asarray(stats) + + +class DiagnosticSummaryStats(CycleSummaryStats): + """ + Object corresponding to summary statistics from a diagnostic cycle of + specific type. Includes constructors to create the features, object names + and metadata attributes in the object. Inherits from RegularCycleSummaryStats + to reuse standard feature generation + + name (str): predictor object name. + X (pandas.DataFrame): features in DataFrame format. + metadata (dict): information about the conditions, data + and code used to produce features + """ + DEFAULT_HYPERPARAMETERS = { + "test_time_filter_sec": 1000000, + "cycle_index_filter": 6, + "diagnostic_cycle_type": 'rpt_0.2C', + "diag_pos_list": [0, 1], + "statistics": ["var", "min", "mean", "skew", "kurtosis", "abs", + "square"], + "parameters_path": PROTOCOL_PARAMETERS_DIR + } + + def validate(self): + """ + This function determines if the input data has the necessary attributes for + creation of this feature class. It should test for all of the possible reasons + that feature generation would fail for this particular input data. + + Args: + processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run + params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object + gets featurized. These could be filters for column or row operations + + Returns: + bool: True/False indication of ability to proceed with feature generation + """ + val, msg = helper_functions.check_diagnostic_validation(self.datapath) + if val: + df = self.datapath.diagnostic_summary + df = df[ + df.cycle_type == self.hyperparameters["diagnostic_cycle_type"]] + if df.cycle_index.nunique() >= max( + self.hyperparameters["diag_pos_list"]) + 1: + return True, None + else: + return False, "Diagnostic cycles insufficient for featurization" + else: + return val, msg + + def get_summary_diff( + self, + pos=None, + cycle_types=("rpt_0.2C", "rpt_1C", "rpt_2C"), + metrics=( + "discharge_capacity", "discharge_energy", "charge_capacity", + "charge_energy") + ): + """ + Helper function to calculate difference between summary values in the diagnostic cycles + + Args: + processed_cycler_run (beep.structure.ProcessedCyclerRun) + pos (list): position of the diagnostics to use in the calculation + cycle_types (list): calculate difference for these diagnostic types + metrics (str): Calculate difference for these metrics + + Returns: + values (list): List of difference values to insert into the dataframe + names (list): List of column headers to use in the creation of the dataframe + """ + pos = self.hyperparameters["diag_pos_list"] if not pos else pos + + values = [] + names = [] + for cycle_type in cycle_types: + diag_type_summary = self.datapath.diagnostic_summary[ + self.datapath.diagnostic_summary.cycle_type == cycle_type] + for metric in metrics: + diff = (diag_type_summary.iloc[pos[1]][metric] - + diag_type_summary.iloc[pos[0]][metric]) \ + / diag_type_summary.iloc[pos[0]][metric] + values.append(diff) + names.append("diag_sum_diff_" + str(pos[0]) + "_" + str( + pos[1]) + "_" + cycle_type + metric) + return values, names + + def create_features(self): + """ + Generate features listed in early prediction manuscript using both diagnostic and regular cycles + + Args: + self.datapathn (beep.structure.ProcessedCyclerRun) + self.hyperparameters (dict): dictionary of parameters governing how the ProcessedCyclerRun object + gets featurized. These could be filters for column or row operations + parameters_path (str): Root directory storing project parameter files. + + Returns: + X (pd.DataFrame): Dataframe containing the feature + """ + # Filter out "final" diagnostic cycles that have been appended to the end of the file with the wrong + # cycle number(test time is monotonic) + self.datapath.diagnostic_data = self.datapath.diagnostic_data[ + ~((self.datapath.diagnostic_data.test_time > self.hyperparameters[ + 'test_time_filter_sec']) & + (self.datapath.diagnostic_data.cycle_index < self.hyperparameters[ + 'cycle_index_filter'])) + ] + self.datapath.diagnostic_data = self.datapath.diagnostic_data.groupby( + ["cycle_index", "step_index", "step_index_counter"] + ).filter(lambda x: ~x["test_time"].isnull().all()) + + diag_intrp = self.datapath.diagnostic_data + + X = pd.DataFrame(np.zeros((1, 54))) + + # Calculate the cycles and the steps for the selected diagnostics + cycles = diag_intrp.cycle_index[diag_intrp.cycle_type == + self.hyperparameters[ + "diagnostic_cycle_type"]].unique() + step_dict_0 = helper_functions.get_step_index( + self.datapath, + cycle_type=self.hyperparameters["diagnostic_cycle_type"], + diag_pos=self.hyperparameters["diag_pos_list"][0], + parameters_path=self.hyperparameters["parameters_path"] + ) + step_dict_1 = helper_functions.get_step_index( + self.datapath, + cycle_type=self.hyperparameters["diagnostic_cycle_type"], + diag_pos=self.hyperparameters["diag_pos_list"][1], + parameters_path=self.hyperparameters["parameters_path"] + ) + + # Create masks for each position in the data + mask_pos_0_charge = ((diag_intrp.cycle_index == cycles[ + self.hyperparameters["diag_pos_list"][0]]) & + (diag_intrp.step_index == step_dict_0[ + self.hyperparameters[ + "diagnostic_cycle_type"] + '_charge'])) + mask_pos_1_charge = ((diag_intrp.cycle_index == cycles[ + self.hyperparameters["diag_pos_list"][1]]) & + (diag_intrp.step_index == step_dict_1[ + self.hyperparameters[ + "diagnostic_cycle_type"] + '_charge'])) + mask_pos_0_discharge = ((diag_intrp.cycle_index == cycles[ + self.hyperparameters["diag_pos_list"][0]]) & + (diag_intrp.step_index == + step_dict_0[self.hyperparameters[ + "diagnostic_cycle_type"] + '_discharge'])) + mask_pos_1_discharge = ((diag_intrp.cycle_index == cycles[ + self.hyperparameters["diag_pos_list"][1]]) & + (diag_intrp.step_index == + step_dict_1[self.hyperparameters[ + "diagnostic_cycle_type"] + '_discharge'])) + + # Charging Capacity features + Qc_1 = diag_intrp.charge_capacity[mask_pos_1_charge] + Qc_0 = diag_intrp.charge_capacity[mask_pos_0_charge] + QcDiff = Qc_1.values - Qc_0.values + QcDiff = QcDiff[~np.isnan(QcDiff)] + + X.loc[0, 0:6] = self.get_summary_statistics(QcDiff) + + # Discharging Capacity features + Qd_1 = diag_intrp.discharge_capacity[mask_pos_1_discharge] + Qd_0 = diag_intrp.discharge_capacity[mask_pos_0_discharge] + QdDiff = Qd_1.values - Qd_0.values + QdDiff = QdDiff[~np.isnan(QdDiff)] + + X.loc[0, 7:13] = self.get_summary_statistics(QdDiff) + + # Charging Energy features + Ec_1 = diag_intrp.charge_energy[mask_pos_1_charge] + Ec_0 = diag_intrp.charge_energy[mask_pos_0_charge] + EcDiff = Ec_1.values - Ec_0.values + EcDiff = EcDiff[~np.isnan(EcDiff)] + + X.loc[0, 14:20] = self.get_summary_statistics(EcDiff) + + # Discharging Energy features + Ed_1 = diag_intrp.discharge_energy[mask_pos_1_discharge] + Ed_0 = diag_intrp.discharge_energy[mask_pos_0_discharge] + EdDiff = Ed_1.values - Ed_0.values + EdDiff = EdDiff[~np.isnan(EdDiff)] + + X.loc[0, 21:27] = self.get_summary_statistics(EdDiff) + + # Charging dQdV features + dQdVc_1 = diag_intrp.charge_dQdV[mask_pos_1_charge] + dQdVc_0 = diag_intrp.charge_dQdV[mask_pos_0_charge] + dQdVcDiff = dQdVc_1.values - dQdVc_0.values + dQdVcDiff = dQdVcDiff[~np.isnan(dQdVcDiff)] + + X.loc[0, 28:34] = self.get_summary_statistics(dQdVcDiff) + + # Discharging Capacity features + dQdVd_1 = diag_intrp.discharge_dQdV[mask_pos_1_discharge] + dQdVd_0 = diag_intrp.discharge_dQdV[mask_pos_0_discharge] + dQdVdDiff = dQdVd_1.values - dQdVd_0.values + dQdVdDiff = dQdVdDiff[~np.isnan(dQdVdDiff)] + + X.loc[0, 35:41] = self.get_summary_statistics(dQdVdDiff) + + X.loc[0, 42:53], names = self.get_summary_diff( + self.hyperparameters["diag_pos_list"] + ) + + quantities = [ + "charging_capacity", + "discharging_capacity", + "charging_energy", + "discharging_energy", + "charging_dQdV", + "discharging_dQdV", + ] + + X.columns = [y + "_" + x for x in quantities for y in + self.hyperparameters["statistics"]] + names + self.features = X diff --git a/beep/features/early_cycles/targets.py b/beep/features/early_cycles/targets.py new file mode 100644 index 00000000..aecd7e80 --- /dev/null +++ b/beep/features/early_cycles/targets.py @@ -0,0 +1,262 @@ +import numpy as np +import pandas as pd +from scipy.interpolate import interp1d + +from beep import PROTOCOL_PARAMETERS_DIR +from beep.features import helper_functions + +from beep.features.featurizer import BEEPFeaturizer, BEEPFeaturizationError + + +class TrajectoryFastCharge(BEEPFeaturizer): + """ + Object corresponding to cycle numbers at which the capacity drops below + specific percentages of the initial capacity. Computed on the discharge + portion of the regular fast charge cycles. + + """ + + DEFAULT_HYPERPARAMETERS = { + "thresh_max_cap": 0.98, + "thresh_min_cap": 0.78, + "interval_cap": 0.03 + } + + def validate(self): + """ + This function determines if the input data has the necessary attributes for + creation of this feature class. It should test for all of the possible reasons + that feature generation would fail for this particular input data. + + Args: + processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run + params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object + gets featurized. These could be filters for column or row operations + Returns: + bool: True/False indication of ability to proceed with feature generation + """ + cap = self.datapath.structured_summary.discharge_capacity + cap_ratio = cap.min() / cap.max() + max_cap = self.hyperparameters["thresh_max_cap"] + if not cap_ratio < max_cap: + return False, f"thresh_max_cap hyperparameter exceeded: {cap_ratio} !< {max_cap}" + else: + return True, None + + def create_features(self): + """ + Calculate the outcomes from the input data. In particular, the number of cycles + where we expect to reach certain thresholds of capacity loss + Args: + processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run + params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object + gets featurized. These could be filters for column or row operations + parameters_path (str): Root directory storing project parameter files. + + Returns: + pd.DataFrame: cycles at which capacity/energy degradation exceeds thresholds + """ + y = self.datapath.capacities_to_cycles( + self.hyperparameters["thresh_max_cap"], + self.hyperparameters["thresh_min_cap"], + self.hyperparameters["interval_cap"], + ) + self.features = y + + +class DiagnosticProperties(BEEPFeaturizer): + """ + This class stores fractional levels of degradation in discharge capacity and discharge energy + relative to the first cycle at each diagnostic cycle, grouped by diagnostic cycle type. + + name (str): predictor object name. + X (pandas.DataFrame): features in DataFrame format. + metadata (dict): information about the conditions, data + and code used to produce features + + Hyperparameters: + parameters_dir (str): Full path to directory of parameters to analyse the + diagnostic cycles + quantities ([str]): Quantities to extract/get fractional metrics for + diagnostic cycles + cycle_type (str): Type of diagnostic cycle being used to measure the + fractional metric + metric (str): The metric being used for fractional capacity + interpolation_axes (list): List of column names to use for + x_axis interpolation (distance to threshold) + threshold (float): Value for the fractional metric to be considered above + or below threshold + filter_kinks (float): If set, cutoff value for the second derivative of + the fractional metric (cells with an abrupt change in degradation + rate might have something else going on). Typical value might be 0.04 + extrapolate_threshold (bool): Should threshold crossing point be + extrapolated for cells that have not yet reached the threshold + (warning: this uses a linear extrapolation from the last two + diagnostic cycles) + """ + DEFAULT_HYPERPARAMETERS = { + "parameters_dir": PROTOCOL_PARAMETERS_DIR, + "quantities": ['discharge_energy', 'discharge_capacity'], + "threshold": 0.8, + "metric": "discharge_energy", + "filter_kinks": None, + "interpolation_axes": ["normalized_regular_throughput", "cycle_index"], + "cycle_type": "rpt_1C", + "extrapolate_threshold": True + } + + def validate(self): + """ + This function determines if the input data has the necessary attributes for + creation of this feature class. It should test for all of the possible reasons + that feature generation would fail for this particular input data. + + Args: + processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run + params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object + gets featurized. These could be filters for column or row operations + Returns: + bool: True/False indication of ability to proceed with feature generation + """ + return helper_functions.check_diagnostic_validation(self.datapath) + + def create_features(self): + """ + Generates diagnostic-property features from processed cycler run, including values for n*x method + Args: + self.datapath (beep.structure.ProcessedCyclerRun): data from cycler run + params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object + gets featurized. These could be filters for column or row operations + parameters_path (str): Root directory storing project parameter files. + + Returns: + pd.DataFrame: with "cycle_index", "fractional_metric", "x", "n", "cycle_type" and "metric" columns, rows + for each diagnostic cycle of the cell + """ + + parameters_path = self.hyperparameters["parameters_dir"] + + cycle_types = self.datapath.diagnostic_summary.cycle_type.unique() + X = pd.DataFrame() + for quantity in self.hyperparameters["quantities"]: + for cycle_type in cycle_types: + summary_diag_cycle_type = helper_functions.get_fractional_quantity_remaining_nx( + self.datapath, quantity, cycle_type, + parameters_path=parameters_path + ) + + summary_diag_cycle_type.loc[:, "cycle_type"] = cycle_type + summary_diag_cycle_type.loc[:, "metric"] = quantity + X = X.append(summary_diag_cycle_type) + + X_condensed = self.get_threshold_targets(X) + self.features = X_condensed + + def get_threshold_targets(self, df): + """ + Apply a threshold via interpolation for determining various + metrics (e.g., discharge energy) from diagnostic cycles. + + Args: + df (pd.DataFrame): A dataframe of diagnostic cycle data + for a single battery cycler run. + + Returns: + (pd.DataFrame): Contains a vector for interpolated/intercept + data for determining threshold. + + """ + cycle_type = self.hyperparameters["cycle_type"] + metric = self.hyperparameters["metric"] + interpolation_axes = self.hyperparameters["interpolation_axes"] + threshold = self.hyperparameters["threshold"] + filter_kinks = self.hyperparameters["filter_kinks"] + extrapolate_threshold = self.hyperparameters["extrapolate_threshold"] + + if filter_kinks: + if np.any(df['fractional_metric'].diff().diff() < filter_kinks): + last_good_cycle = df[ + df['fractional_metric'].diff().diff() < filter_kinks][ + 'cycle_index'].min() + df = df[df['cycle_index'] < last_good_cycle] + + x_axes = [] + for type in interpolation_axes: + x_axes.append(df[type]) + y_interpolation_axis = df['fractional_metric'] + + # Logic around how to deal with cells that have not crossed threshold + if df['fractional_metric'].min() > threshold and \ + not extrapolate_threshold: + BEEPFeaturizationError( + "DiagnosticProperties data has not crossed threshold " + "and extrapolation inaccurate" + ) + elif df['fractional_metric'].min() > threshold and \ + extrapolate_threshold: + fill_value = "extrapolate" + bounds_error = False + x_linspaces = [] + for x_axis in x_axes: + y1 = y_interpolation_axis.iloc[-2] + y2 = y_interpolation_axis.iloc[-1] + x1 = x_axis.iloc[-2] + x2 = x_axis.iloc[-1] + x_thresh_extrap = (threshold - 0.1 - y1) * (x2 - x1) / ( + y2 - y1) + x1 + x_linspaces.append( + np.linspace(x_axis.min(), x_thresh_extrap, num=1000) + ) + else: + fill_value = np.nan + bounds_error = True + x_linspaces = [] + for x_axis in x_axes: + x_linspaces.append( + np.linspace(x_axis.min(), x_axis.max(), num=1000)) + + f_axis = [] + for x_axis in x_axes: + f_axis.append( + interp1d( + x_axis, + y_interpolation_axis, + kind='linear', + bounds_error=bounds_error, + fill_value=fill_value + ) + ) + + x_to_threshold = [] + for indx, x_linspace in enumerate(x_linspaces): + crossing_array = abs(f_axis[indx](x_linspace) - threshold) + x_to_threshold.append(x_linspace[np.argmin(crossing_array)]) + + if ~(x_to_threshold[0] > 0) or ~(x_to_threshold[1] > 0): + raise BEEPFeaturizationError( + "DiagnosticProperties data does not have a positive value " + "to threshold" + ) + + if "normalized_regular_throughput" in interpolation_axes: + real_throughput_to_threshold = x_to_threshold[ + interpolation_axes.index( + "normalized_regular_throughput")] * \ + df[ + 'initial_regular_throughput'].values[ + 0] + x_to_threshold.append(real_throughput_to_threshold) + interpolation_axes = interpolation_axes + [ + "real_regular_throughput"] + + threshold_dict = { + 'initial_regular_throughput': + df['initial_regular_throughput'].values[0], + } + + for indx, x_axis in enumerate(interpolation_axes): + threshold_dict[ + cycle_type + metric + str(threshold) + '_' + x_axis] = [ + x_to_threshold[indx]] + + return pd.DataFrame(threshold_dict) \ No newline at end of file diff --git a/beep/features/featurizer.py b/beep/features/featurizer.py new file mode 100644 index 00000000..2fb1f181 --- /dev/null +++ b/beep/features/featurizer.py @@ -0,0 +1,233 @@ +# Copyright [2020] [Toyota Research Institute] +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +For creating features and organizing them into datasets. + +""" +import os +import abc +import json +from typing import Union, Tuple + +import pandas as pd +from monty.io import zopen +from monty.json import MSONable +from monty.serialization import dumpfn + +from beep.structure.base import BEEPDatapath + + +class BEEPFeaturizationError(BaseException): + """Raise when a featurization-specific error occurs""" + pass + + +class BEEPFeaturizer(MSONable, abc.ABC): + """ + Base class for all beep feature generation. + + From a structured battery file representing many cycles of one cell, + (AKA a structured datapath), produce a feature vector. + + Works for generating both + - Vectors X to use as training vectors + - Vectors or scalars y to use as ML targets + (as problems may have multiple metrics to predict) + + """ + + DEFAULT_HYPERPARAMETERS = {} + + def __init__(self, structured_datapath: Union[BEEPDatapath, None], + hyperparameters: Union[dict, None] = None): + # If all required hyperparameters are specified, use those + # If some subset of required hyperparameters are specified, throw error + # If no hyperparameters are specified, use defaults + if hyperparameters: + if all(k in hyperparameters for k in self.DEFAULT_HYPERPARAMETERS): + self.hyperparameters = hyperparameters + else: + raise BEEPFeaturizationError( + f"Features cannot be created with incomplete set of " + f"hyperparameters {hyperparameters.keys()} < " + f"{self.DEFAULT_HYPERPARAMETERS.keys()}!") + else: + self.hyperparameters = self.DEFAULT_HYPERPARAMETERS + + if structured_datapath is not None and not structured_datapath.is_structured: + raise BEEPFeaturizationError( + "BEEPDatapath input is not structured!") + self.datapath = structured_datapath + + self.features = None + + # In case these features are loaded from file + # Allow attrs which can hold relevant metadata without having + # to reload the original datapath + self.paths = self.datapath.paths if self.datapath else {} + self.metadata = self.datapath.metadata.raw if self.datapath else {} + self.linked_semiunique_id = self.datapath.semiunique_id if self.datapath else None + + @abc.abstractmethod + def validate(self) -> Tuple[bool, Union[str, None]]: + """ + Validate a featurizer on it's ingested datapath. + + Returns: + (bool, str/None): The validation result and it's message. + + """ + raise NotImplementedError + + @abc.abstractmethod + def create_features(self) -> None: + """ + Should assign a dataframe to self.features. + + Returns: + None + """ + raise NotImplementedError + + def as_dict(self): + """Serialize a BEEPDatapath as a dictionary. + + Must not be loaded from legacy. + + Returns: + (dict): corresponding to dictionary for serialization. + + """ + + if self.features is None: + raise BEEPFeaturizationError( + "Cannot serialize features which have not been generated.") + + features = self.features.to_dict("list") + + return { + "@module": self.__class__.__module__, + "@class": self.__class__.__name__, + + # Core parts of BEEPFeaturizer + "features": features, + "hyperparameters": self.hyperparameters, + "paths": self.paths, + "metadata": self.metadata, + "linked_datapath_semiunique_id": self.linked_semiunique_id + } + + @classmethod + def from_dict(cls, d): + """Create a BEEPDatapath object from a dictionary. + + Args: + d (dict): dictionary represenation. + + Returns: + beep.structure.ProcessedCyclerRun: deserialized ProcessedCyclerRun. + """ + + # no need for original datapath + bf = cls(structured_datapath=None, hyperparameters=d["hyperparameters"]) + bf.features = pd.DataFrame(d["features"]) + bf.paths = d["paths"] + bf.metadata = d["metadata"] + bf.linked_semiunique_id = d["linked_datapath_semiunique_id"] + return bf + + @classmethod + def from_json_file(cls, filename): + """Load a structured run previously saved to file. + + .json.gz files are supported. + + Loads a BEEPFeaturizer from json. + + Can be used in combination with files serialized with BEEPFeatures.to_json_file. + + Args: + filename (str, Pathlike): a json file from a structured run, serialzed with to_json_file. + + Returns: + None + """ + with zopen(filename, "r") as f: + d = json.load(f) + + # Add this structured file path to the paths dict + paths = d.get("paths", {}) + paths["features"] = os.path.abspath(filename) + d["paths"] = paths + return cls.from_dict(d) + + def to_json_file(self, filename): + """Save a BEEPFeatures to disk as a json. + + .json.gz files are supported. + + Not named from_json to avoid conflict with MSONable.from_json(*) + + Args: + filename (str, Pathlike): The filename to save the file to. + omit_raw (bool): If True, saves only structured (NOT RAW) data. + More efficient for saving/writing to disk. + + Returns: + None + """ + d = self.as_dict() + dumpfn(d, filename) + + +class BEEPEarlyCyclesFeaturizer(BEEPFeaturizer): + """Base class for featurizers that return a constant number of features + for any number of cycles in a structured datapath. + + These features are typically used for early prediction. + + A BEEPEarlyCyclesFeaturizer always returns the same number of features + for files for datapaths with any number of samples. Thus, + + + [Datapath w/ 2 cycles] ---> (vector of k features) + + [Datapath w/ 100 cycles] ---> (vector of k features) + """ + PER_CYCLE = False + + +class BEEPPerCycleFeaturizer(BEEPFeaturizer): + """Base class for featurizers that return a vector of features for + EACH cycle in a structured datapath. + + These features are generally used for analysis + + A BEEPPerCycleFeaturizer always returns an (n x k) matrix of features + for datapaths with n cycles each producing k features. Thus, + + [Datapath w/ 2 cycles] ---> (2 x k feature matrix) + + [Datapath w/ 100 cycles] ---> (100 x k feature matrix) + + """ + PER_CYCLE = True + SPECIAL_COLUMNS = ("cycle_index", "diag_pos") + + + + + + diff --git a/beep/features/featurizer_helpers.py b/beep/features/helper_functions.py similarity index 100% rename from beep/features/featurizer_helpers.py rename to beep/features/helper_functions.py diff --git a/beep/features/intracell/__init__.py b/beep/features/intracell/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/beep/features/intracell_analysis.py b/beep/features/intracell/intracell_analysis.py similarity index 100% rename from beep/features/intracell_analysis.py rename to beep/features/intracell/intracell_analysis.py diff --git a/beep/features/intracell/intracell_analysisv2.py b/beep/features/intracell/intracell_analysisv2.py new file mode 100644 index 00000000..bf42f9b7 --- /dev/null +++ b/beep/features/intracell/intracell_analysisv2.py @@ -0,0 +1,1410 @@ +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from matplotlib import cm +from scipy.interpolate import interp1d +from scipy.spatial import distance +from scipy.optimize import differential_evolution + + +class IntracellAnalysisV2: + # IA constants + FC_UPPER_VOLTAGE = 4.20 + FC_LOWER_VOLTAGE = 2.70 + NE_UPPER_VOLTAGE = 0.01 + NE_LOWER_VOLTAGE = 1.50 + PE_UPPER_VOLTAGE = 4.30 + PE_LOWER_VOLTAGE = 2.86 + THRESHOLD = 4.84 * 0.0 + + def __init__(self, + pe_pristine_files_dict={}, + pe_pristine_usecols=['Ecell/V','Capacity/mA.h', + 'SOC_aligned','c_rate', + 'BVV_c_rate','Voltage_aligned'], + ne_1_pristine_files_dict={}, + ne_1_pristine_usecols=['Ecell/V','Capacity/mA.h', + 'SOC_aligned','c_rate', + 'BVV_c_rate','Voltage_aligned'], + Q_fc_nom=4.84, + C_nom=-0.2, + cycle_type='rpt_0.2C', + step_type=0, + error_type='V-Q', + error_weighting='uniform', + dvdq_bound=None, + ne_2pos_file=None, + ne_2neg_file=None + ): + """ + Invokes the cell electrode analysis class. This is a class designed to fit the cell and electrode + parameters in order to determine changes of electrodes within the full cell from only full cell cycling data. + Args: + pe_pristine_dict (str): dictionary for the half cell rate data of the pristine (uncycled) positive + electrode. Keys are the rate and the entries are dataframes of Voltage vs. SOC. + ne_1_pristine_dict (str): dictionary for the half cell rate data of the pristine (uncycled) negative + electrode. Keys are the rate and the entris are dataframes of Voltage vs. SOC. + cycle_type (str): type of diagnostic cycle for the fitting + step_type (int): charge or discharge (0 for charge, 1 for discharge) + error_type (str): defines which error metric is to be used + ne_2neg_file (str): file name of the data for the negative component of the anode + ne_2pos_file (str): file name of the data for the positive component of the anode + """ + + self.pe_pristine_files_dict = pe_pristine_files_dict + self.pe_pristine_dict = {} + [self.pe_pristine_dict.update( + {key:pd.read_csv(pe_pristine_files_dict[key], + usecols=pe_pristine_usecols) + }) for key in pe_pristine_files_dict] + + self.ne_1_pristine_files_dict = ne_1_pristine_files_dict + self.ne_1_pristine_dict = {} + [self.ne_1_pristine_dict.update( + {key:pd.read_csv(ne_1_pristine_files_dict[key], + usecols=ne_1_pristine_usecols) + }) for key in ne_1_pristine_files_dict] + + + + self.Q_fc_nom = Q_fc_nom + self.C_nom = C_nom + + if ne_2neg_file and ne_2pos_file: + self.ne_2_pristine_pos = pd.read_csv(ne_2pos_file) + self.ne_2_pristine_neg = pd.read_csv(ne_2neg_file) + else: + self.ne_2_pristine_pos = pd.DataFrame() + self.ne_2_pristine_neg = pd.DataFrame() + + if step_type == 0: + self.capacity_col = 'charge_capacity' + else: + self.capacity_col = 'discharge_capacity' + + self.cycle_type = cycle_type + self.step_type = step_type + self.error_type = error_type + self.error_weighting = error_weighting + self.dvdq_bound = dvdq_bound + + + def process_beep_cycle_data_for_candidate_halfcell_analysis_ah(self, + cell_struct, + cycle_index): + """ + Ingests BEEP structured cycling data and cycle_index and returns + a Dataframe of evenly spaced capacity with corresponding voltage. + + Inputs: + cell_struct (MaccorDatapath): BEEP structured cycling data + cycle_index (int): cycle number at which to evaluate + + Outputs: + real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned (evenly spaced) + and Voltage_aligned + """ + + # filter the data down to the diagnostic type of interest + diag_type_cycles = cell_struct.diagnostic_data.loc[cell_struct.diagnostic_data['cycle_type'] == self.cycle_type] + real_cell_candidate_profile = diag_type_cycles.loc[ + (diag_type_cycles.cycle_index == cycle_index) + & (diag_type_cycles.step_type == self.step_type) # step_type = 0 is charge, 1 is discharge + & (diag_type_cycles.voltage < self.FC_UPPER_VOLTAGE) + & (diag_type_cycles[self.capacity_col] > 0)][['voltage', self.capacity_col]] + + # modify discharge data to match the V-Q directionality of charge data (increasing capacity w/ increasing voltage) + if self.step_type == 1: + real_cell_candidate_profile[self.capacity_col] = np.nanmax(real_cell_candidate_profile[self.capacity_col]) - real_cell_candidate_profile[self.capacity_col].copy() +# real_cell_candidate_profile['voltage'] = np.flipud(real_cell_candidate_profile['voltage'].copy()) + + + # renaming capacity,voltage column + real_cell_candidate_profile['Q'] = real_cell_candidate_profile[self.capacity_col] + + real_cell_candidate_profile['Voltage'] = real_cell_candidate_profile['voltage'] + real_cell_candidate_profile.drop('voltage', axis=1, inplace=True) + + # interpolate voltage along evenly spaced capacity axis + Q_vec = np.linspace(0, np.max(real_cell_candidate_profile['Q']),1001) + + real_cell_candidate_profile_aligned = pd.DataFrame() + real_cell_candidate_profile_interper = interp1d(real_cell_candidate_profile['Q'], + real_cell_candidate_profile['Voltage'], + bounds_error=False, + fill_value=(self.FC_LOWER_VOLTAGE, self.FC_UPPER_VOLTAGE)) + real_cell_candidate_profile_aligned['Voltage_aligned'] = real_cell_candidate_profile_interper( + Q_vec) + + real_cell_candidate_profile_aligned['Q_aligned'] = Q_vec + + return real_cell_candidate_profile_aligned + + def _impose_electrode_scale(self, + pe_pristine=pd.DataFrame(), + ne_1_pristine=pd.DataFrame(), + ne_2_pristine_pos=pd.DataFrame(), + ne_2_pristine_neg=pd.DataFrame(), + lli=0.0, Q_pe=0.0, Q_ne=0.0, x_ne_2=0.0): + + """ + Scales the reference electrodes according to specified capacities and + offsets their capacities according to lli. Blends negative electrode materials. + + Inputs: + pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive + electrode + ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative + electrode + ne_2_pos (Dataframe): half cell data for the positive component of the anode + ne_2_neg (Dataframe): half cell data for the negative component of the anode + lli (float): Loss of Lithium Inventory - capacity of the misalignment between + cathode and anode zero-capacity + Q_pe (float): capacity of the positive electrode (cathode) + Q_ne (float): capacity of the negative electrode (anode) + x_ne_2 (float): fraction of ne_2_pristine_pos or ne_2_pristine_neg + (positive or negative value, respectively) to ne_1_pristine + + Outputs: + pe_degraded (Dataframe): positive electrode with imposed capacity + scale to emulate degradation + ne_degraded (Dataframe): negative electrode with imposed capacity + scale and capacity offset to emulate degradation + """ + + # Blend negative electrodes + ne_pristine = blend_electrodes(ne_1_pristine, ne_2_pristine_pos, ne_2_pristine_neg, x_ne_2) + + # rescaling pristine electrodes to Q_pe and Q_ne + pe_Q_scaled = pe_pristine.copy() + pe_Q_scaled['Q_aligned'] = (pe_Q_scaled['SOC_aligned']/np.nanmax(pe_Q_scaled['SOC_aligned']))*Q_pe + ne_Q_scaled = ne_pristine.copy() + ne_Q_scaled['Q_aligned'] = (ne_Q_scaled['SOC_aligned']/np.nanmax(ne_Q_scaled['SOC_aligned']))*Q_ne + + # translate pristine ne electrode with lli + ne_Q_scaled['Q_aligned'] = ne_Q_scaled['Q_aligned'] + lli + + # Re-interpolate to align dataframes for differencing + lower_Q = np.min((np.min(pe_Q_scaled['Q_aligned']), + np.min(ne_Q_scaled['Q_aligned']))) + upper_Q = np.max((np.max(pe_Q_scaled['Q_aligned']), + np.max(ne_Q_scaled['Q_aligned']))) + Q_vec = np.linspace(lower_Q, upper_Q, 1001) + + # Actually aligning the electrode Q's + pe_pristine_interper = interp1d(pe_Q_scaled['Q_aligned'], + pe_Q_scaled['Voltage_aligned'], bounds_error=False) + pe_degraded = pe_Q_scaled.copy() + pe_degraded['Q_aligned'] = Q_vec + pe_degraded['Voltage_aligned'] = pe_pristine_interper(Q_vec) + + + ne_pristine_interper = interp1d(ne_Q_scaled['Q_aligned'], + ne_Q_scaled['Voltage_aligned'], bounds_error=False) + ne_degraded = ne_Q_scaled.copy() + ne_degraded['Q_aligned'] = Q_vec + ne_degraded['Voltage_aligned'] = ne_pristine_interper(Q_vec) + + # Returning pe and ne degraded on an Ah basis + return pe_degraded, ne_degraded + + def halfcell_degradation_matching_ah(self, x, *params): + """ + Calls underlying functions to impose degradation through electrode + capacity scale and alignment through LLI. Modifies emulated full cell + data to be within full cell voltage range and calibrates (zeros) capacity + at the lowest permissible voltage. Interpolates real and emulated data onto + a common capacity axis. + + Inputs: + x (list): [LLI, Q_pe, Q_ne, x_ne_2] + *params: + pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive + electrode + ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative + electrode + ne_2_pos (Dataframe): half cell data for the positive component of the anode + ne_2_neg (Dataframe): half cell data for the negative component of the anode + real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned + (evenly spaced) and Voltage_aligned + + Outputs: + pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled, + offset, and aligned along capacity + ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled, + offset, and aligned along capacity + + df_real_aligned (Dataframe): capacity and voltage interpolated evenly across + capacity for the real cell data + emulated_full_cell_aligned (Dataframe): capacity and voltage interpolated evenly + across capacity for the emulated cell data + """ + + lli = x[0] + Q_pe = x[1] + Q_ne = x[2] + IR_coef_pe = x[3] + IR_coef_ne = x[4] + x_ne_2 = x[5] + + # Q_fc_nom (scalar) : nominal capacity of pristine cell + # C_nom (scalar) : nominal C-rate of pristine cell + Q_fc_nom = self.Q_fc_nom + C_nom = self.C_nom + + pe_pristine_dict, ne_1_pristine_dict, ne_2_pristine_pos, ne_2_pristine_neg, real_cell_candidate_profile_aligned= params + + if len(pe_pristine_dict) == 1: + pe_pristine = pe_pristine_dict[list(pe_pristine_dict.keys())[0]] + elif len(pe_pristine_dict) > 1: + pe_at_I_eff, R_at_SOC_pe = ( + self._get_effective_electrode_rate_data(pe_pristine_dict, Q_pe, Q_fc_nom, C_nom, IR_coef_pe) + ) + pe_pristine = pe_at_I_eff # the effective rate profile is now the reference profile at pristine state + else: + raise ValueError('No dictionary entries in pe_pristine_dict') + + if len(ne_1_pristine_dict) == 1: + ne_1_pristine = ne_1_pristine_dict[list(ne_1_pristine_dict.keys())[0]] + elif len(ne_1_pristine_dict) > 1: + + ne_at_I_eff, R_at_SOC_ne = ( + self._get_effective_electrode_rate_data( ne_1_pristine_dict, Q_ne, Q_fc_nom, C_nom, IR_coef_ne) + ) + ne_1_pristine = ne_at_I_eff # the effective rate profile is now the reference profile at pristine state + else: + raise ValueError('No dictionary entries in ne_pristine_dict') + + # output degraded ne and pe (on a AH basis, with electrode alignment (NaNs for voltage, when no capacity actually at the corresponding capacity index)) + pe_out, ne_out = self._impose_electrode_scale(pe_pristine, ne_1_pristine, + ne_2_pristine_pos, ne_2_pristine_neg, + lli, Q_pe, + Q_ne, x_ne_2) + + # PE - NE = full cell voltage + emulated_full_cell_with_degradation = pd.DataFrame() + emulated_full_cell_with_degradation['Q_aligned'] = pe_out['Q_aligned'].copy() + emulated_full_cell_with_degradation['Voltage_aligned'] = pe_out['Voltage_aligned'] - ne_out['Voltage_aligned'] + + # Replace emulated full cell values outside of voltage range with NaN + emulated_full_cell_with_degradation['Voltage_aligned'].loc[ + emulated_full_cell_with_degradation['Voltage_aligned'] < self.FC_LOWER_VOLTAGE] = np.nan + emulated_full_cell_with_degradation['Voltage_aligned'].loc[ + emulated_full_cell_with_degradation['Voltage_aligned'] > self.FC_UPPER_VOLTAGE] = np.nan + + ## Center the emulated full cell and half cell curves onto the same Q at which the real (degraded) capacity measurement started (self.FC_LOWER_VOLTAGE) + emulated_full_cell_with_degradation_zeroed = pd.DataFrame() + + emulated_full_cell_with_degradation_zeroed['Voltage_aligned'] = emulated_full_cell_with_degradation[ + 'Voltage_aligned'].copy() + + zeroing_value = emulated_full_cell_with_degradation['Q_aligned'].loc[ + np.nanargmin(emulated_full_cell_with_degradation['Voltage_aligned']) + ] + + emulated_full_cell_with_degradation_zeroed['Q_aligned'] = \ + (emulated_full_cell_with_degradation['Q_aligned'].copy() - zeroing_value) + + pe_out_zeroed = pe_out.copy() + pe_out_zeroed['Q_aligned'] = pe_out['Q_aligned'] - zeroing_value + + ne_out_zeroed = ne_out.copy() + ne_out_zeroed['Q_aligned'] = ne_out['Q_aligned'] - zeroing_value + + # Interpolate full cell profiles across same Q range + min_Q = np.min( + real_cell_candidate_profile_aligned['Q_aligned'].loc[ + ~real_cell_candidate_profile_aligned['Voltage_aligned'].isna()]) + max_Q = np.max( + (real_cell_candidate_profile_aligned['Q_aligned'].loc[ + ~real_cell_candidate_profile_aligned['Voltage_aligned'].isna()].max(), + emulated_full_cell_with_degradation_zeroed['Q_aligned'].loc[ + ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()].max()) + ) + emulated_interper = interp1d(emulated_full_cell_with_degradation_zeroed['Q_aligned'].loc[ + ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()], + emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].loc[ + ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()], + bounds_error=False) + real_interper = interp1d( + real_cell_candidate_profile_aligned['Q_aligned'].loc[ + ~real_cell_candidate_profile_aligned['Voltage_aligned'].isna()], + real_cell_candidate_profile_aligned['Voltage_aligned'].loc[ + ~real_cell_candidate_profile_aligned['Voltage_aligned'].isna()], + bounds_error=False) + + Q_vec = np.linspace(min_Q, max_Q, 1001) + + emulated_aligned = pd.DataFrame() + emulated_aligned['Q_aligned'] = Q_vec + emulated_aligned['Voltage_aligned'] = emulated_interper(Q_vec) + + real_aligned = pd.DataFrame() + real_aligned['Q_aligned'] = Q_vec + real_aligned['Voltage_aligned'] = real_interper(Q_vec) + + return pe_out_zeroed, ne_out_zeroed, real_aligned, emulated_aligned + + def get_dQdV_over_V_from_degradation_matching_ah(self, x, *params): + """ + This function imposes degradation scaling ,then outputs the dQdV representation of the emulated cell data. + + Inputs: + x (list): [LLI, Q_pe, Q_ne, x_ne_2] + *params: + pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive + electrode + ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative + electrode + ne_2_pos (Dataframe): half cell data for the positive component of the anode + ne_2_neg (Dataframe): half cell data for the negative component of the anode + real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned + (evenly spaced) and Voltage_aligned + + Outputs: + pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled, + offset, and aligned along capacity + ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled, + offset, and aligned along capacity + dq_dv_over_v_real (Dataframe): dQdV across voltage for the real cell data + dq_dv_over_v_emulated (Dataframe): dQdV across voltage for the emulated cell data + df_real_interped (Dataframe): capacity and voltage interpolated evenly across + capacity for the real cell data + emulated_full_cell_interped (Dataframe): capacity and voltage interpolated evenly + across capacity for the emulated cell data + """ + + pe_out_zeroed, ne_out_zeroed, df_real_interped, emulated_full_cell_interped = \ + self.halfcell_degradation_matching_ah(x, *params) + + # Calculate dQdV from full cell profiles + dq_dv_real = pd.DataFrame(np.gradient(df_real_interped['Q_aligned'], df_real_interped['Voltage_aligned']), + columns=['dQdV']).ewm(0.1).mean() + dq_dv_emulated = pd.DataFrame( + np.gradient(emulated_full_cell_interped['Q_aligned'], emulated_full_cell_interped['Voltage_aligned']), + columns=['dQdV']).ewm(0.1).mean() + + # Include original data + dq_dv_real['Q_aligned'] = df_real_interped['Q_aligned'] + dq_dv_real['Voltage_aligned'] = df_real_interped['Voltage_aligned'] + + dq_dv_emulated['Q_aligned'] = emulated_full_cell_interped['Q_aligned'] + dq_dv_emulated['Voltage_aligned'] = emulated_full_cell_interped['Voltage_aligned'] + + # Interpolate dQdV and Q over V, aligns real and emulated over V + voltage_vec = np.linspace(self.FC_LOWER_VOLTAGE, self.FC_UPPER_VOLTAGE, 1001) + + v_dq_dv_interper_real = interp1d(dq_dv_real['Voltage_aligned'].loc[~dq_dv_real['Voltage_aligned'].isna()], + dq_dv_real['dQdV'].loc[~dq_dv_real['Voltage_aligned'].isna()], + bounds_error=False, fill_value=0) + v_Q_interper_real = interp1d(dq_dv_real['Voltage_aligned'].loc[~dq_dv_real['Voltage_aligned'].isna()], + dq_dv_real['Q_aligned'].loc[~dq_dv_real['Voltage_aligned'].isna()], + bounds_error=False, fill_value=(0, np.max(df_real_interped['Q_aligned']))) + + v_dq_dv_interper_emulated = interp1d(dq_dv_emulated['Voltage_aligned'].loc[ + ~dq_dv_emulated['Voltage_aligned'].isna()], + dq_dv_emulated['dQdV'].loc[~dq_dv_emulated['Voltage_aligned'].isna()], + bounds_error=False, fill_value=0) + v_Q_interper_emulated = interp1d(dq_dv_emulated['Voltage_aligned'].loc[ + ~dq_dv_emulated['Voltage_aligned'].isna()], + dq_dv_emulated['Q_aligned'].loc[~dq_dv_emulated['Voltage_aligned'].isna()], + bounds_error=False, fill_value=(0,np.max(df_real_interped['Q_aligned']))) + + dq_dv_over_v_real = pd.DataFrame(v_dq_dv_interper_real(voltage_vec), columns=['dQdV']).fillna(0) + dq_dv_over_v_real['Q_aligned'] = v_Q_interper_real(voltage_vec) + dq_dv_over_v_real['Voltage_aligned'] = voltage_vec + + dq_dv_over_v_emulated = pd.DataFrame(v_dq_dv_interper_emulated(voltage_vec), columns=['dQdV']).fillna(0) + dq_dv_over_v_emulated['Q_aligned'] = v_Q_interper_emulated(voltage_vec) + dq_dv_over_v_emulated['Voltage_aligned'] = voltage_vec + + return (pe_out_zeroed, + ne_out_zeroed, + dq_dv_over_v_real, + dq_dv_over_v_emulated, + df_real_interped, + emulated_full_cell_interped) + + def get_dVdQ_over_Q_from_degradation_matching_ah(self, x, *params): + """ + This function imposes degradation scaling ,then outputs the dVdQ representation of the emulated cell data. + Inputs: + x (list): [LLI, Q_pe, Q_ne, x_ne_2] + *params: + pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive + electrode + ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative + electrode + ne_2_pos (Dataframe): half cell data for the positive component of the anode + ne_2_neg (Dataframe): half cell data for the negative component of the anode + real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned + (evenly spaced) and Voltage_aligned + Outputs: + pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled, + offset, and aligned along capacity + ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled, + offset, and aligned along capacity + dv_dq_real (Dataframe): dVdQ across capacity for the real cell data + dv_dq_emulated (Dataframe): dVdQ across capacity for the emulated cell data + df_real_interped (Dataframe): capacity and voltage interpolated evenly across + capacity for the real cell data + emulated_full_cell_interped (Dataframe): capacity and voltage interpolated evenly + across capacity for the emulated cell data + """ + + pe_out_zeroed, ne_out_zeroed, df_real_interped, emulated_full_cell_interped = \ + self.halfcell_degradation_matching_ah(x, *params) + + # Calculate dVdQ from full cell profiles +# dv_dq_real = pd.DataFrame(np.gradient(df_real_interped['Voltage_aligned'], df_real_interped['Q_aligned']), +# columns=['dVdQ']).ewm(alpha=0.3).mean() +# dv_dq_emulated = pd.DataFrame( +# np.gradient(emulated_full_cell_interped['Voltage_aligned'], emulated_full_cell_interped['Q_aligned']), +# columns=['dVdQ']).ewm(alpha=0.3).mean() + dv_dq_real = pd.DataFrame(np.gradient(df_real_interped['Voltage_aligned'], df_real_interped['Q_aligned']), + columns=['dVdQ']) + dv_dq_emulated = pd.DataFrame( + np.gradient(emulated_full_cell_interped['Voltage_aligned'], emulated_full_cell_interped['Q_aligned']), + columns=['dVdQ']) + + # Include original data + dv_dq_real['Q_aligned'] = df_real_interped['Q_aligned'] + dv_dq_real['Voltage_aligned'] = df_real_interped['Voltage_aligned'] + + dv_dq_emulated['Q_aligned'] = emulated_full_cell_interped['Q_aligned'] + dv_dq_emulated['Voltage_aligned'] = emulated_full_cell_interped['Voltage_aligned'] + + # Q interpolation not needed, as interpolated over Q by default + + return (pe_out_zeroed, + ne_out_zeroed, + dv_dq_real, + dv_dq_emulated, + df_real_interped, + emulated_full_cell_interped) + + def get_V_over_Q_from_degradation_matching_ah(self, x, *params): + """ + This function imposes degradation scaling ,then outputs the V-Q representation of the emulated cell data. + Inputs: + x (list): [LLI, Q_pe, Q_ne, x_ne_2] + *params: + pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive + electrode + ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative + electrode + ne_2_pos (Dataframe): half cell data for the positive component of the anode + ne_2_neg (Dataframe): half cell data for the negative component of the anode + real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned + (evenly spaced) and Voltage_aligned + Outputs: + pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled, + offset, and aligned along capacity + ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled, + offset, and aligned along capacity + df_real_interped (Dataframe): capacity and voltage interpolated evenly across + capacity for the real cell data + emulated_full_cell_interped (Dataframe): capacity and voltage interpolated evenly + across capacity for the emulated cell data + """ + (pe_out_zeroed, ne_out_zeroed, real_aligned, emulated_aligned) = \ + self.halfcell_degradation_matching_ah(x, *params) + + min_soc_full_cell = np.min(real_aligned.loc[~real_aligned.Voltage_aligned.isna()].Q_aligned) + max_soc_full_cell = np.max((real_aligned.loc[~real_aligned.Voltage_aligned.isna()].Q_aligned.max(), + emulated_aligned.loc[~emulated_aligned.Voltage_aligned.isna()].Q_aligned.max())) + + soc_vec_full_cell = np.linspace(min_soc_full_cell, max_soc_full_cell, 1001) + + emulated_full_cell_interper = interp1d( + emulated_aligned.Q_aligned.loc[~emulated_aligned.Voltage_aligned.isna()], + emulated_aligned.Voltage_aligned.loc[~emulated_aligned.Voltage_aligned.isna()], + bounds_error=False) + real_full_cell_interper = interp1d(real_aligned.Q_aligned.loc[~real_aligned.Voltage_aligned.isna()], + real_aligned.Voltage_aligned.loc[~real_aligned.Voltage_aligned.isna()], + bounds_error=False) + + # Interpolate the emulated full-cell profile + emulated_full_cell_interped = pd.DataFrame() + emulated_full_cell_interped['Q_aligned'] = soc_vec_full_cell + emulated_full_cell_interped['Voltage_aligned'] = emulated_full_cell_interper(soc_vec_full_cell) + + # Interpolate the true full-cell profile + df_real_interped = emulated_full_cell_interped.copy() + df_real_interped['Q_aligned'] = soc_vec_full_cell + df_real_interped['Voltage_aligned'] = real_full_cell_interper(soc_vec_full_cell) + return pe_out_zeroed, ne_out_zeroed, df_real_interped, emulated_full_cell_interped + + + def get_V_over_Q_from_degradation_matching_ah_no_real(self, x, *params): + """ + This function imposes degradation scaling ,then outputs the V-Q representation of the emulated cell data, in the absence of real cell data. + + Inputs: + x (list): [LLI, Q_pe, Q_ne, x_ne_2] + *params: + pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive + electrode + ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative + electrode + ne_2_pos (Dataframe): half cell data for the positive component of the anode + ne_2_neg (Dataframe): half cell data for the negative component of the anode + real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned + (evenly spaced) and Voltage_aligned + + Outputs: + pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled, + offset, and aligned along capacity + ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled, + offset, and aligned along capacity + emulated_full_cell_interped (Dataframe): capacity and voltage interpolated evenly + across capacity for the emulated cell data + + """ + (pe_out_zeroed, ne_out_zeroed, emulated_aligned) = \ + self.halfcell_degradation_matching_ah_no_real(x, *params) + + min_Q_full_cell = np.min(emulated_aligned.loc[~emulated_aligned.Voltage_aligned.isna()].Q_aligned) + max_Q_full_cell = np.max(emulated_aligned.loc[~emulated_aligned.Voltage_aligned.isna()].Q_aligned) + + Q_vec_full_cell = np.linspace(min_Q_full_cell, max_Q_full_cell, 1001) + + emulated_full_cell_interper = interp1d( + emulated_aligned.Q_aligned.loc[~emulated_aligned.Voltage_aligned.isna()], + emulated_aligned.Voltage_aligned.loc[~emulated_aligned.Voltage_aligned.isna()], + bounds_error=False) + + # Interpolate the emulated full-cell profile + emulated_full_cell_interped = pd.DataFrame() + emulated_full_cell_interped['Q_aligned'] = Q_vec_full_cell + emulated_full_cell_interped['Voltage_aligned'] = emulated_full_cell_interper(Q_vec_full_cell) + + return pe_out_zeroed, ne_out_zeroed, emulated_full_cell_interped + + def halfcell_degradation_matching_ah_no_real(self, x, *params): + """ + Calls underlying functions to impose degradation through electrode + capacity scale and alignment through LLI. Modifies emulated full cell + data to be within full cell voltage range and calibrates (zeros) capacity + at the lowest permissible voltage. + + Inputs: + x (list): [LLI, Q_pe, Q_ne, x_ne_2] + *params: + pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive + electrode + ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative + electrode + ne_2_pos (Dataframe): half cell data for the positive component of the anode + ne_2_neg (Dataframe): half cell data for the negative component of the anode + real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned + (evenly spaced) and Voltage_aligned + + Outputs: + pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled, + offset, and aligned along capacity + ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled, + offset, and aligned along capacity + emulated_aligned (Dataframe): full cell data corresponding to the imposed degradation + """ + lli = x[0] + Q_pe = x[1] + Q_ne = x[2] + x_ne_2 = x[3] + + pe_pristine, ne_1_pristine, ne_2_pristine_pos, ne_2_pristine_neg = params + + pe_out, ne_out = self._impose_electrode_scale(pe_pristine, ne_1_pristine, + ne_2_pristine_pos, ne_2_pristine_neg, + lli, Q_pe, + Q_ne, x_ne_2) #outputs degraded ne and pe (on a AH basis, with electrode alignment (NaNs for voltage, when no overlap)) + + emulated_full_cell_with_degradation = pd.DataFrame() + emulated_full_cell_with_degradation['Q_aligned'] = pe_out['Q_aligned'].copy() + emulated_full_cell_with_degradation['Voltage_aligned'] = pe_out['Voltage_aligned'] - ne_out['Voltage_aligned'] + + # Replace emulated full cell values outside of voltage range with NaN + emulated_full_cell_with_degradation['Voltage_aligned'].loc[ + emulated_full_cell_with_degradation['Voltage_aligned'] < self.FC_LOWER_VOLTAGE] = np.nan + emulated_full_cell_with_degradation['Voltage_aligned'].loc[ + emulated_full_cell_with_degradation['Voltage_aligned'] > self.FC_UPPER_VOLTAGE] = np.nan + + + ## Center the emulated full cell and half cell curves onto the same Q at which the real (degraded) capacity measurement started (self.FC_LOWER_VOLTAGE) + emulated_full_cell_with_degradation_zeroed = pd.DataFrame() + + emulated_full_cell_with_degradation_zeroed['Voltage_aligned'] = emulated_full_cell_with_degradation[ + 'Voltage_aligned'] + + zeroing_value = emulated_full_cell_with_degradation['Q_aligned'].loc[ + np.nanargmin(emulated_full_cell_with_degradation['Voltage_aligned']) + ] + + emulated_full_cell_with_degradation_zeroed['Q_aligned'] = \ + (emulated_full_cell_with_degradation['Q_aligned'] - zeroing_value) + + pe_out_zeroed = pe_out.copy() + pe_out_zeroed['Q_aligned'] = pe_out['Q_aligned'] - zeroing_value + + ne_out_zeroed = ne_out.copy() + ne_out_zeroed['Q_aligned'] = ne_out['Q_aligned'] - zeroing_value + + # Interpolate full profiles across same Q range + min_Q = np.min( + emulated_full_cell_with_degradation_zeroed['Q_aligned'].loc[ + ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()]) + max_Q = np.max( + emulated_full_cell_with_degradation_zeroed['Q_aligned'].loc[ + ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()]) + + emulated_interper = interp1d(emulated_full_cell_with_degradation_zeroed['Q_aligned'].loc[ + ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()], + emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].loc[ + ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()], + bounds_error=False) + + Q_vec = np.linspace(min_Q, max_Q, 1001) + + emulated_aligned = pd.DataFrame() + emulated_aligned['Q_aligned'] = Q_vec + emulated_aligned['Voltage_aligned'] = emulated_interper(Q_vec) + + return pe_out_zeroed, ne_out_zeroed, emulated_aligned + + def _get_error_from_degradation_matching_ah(self, x, *params): + """ + Wrapper function which selects the correct error sub routine and returns its error value. + + Inputs: + x (list): [LLI, Q_pe, Q_ne, x_ne_2] + *params: + pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive + electrode + ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative + electrode + ne_2_pos (Dataframe): half cell data for the positive component of the anode + ne_2_neg (Dataframe): half cell data for the negative component of the anode + real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned + (evenly spaced) and Voltage_aligned + + Outputs: + error value (float) - output of the specified error sub function + """ + error_type = self.error_type + if error_type == 'V-Q': + return self._get_error_from_degradation_matching_V_Q(x,*params)[0] + elif error_type == 'dVdQ': + return self._get_error_from_degradation_matching_dVdQ(x,*params)[0] + elif error_type == 'dQdV': + return self._get_error_from_degradation_matching_dQdV(x,*params)[0] + else: + return self._get_error_from_degradation_matching_V_Q(x,*params)[0] + + def _get_error_from_degradation_matching_V_Q(self, x, *params): + """ + Error function returning the mean standardized Euclidean distance of each point of the real curve to + the closest value on the emulated curve in the V-Q representation. + + Inputs: + x (list): [LLI, Q_pe, Q_ne, x_ne_2] + *params: + pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive + electrode + ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative + electrode + ne_2_pos (Dataframe): half cell data for the positive component of the anode + ne_2_neg (Dataframe): half cell data for the negative component of the anode + real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned + (evenly spaced) and Voltage_aligned + + Outputs: + error (float): output of the specified error sub function + error_vector (array): vector containingEuclidean distance of each point of the real curve to + the closest value on the emulated curve in the V-Q representation + XA (Dataframe): real full cell data used for error analysis + XB (Dataframe): emulated full cell data used for error analysis + """ + + try: + (pe_out_zeroed, ne_out_zeroed, real_aligned, emulated_aligned + ) = self.get_V_over_Q_from_degradation_matching_ah(x, *params) + + XA = real_aligned.dropna() + XB = emulated_aligned.dropna() + error_matrix = distance.cdist(XA, XB, 'seuclidean') + error_vector = error_matrix.min(axis = 1) + error = error_vector.mean() + except ValueError: + error = 100 + return error, None, None, None + return error, error_vector, XA, XB + + # Pairwise euclidean from premade dQdV + def _get_error_from_degradation_matching_dQdV(self, x, *params): + """ + Error function returning the mean standardized Euclidean distance of each point of the real curve to + the closest value on the emulated curve in the dQdV representation. + + Inputs: + x (list): [LLI, Q_pe, Q_ne, x_ne_2] + *params: + pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive + electrode + ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative + electrode + ne_2_pos (Dataframe): half cell data for the positive component of the anode + ne_2_neg (Dataframe): half cell data for the negative component of the anode + real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned + (evenly spaced) and Voltage_aligned + + Outputs: + error (float): output of the specified error sub function + error_vector (array): vector containing Euclidean distance of each point of the real curve to + the closest value on the emulated curve in the dQdV representation + XA (Dataframe): real full cell data used for error analysis + XB (Dataframe): emulated full cell data used for error analysis + """ + + try: + # Call dQdV generating function + (PE_out_zeroed, + NE_out_zeroed, + dQdV_over_v_real, + dQdV_over_v_emulated, + df_real_interped, + emulated_full_cell_interped) = self.get_dQdV_over_V_from_degradation_matching_ah(x, *params) + + XA = dQdV_over_v_real[[ 'Voltage_aligned','dQdV']].dropna() + XB = dQdV_over_v_emulated[['Voltage_aligned', 'dQdV']].dropna() + error_matrix = distance.cdist(XA, XB, 'seuclidean') + error_vector = error_matrix.min(axis = 1) + error = error_vector.mean() + + except ValueError: + error = 100 + return error, None, None, None + return error, error_vector, XA, XB + + def _get_error_from_degradation_matching_dVdQ(self, x, *params): + """ + Error function returning the mean standardized Euclidean distance of each point of the real curve to + the closest value on the emulated curve in the dVdQ representation. + + Inputs: + x (list): [LLI, Q_pe, Q_ne, x_ne_2] + *params: + pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive + electrode + ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative + electrode + ne_2_pos (Dataframe): half cell data for the positive component of the anode + ne_2_neg (Dataframe): half cell data for the negative component of the anode + real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned + (evenly spaced) and Voltage_aligned + + Outputs: + error (float): output of the specified error sub function + error_vector (array): vector containing Euclidean distance of each point of the real curve to + the closest value on the emulated curve in the dVdQ representation + error_vector_weighted (array): error_vector multiplied by dQdV values as weighting + XA (Dataframe): real full cell data used for error analysis + XB (Dataframe): emulated full cell data used for error analysis + """ + +# try: + (PE_out_zeroed, + NE_out_zeroed, + dVdQ_over_Q_real, + dVdQ_over_Q_emulated, + df_real_interped, + emulated_full_cell_interped) = self.get_dVdQ_over_Q_from_degradation_matching_ah(x, *params) + + XA = dVdQ_over_Q_real[[ 'Q_aligned','dVdQ']].dropna() + XB = dVdQ_over_Q_emulated[['Q_aligned', 'dVdQ']].dropna() + + if self.dvdq_bound is not None: + # down-select to values with dVdQ less than 0.8 V/Ahr to eliminate high-slope/high-valued region of dVdQ + XA = XA.loc[XA.dVdQ < self.dvdq_bound] + XB = XB.loc[XB.dVdQ < self.dvdq_bound] + + error_matrix = distance.cdist(XA, XB, 'seuclidean') + error_vector_raw = error_matrix.min(axis = 1) + + # apply weighting scheme + if self.error_weighting == 'dQdV': + dQdV_over_v_real = self.get_dQdV_over_V_from_degradation_matching_ah(x, *params)[2] + if self.dvdq_bound is not None: + error_vector_weighted = np.multiply( + error_vector_raw,dQdV_over_v_real['dQdV'].loc[(XA.dVdQ < 0.8).index]) + elif self.dvdq_bound is None: + error_vector_weighted = np.multiply( + error_vector_raw,dQdV_over_v_real['dQdV']) + elif self.error_weighting == 'uniform': + error_vector_weighted = error_vector_raw + else: + raise ValueError('Unknown weighting scheme provided. Check the value of "error_weighting".') + + error = error_vector_weighted.mean() + +# except ValueError: +# print('ValueError') +# error = 100 +# return error, None, None, None, None + return error, error_vector_raw, error_vector_weighted, XA, XB + + def _get_error_from_synthetic_fitting_ah(self, x, *params): + """ + Wrapper function which selects the correct error sub routine and returns its error value. + This function is specific to fitting synthetic data rather than real cycling data. + + Inputs: + x (list): [LLI, Q_pe, Q_ne, x_ne_2] + *params: + pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive + electrode + ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative + electrode + ne_2_pos (Dataframe): half cell data for the positive component of the anode + ne_2_neg (Dataframe): half cell data for the negative component of the anode + real_cell_candidate_profile_aligned (Dataframe): columns Q_aligned + (evenly spaced) and Voltage_aligned + + Outputs: + error value (float) - output of the specified error sub function + """ + + error_type = self.error_type + + try: + if error_type == 'V-Q': + return self._get_error_from_degradation_matching_V_Q(x, *params)[0] + elif error_type == 'dVdQ': + return self._get_error_from_degradation_matching_dVdQ(x, *params)[0] + elif error_type == 'dQdV': + return self._get_error_from_degradation_matching_dQdV(x, *params)[0] + else: + return self._get_error_from_degradation_matching_V_Q(x, *params)[0] + except: + print("Can't return error") + return 100 + + def intracell_values_wrapper_ah(self, + cycle_index, + cell_struct, + degradation_bounds=None + ): + """ + Wrapper function to solve capacity sizing and offset of reference electrodes to real full cell cycle data. + + Inputs: + cycle_index (int): the index of the cycle of interest of the structured real cycling data + cell_struct (MaccorDatapath): BEEP structured cycling data + + Outputs: + loss_dict (dict): dictionary with key of cycle index and entry of a list of + error, LLI_opt, Q_pe_opt, Q_ne_opt, x_NE_2, Q_li + profiles_dict (dict): dictionary with key of cycle index and entry of a dictionary + containing various key/entry pairs of resulting from the fitting + """ + + if degradation_bounds is None: + degradation_bounds = ((0, 2.0), # LLI + (2.5, 6.5), # Q_pe (Q_pe must be > LLI) + (2.5, 6.5), # Q_ne + (1,1), #IR coef pe + (1,1), #IR coef ne + (1, 1), # (-1,1) x_NE_2 + ) + + real_cell_candidate_profile_aligned = self.process_beep_cycle_data_for_candidate_halfcell_analysis_ah( + cell_struct, + cycle_index) + + + + degradation_optimization_result = differential_evolution(self._get_error_from_degradation_matching_ah, + degradation_bounds, + args=(self.pe_pristine_dict, + self.ne_1_pristine_dict, + self.ne_2_pristine_pos, + self.ne_2_pristine_neg, + real_cell_candidate_profile_aligned, + ), + strategy='best1bin', maxiter=1000, + popsize=15, tol=0.01, mutation=0.5, + recombination=0.7, + seed=0, + callback=None, disp=False, polish=True, + init='latinhypercube', + atol=0, updating='deferred', workers=-1, + constraints=() + ) +# print(degradation_optimization_result.x) #BVV + (PE_out_zeroed, + NE_out_zeroed, + dVdQ_over_Q_real, + dVdQ_over_Q_emulated, + df_real_interped, + emulated_full_cell_interped) = self.get_dVdQ_over_Q_from_degradation_matching_ah( + degradation_optimization_result.x, + self.pe_pristine_dict, + self.ne_1_pristine_dict, + self.ne_2_pristine_pos, + self.ne_2_pristine_neg, + real_cell_candidate_profile_aligned) + + (dQdV_over_v_real, + dQdV_over_v_emulated) = self.get_dQdV_over_V_from_degradation_matching_ah( + degradation_optimization_result.x, + self.pe_pristine_dict, + self.ne_1_pristine_dict, + self.ne_2_pristine_pos, + self.ne_2_pristine_neg, + real_cell_candidate_profile_aligned)[2:4] + # + electrode_info_df = self.get_electrode_info_ah(PE_out_zeroed, NE_out_zeroed) + # + error = degradation_optimization_result.fun + LLI_opt = degradation_optimization_result.x[0] + Q_pe_opt = degradation_optimization_result.x[1] + Q_ne_opt = degradation_optimization_result.x[2] + IR_coef_pe_opt = degradation_optimization_result.x[3] + IR_coef_ne_opt = degradation_optimization_result.x[4] + x_NE_2 = degradation_optimization_result.x[5] + + loss_dict = {cycle_index: np.append([error, LLI_opt, Q_pe_opt, Q_ne_opt,IR_coef_pe_opt,IR_coef_ne_opt, + x_NE_2], + electrode_info_df.iloc[-1].values) + } + + + error, error_vector_raw, error_vector_weighted, XA, XB = self._get_error_from_degradation_matching_dVdQ( + [LLI_opt, Q_pe_opt, Q_ne_opt,IR_coef_pe_opt,IR_coef_ne_opt, x_NE_2], + self.pe_pristine_dict, + self.ne_1_pristine_dict, + self.ne_2_pristine_pos, + self.ne_2_pristine_neg, + real_cell_candidate_profile_aligned) + + profiles_per_cycle_dict = {'NE_zeroed' : NE_out_zeroed, + 'PE_zeroed' : PE_out_zeroed, + 'dVdQ_over_Q_real': dVdQ_over_Q_real, + 'dVdQ_over_Q_emulated': dVdQ_over_Q_emulated, + 'dQdV_over_v_real':dQdV_over_v_real, + 'dQdV_over_v_emulated':dQdV_over_v_emulated, + 'df_real_interped': df_real_interped, + 'emulated_full_cell_interped': emulated_full_cell_interped , + 'real_cell_candidate_profile_aligned': real_cell_candidate_profile_aligned, + 'error_vector_raw':error_vector_raw, + 'error_vector_weighted':error_vector_weighted, + 'XA':XA, + 'XB':XB + } + + profiles_dict = {cycle_index: profiles_per_cycle_dict} + + return loss_dict, profiles_dict + + def solve_emulated_degradation(self, + forward_simulated_profile, + degradation_bounds=None + ): + + """ + + + """ + + if degradation_bounds is None: + degradation_bounds = ((0, 2.0), # LLI + (2.5, 6.5), # Q_pe + (2.5, 6.5), # Q_ne + (1, 1), # (-1,1) x_NE_2 + ) + + degradation_optimization_result = differential_evolution(self._get_error_from_synthetic_fitting_ah, + degradation_bounds, + args=(self.pe_pristine_dict, + self.ne_1_pristine_dict, + self.ne_2_pristine_pos, + self.ne_2_pristine_neg, + forward_simulated_profile, + ), + strategy='best1bin', maxiter=100000, + popsize=15, tol=0.001, mutation=0.5, + recombination=0.7, + seed=1, + callback=None, disp=False, polish=True, + init='latinhypercube', + atol=0, updating='deferred', workers=-1, + constraints=() + ) + + return degradation_optimization_result + + def get_electrode_info_ah(self, pe_out_zeroed, ne_out_zeroed): + """ + Calculates a variety of half-cell metrics at various positions in the full-cell profile. + + Inputs: + pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled, + offset, and aligned along capacity + ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled, + offset, and aligned along capacity + + Outputs: + electrode_info_df (Dataframe): dataframe containing a variety of half-cell metrics + at various positions in the emulated full-cell profile. + + pe_voltage_FC4p2V: voltage of the positive electrode (catahode) corresponding + to the full cell at 4.2V + ... + pe_voltage_FC2p7V: voltage of the positive electrode (catahode) corresponding + to the full cell at 2.7V + + pe_soc_FC4p2V: state of charge of the positive electrode corresponding + to the full cell at 4.2V + ... + pe_soc_FC2p7V: state of charge of the positive electrode corresponding + to the full cell at 2.7V + + ne_voltage_FC4p2V: voltage of the negative electrode (anode) corresponding + to the full cell at 4.2V + ... + ne_voltage_FC2p7V: voltage of the negative electrode (anode) corresponding + to the full cell at 2.7V + + ne_soc_FC4p2V: state of charge of the anode electrode corresponding + to the full cell at 4.2V + ... + ne_soc_FC2p7V: state of charge of the anode electrode corresponding + to the full cell at 2.7V + + Q_fc: capacity of the full cecll within the full cell voltage limits + Q_pe: capacity of the cathode + Q_ne: capacity of the anode [Ahr] + Q_li + """ + pe_minus_ne_zeroed = pd.DataFrame(pe_out_zeroed['Voltage_aligned'] - ne_out_zeroed['Voltage_aligned'], + columns=['Voltage_aligned']) + pe_minus_ne_zeroed['Q_aligned'] = pe_out_zeroed['Q_aligned'] + + electrode_info_df = pd.DataFrame(index=[0]) + + electrode_info_df['pe_voltage_FC4p2V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.2))].Voltage_aligned + electrode_info_df['pe_voltage_FC4p1V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.1))].Voltage_aligned + electrode_info_df['pe_voltage_FC4p0V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.0))].Voltage_aligned + electrode_info_df['pe_voltage_FC3p9V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.9))].Voltage_aligned + electrode_info_df['pe_voltage_FC3p8V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.8))].Voltage_aligned + electrode_info_df['pe_voltage_FC3p7V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.7))].Voltage_aligned + electrode_info_df['pe_voltage_FC3p6V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.6))].Voltage_aligned + electrode_info_df['pe_voltage_FC3p5V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.5))].Voltage_aligned + electrode_info_df['pe_voltage_FC3p4V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.4))].Voltage_aligned + electrode_info_df['pe_voltage_FC3p3V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.3))].Voltage_aligned + electrode_info_df['pe_voltage_FC3p2V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.2))].Voltage_aligned + electrode_info_df['pe_voltage_FC3p1V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.1))].Voltage_aligned + electrode_info_df['pe_voltage_FC3p0V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.0))].Voltage_aligned + electrode_info_df['pe_voltage_FC2p9V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.9))].Voltage_aligned + electrode_info_df['pe_voltage_FC2p8V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.8))].Voltage_aligned + electrode_info_df['pe_voltage_FC2p7V'] = pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.7))].Voltage_aligned + + electrode_info_df['pe_soc_FC4p2V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.2))].Q_aligned - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) + ) # 4.2V + electrode_info_df['pe_soc_FC4p1V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.1))].Q_aligned - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) + ) # 4.1V + electrode_info_df['pe_soc_FC4p0V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.0))].Q_aligned - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) + ) # 4.0V + electrode_info_df['pe_soc_FC3p9V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.9))].Q_aligned - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) + ) # 3.9V + electrode_info_df['pe_soc_FC3p8V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.8))].Q_aligned - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) + ) # 3.8V + electrode_info_df['pe_soc_FC3p7V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.7))].Q_aligned - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) + ) # 3.7V + electrode_info_df['pe_soc_FC3p6V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.6))].Q_aligned - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) + ) # 3.6V + electrode_info_df['pe_soc_FC3p5V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.5))].Q_aligned - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) + ) # 3.5V + electrode_info_df['pe_soc_FC3p4V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.4))].Q_aligned - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) + ) # 3.4V + electrode_info_df['pe_soc_FC3p3V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.3))].Q_aligned - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) + ) # 3.3V + electrode_info_df['pe_soc_FC3p2V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.2))].Q_aligned - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) + ) # 3.2V + electrode_info_df['pe_soc_FC3p1V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.1))].Q_aligned - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) + ) # 3.1V + electrode_info_df['pe_soc_FC3p0V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.0))].Q_aligned - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) + ) # 3.0V + electrode_info_df['pe_soc_FC2p9V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.9))].Q_aligned - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) + ) # 2.9V + electrode_info_df['pe_soc_FC2p8V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.8))].Q_aligned - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) + ) # 2.8V + electrode_info_df['pe_soc_FC2p7V'] = ((pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.7))].Q_aligned - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - + np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) + ) # 2.7V + + electrode_info_df['ne_voltage_FC4p2V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.2))].Voltage_aligned + electrode_info_df['ne_voltage_FC4p1V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.1))].Voltage_aligned + electrode_info_df['ne_voltage_FC4p0V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.0))].Voltage_aligned + electrode_info_df['ne_voltage_FC3p9V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.9))].Voltage_aligned + electrode_info_df['ne_voltage_FC3p8V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.8))].Voltage_aligned + electrode_info_df['ne_voltage_FC3p7V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.7))].Voltage_aligned + electrode_info_df['ne_voltage_FC3p6V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.6))].Voltage_aligned + electrode_info_df['ne_voltage_FC3p5V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.5))].Voltage_aligned + electrode_info_df['ne_voltage_FC3p4V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.4))].Voltage_aligned + electrode_info_df['ne_voltage_FC3p3V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.3))].Voltage_aligned + electrode_info_df['ne_voltage_FC3p2V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.2))].Voltage_aligned + electrode_info_df['ne_voltage_FC3p1V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.1))].Voltage_aligned + electrode_info_df['ne_voltage_FC3p0V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.0))].Voltage_aligned + electrode_info_df['ne_voltage_FC2p9V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.9))].Voltage_aligned + electrode_info_df['ne_voltage_FC2p8V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.8))].Voltage_aligned + electrode_info_df['ne_voltage_FC2p7V'] = ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.7))].Voltage_aligned + + electrode_info_df['ne_soc_FC4p2V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.2))].Q_aligned - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) + ) # 4.2V + electrode_info_df['ne_soc_FC4p1V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.1))].Q_aligned - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) + ) # 4.1V + electrode_info_df['ne_soc_FC4p0V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.0))].Q_aligned - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) + ) # 4.0V + electrode_info_df['ne_soc_FC3p9V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.9))].Q_aligned - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) + ) # 3.9V + electrode_info_df['ne_soc_FC3p8V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.8))].Q_aligned - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) + ) # 3Q_aligned.8V + electrode_info_df['ne_soc_FC3p7V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.7))].Q_aligned - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) + ) # 3.7V + electrode_info_df['ne_soc_FC3p6V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.6))].Q_aligned - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) + ) # 3.6V + electrode_info_df['ne_soc_FC3p5V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.5))].Q_aligned - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) + ) # 3.5V + electrode_info_df['ne_soc_FC3p4V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.4))].Q_aligned - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) + ) # 3.4V + electrode_info_df['ne_soc_FC3p3V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.3))].Q_aligned - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) + ) # 3.3V + electrode_info_df['ne_soc_FC3p2V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.2))].Q_aligned - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) + ) # 3.2V + electrode_info_df['ne_soc_FC3p1V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.1))].Q_aligned - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) + ) # 3.1V + electrode_info_df['ne_soc_FC3p0V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.0))].Q_aligned - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) + ) # 3.0V + electrode_info_df['ne_soc_FC2p9V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.9))].Q_aligned - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) + ) # 2.9V + electrode_info_df['ne_soc_FC2p8V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.8))].Q_aligned - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) + ) # 2.8V + electrode_info_df['ne_soc_FC2p7V'] = ((ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.7))].Q_aligned - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( + np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - + np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) + ) # 2.7V + + electrode_info_df['Q_fc'] = pe_minus_ne_zeroed.loc[ + np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.20))].Q_aligned + + electrode_info_df['Q_pe'] = np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) + + electrode_info_df['Q_ne'] = np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) + + electrode_info_df['Q_li'] = np.max(pe_minus_ne_zeroed['Q_aligned'].loc[~pe_minus_ne_zeroed.Voltage_aligned.isna()]) - np.min(pe_minus_ne_zeroed['Q_aligned'].loc[~pe_minus_ne_zeroed.Voltage_aligned.isna()]) + + + return electrode_info_df + + def _get_effective_electrode_rate_data(self,electrode_rate_dict=dict(), + Q_electrode=4.84,Q_fc_nom=4.84, + C_nom=-0.2,IR_coef=1.0): + """ + Inputs: + electrode_rate_dict (dict): dictonary of rate data for an electrode. Keys are float representing C rate; entries are dataframe of rate data with columns Voltage_aligned and SOC_aligned. Sign of cathode rate keys should be same as full cell (positive for charge, negative for discharge). Sign of anode rate keys should be opposite of the full cell. + Q_electrode (float): capacity of the electrode in Ahr + Q_fc_nom (float): nominal capacity of full cell which was used to define the nominal C rate + C_nom (float): nominal current rate in full cell (negative for discharge) + IR_coef (float): scaling coefficient of overpotential term, used in fitting + + Outputs: + electrode_at_I_eff (DataFrame): electrode data modified for effective rate, based on the supplied dictionary of rates and an overpotential scaling term. + + """ + + I_eff = C_nom / (Q_electrode/Q_fc_nom) + C_nom_sign = np.sign(list(electrode_rate_dict.keys())[0]) + + R_at_SOC = [] + for SOC in electrode_rate_dict[list(electrode_rate_dict.keys())[0]]['SOC_aligned']: + + V_at_SOC_for_rates = [] + for rate in electrode_rate_dict.keys(): + V_at_SOC_for_rates = np.append(V_at_SOC_for_rates, + electrode_rate_dict[rate]['Voltage_aligned'].loc[ + electrode_rate_dict[rate]['SOC_aligned'] == SOC + ]) + R_at_SOC = np.append(R_at_SOC, + np.polynomial.polynomial.Polynomial.fit( + list(electrode_rate_dict.keys()), + V_at_SOC_for_rates,deg=2).deriv(m=1)(I_eff) + ) + electrode_at_I_eff = pd.DataFrame() + electrode_at_I_eff['SOC_aligned'] = electrode_rate_dict[ + list(electrode_rate_dict.keys())[0]]['SOC_aligned'] + + closest_rate_key = list(electrode_rate_dict.keys())[ + np.argmin(np.abs( + np.array(list(electrode_rate_dict.keys())) - I_eff))] + electrode_at_I_eff['Voltage_aligned'] = electrode_rate_dict[ + closest_rate_key]['Voltage_aligned'] + IR_coef*(I_eff - (C_nom*C_nom_sign)*R_at_SOC) + + return electrode_at_I_eff, R_at_SOC + +def blend_electrodes(electrode_1, electrode_2_pos, electrode_2_neg, x_2): ## this function needs revisited + """ + Blends two electrode materials from their SOC-V profiles to form a blended electrode. + + Inputs: + electrode_1: Primary material in electrode, typically Gr. DataFrame supplied with SOC evenly spaced and voltage. + electrode_2: Secondary material in electrode, typically Si. DataFrame supplied with SOC evenly spaced and + voltage as an additional column. + x_2: Fraction of electrode_2 material's capacity (not mass). Supplied as scalar value. + + Outputs: + df_blended_soc_mod (Dataframe): blended electrode with SOC_aligned and Voltage_aligned columns + """ + if electrode_2_pos.empty: + df_blended = electrode_1 + return df_blended + + if electrode_2_neg.empty: + electrode_2 = electrode_2_pos + x_2 = np.abs(x_2) + elif x_2 > 0: + electrode_2 = electrode_2_pos + else: + electrode_2 = electrode_2_neg + x_2 = np.abs(x_2) + + electrode_1_interper = interp1d(electrode_1['Voltage_aligned'], electrode_1['SOC_aligned'], bounds_error=False, + fill_value='extrapolate') + electrode_2_interper = interp1d(electrode_2['Voltage_aligned'], electrode_2['SOC_aligned'], bounds_error=False, + fill_value='extrapolate') + + voltage_vec = np.linspace(np.min((np.min(electrode_1['Voltage_aligned']), + np.min(electrode_2['Voltage_aligned']))), + np.max((np.max(electrode_1['Voltage_aligned']), + np.max(electrode_2['Voltage_aligned']))), + 1001) + electrode_1_voltage_aligned = pd.DataFrame(electrode_1_interper(voltage_vec), columns=['SOC']) + electrode_2_voltage_aligned = pd.DataFrame(electrode_2_interper(voltage_vec), columns=['SOC']) + electrode_1_voltage_aligned['Voltage'] = voltage_vec + electrode_2_voltage_aligned['Voltage'] = voltage_vec + + df_blend_voltage_aligned = pd.DataFrame( + (1 - x_2) * electrode_1_voltage_aligned['SOC'] + x_2 * electrode_2_voltage_aligned['SOC'], columns=['SOC']) + df_blend_voltage_aligned['Voltage'] = electrode_1_voltage_aligned.merge(electrode_2_voltage_aligned, + on='Voltage')['Voltage'] + + df_blended_interper = interp1d(df_blend_voltage_aligned['SOC'], df_blend_voltage_aligned['Voltage'], + bounds_error=False) + soc_vec = np.linspace(0, 100, 1001) + + df_blended = pd.DataFrame(df_blended_interper(soc_vec), columns=['Voltage_aligned']) + df_blended['SOC_aligned'] = soc_vec + + # Modify NE to fully span 100% SOC within its valid voltage window + df_blended_soc_mod_interper = interp1d(df_blended['SOC_aligned'].loc[~df_blended['Voltage_aligned'].isna()], + df_blended['Voltage_aligned'].loc[~df_blended['Voltage_aligned'].isna()], + bounds_error=False) + soc_vec = np.linspace(np.min(df_blended['SOC_aligned'].loc[~df_blended['Voltage_aligned'].isna()]), + np.max(df_blended['SOC_aligned'].loc[~df_blended['Voltage_aligned'].isna()]), + 1001) + df_blended_soc_mod = pd.DataFrame(df_blended_soc_mod_interper(soc_vec), columns=['Voltage_aligned']) + df_blended_soc_mod['SOC_aligned'] = soc_vec / np.max(soc_vec) * 100 + return df_blended_soc_mod + diff --git a/beep/features/intracell_losses.py b/beep/features/intracell/intracell_losses.py similarity index 96% rename from beep/features/intracell_losses.py rename to beep/features/intracell/intracell_losses.py index e8ce7746..0ac1da92 100644 --- a/beep/features/intracell_losses.py +++ b/beep/features/intracell/intracell_losses.py @@ -3,15 +3,15 @@ import pandas as pd from beep import PROTOCOL_PARAMETERS_DIR -from beep.features import featurizer_helpers -from beep.features.base import BEEPFeaturizer -from beep.features.intracell_analysis import IntracellAnalysis +from beep.features import helper_functions +from beep.features.featurizer import BEEPEarlyCyclesFeaturizer +from beep.features.intracell.intracell_analysis import IntracellAnalysis DEFAULT_CELL_INFO_DIR = os.path.join(PROTOCOL_PARAMETERS_DIR, "intracell_info") -class IntracellCycles(BEEPFeaturizer): +class IntracellAllCycles(BEEPEarlyCyclesFeaturizer): """ Object corresponding to the fitted material parameters of the cell. Material parameters are determined by using high resolution half cell data to fit full cell dQdV curves. Rows @@ -52,7 +52,7 @@ def validate(self): Returns: bool: True/False indication of ability to proceed with feature generation """ - val, msg = featurizer_helpers.check_diagnostic_validation(self.datapath) + val, msg = helper_functions.check_diagnostic_validation(self.datapath) if val: conditions = [] @@ -131,7 +131,7 @@ def create_features(self): self.features = degradation_df -class IntracellFeatures(IntracellCycles): +class IntracellAllCyclesFeatures(IntracellAllCycles): """ Object corresponding to the fitted material parameters of the cell. Material parameters are determined by using high resolution half cell data to fit full cell dQdV curves. The diff --git a/beep/features/intracell_losses_v2.py b/beep/features/intracell/intracell_lossesv2.py similarity index 77% rename from beep/features/intracell_losses_v2.py rename to beep/features/intracell/intracell_lossesv2.py index ab0413d1..fdad7202 100644 --- a/beep/features/intracell_losses_v2.py +++ b/beep/features/intracell/intracell_lossesv2.py @@ -1,17 +1,16 @@ import os - +import numpy as np import pandas as pd from beep import PROTOCOL_PARAMETERS_DIR -from beep.features import featurizer_helpers -from beep.features.base import BEEPFeaturizer -from beep.features.intracell_analysis_v2 import IntracellAnalysisV2 - +from beep.features import helper_functions +from beep.features.featurizer import BEEPPerCycleFeaturizer +from beep.features.intracell.intracell_analysisv2 import IntracellAnalysisV2 DEFAULT_CELL_INFO_DIR = os.path.join(PROTOCOL_PARAMETERS_DIR, "intracell_info") -class IntracellCyclesV2(BEEPFeaturizer): +class IntracellCyclesV2(BEEPPerCycleFeaturizer): """ Object corresponding to the fitted material parameters of the cell. Material parameters are determined by using high resolution half cell data to fit full cell dQdV curves. Rows @@ -24,19 +23,24 @@ class IntracellCyclesV2(BEEPFeaturizer): """ DEFAULT_HYPERPARAMETERS = { - "diagnostic_cycle_type": 'rpt_0.2C', - "step_type": 0, - # Paths for anode files should be absolute - # Defaults are for the specified names in the current dir - "anode_file": os.path.join( - DEFAULT_CELL_INFO_DIR, - 'anode_test.csv' - ), - "cathode_file": os.path.join( - DEFAULT_CELL_INFO_DIR, - 'cathode_test.csv' - ), - } + 'pe_pristine_files_dict':{}, + 'pe_pristine_usecols':['Ecell/V','Capacity/mA.h', + 'SOC_aligned','c_rate', + 'BVV_c_rate','Voltage_aligned'], + 'ne_1_pristine_files_dict':{}, + 'ne_1_pristine_usecols':['Ecell/V','Capacity/mA.h', + 'SOC_aligned','c_rate', + 'BVV_c_rate','Voltage_aligned'], + 'Q_fc_nom':4.84, + 'C_nom':-0.2, + 'cycle_type':'rpt_0.2C', + 'step_type': 1, + 'error_type':'dVdQ', + 'error_weighting':'dQdV', + 'dvdq_bound':None, + 'ne_2pos_file':None, + 'ne_2neg_file':None + } def validate(self): """ @@ -52,13 +56,13 @@ def validate(self): Returns: bool: True/False indication of ability to proceed with feature generation """ - val, msg = featurizer_helpers.check_diagnostic_validation(self.datapath) + val, msg = helper_functions.check_diagnostic_validation(self.datapath) if val: conditions = [] # Ensure overlap of cycle indices above threshold and matching cycle type eol_cycle_index_list = self.datapath.diagnostic_summary[ - (self.datapath.diagnostic_summary.cycle_type == self.hyperparameters["diagnostic_cycle_type"]) & + (self.datapath.diagnostic_summary.cycle_type == self.hyperparameters["cycle_type"]) & (self.datapath.diagnostic_summary.discharge_capacity > IntracellAnalysisV2.THRESHOLD) ].cycle_index.to_list() if not eol_cycle_index_list: @@ -93,10 +97,19 @@ def create_features(self): (pd.DataFrame) containing the cell material parameters as a function of cycle index """ ia = IntracellAnalysisV2( - self.hyperparameters["cathode_file"], - self.hyperparameters["anode_file"], - cycle_type=self.hyperparameters["diagnostic_cycle_type"], - step_type=self.hyperparameters["step_type"] + pe_pristine_files_dict=self.hyperparameters["pe_pristine_files_dict"], + pe_pristine_usecols=self.hyperparameters["pe_pristine_usecols"], + ne_1_pristine_files_dict=self.hyperparameters["ne_1_pristine_files_dict"], + ne_1_pristine_usecols=self.hyperparameters["ne_1_pristine_usecols"], + Q_fc_nom=self.hyperparameters["Q_fc_nom"], + C_nom=self.hyperparameters["C_nom"], + cycle_type=self.hyperparameters["cycle_type"], + step_type=self.hyperparameters["step_type"], + error_type=self.hyperparameters["error_type"], + error_weighting=self.hyperparameters["error_weighting"], + dvdq_bound=self.hyperparameters["dvdq_bound"], + ne_2pos_file=self.hyperparameters["ne_2pos_file"], + ne_2neg_file=self.hyperparameters["ne_2neg_file"] ) # (cell_init_aligned, cell_init_profile, PE_matched, NE_matched) = ia.intracell_wrapper_init( @@ -111,15 +124,17 @@ def create_features(self): # initialize dicts before for loop dataset_dict_of_cell_degradation_path = dict() real_cell_dict_of_profiles = dict() - for i, cycle_index in enumerate(eol_cycle_index_list): + for diag_pos, cycle_index in enumerate(eol_cycle_index_list): loss_dict, profiles_dict = ia.intracell_values_wrapper_ah(cycle_index, self.datapath ) + loss_dict[cycle_index] = np.append(diag_pos,loss_dict[cycle_index]) dataset_dict_of_cell_degradation_path.update(loss_dict) - real_cell_dict_of_profiles.update(profiles_dict) +# real_cell_dict_of_profiles.update(profiles_dict) degradation_df = pd.DataFrame(dataset_dict_of_cell_degradation_path, - index=['rmse_error', 'LLI_opt', 'Q_pe_opt', 'Q_ne_opt', 'x_NE_2', + index=['diag_pos','rmse_error', 'LLI_opt', 'Q_pe_opt', 'Q_ne_opt', 'x_NE_2', + 'IR_coef_pe_opt','IR_coef_ne_opt', 'pe_voltage_FC4p2V', 'pe_voltage_FC4p1V', 'pe_voltage_FC4p0V', 'pe_voltage_FC3p9V', 'pe_voltage_FC3p8V', 'pe_voltage_FC3p7V', 'pe_voltage_FC3p6V', 'pe_voltage_FC3p5V', 'pe_voltage_FC3p4V', @@ -143,6 +158,8 @@ def create_features(self): 'ne_soc_FC3p2V', 'ne_soc_FC3p1V', 'ne_soc_FC3p0V', 'ne_soc_FC2p9V', 'ne_soc_FC2p8V', 'ne_soc_FC2p7V', 'Q_fc', 'Q_pe', 'Q_ne', 'Q_li']).T + degradation_df = degradation_df.reset_index().rename(columns={'index':'cycle_index'}) + degradation_df['diag_pos'] = degradation_df['diag_pos'].astype(int) self.features = degradation_df @@ -173,10 +190,17 @@ def create_features(self): """ ia = IntracellAnalysisV2( - self.hyperparameters["cathode_file"], - self.hyperparameters["anode_file"], - cycle_type=self.hyperparameters["diagnostic_cycle_type"], - step_type=self.hyperparameters["step_type"] + pe_pristine_dict=self.hyperparameters["pe_pristine_dict"], + ne_1_pristine_dict=self.hyperparameters["ne_1_pristine_dict"], + Q_fc_nom=self.hyperparameters["Q_fc_nom"], + C_nom=self.hyperparameters["C_nom"], + cycle_type=self.hyperparameters["cycle_type"], + step_type=self.hyperparameters["step_type"], + error_type=self.hyperparameters["error_type"], + error_weighting=self.hyperparameters["error_weighting"], + dvdq_bound=self.hyperparameters["dvdq_bound"], + ne_2pos_file=self.hyperparameters["ne_2pos_file"], + ne_2neg_file=self.hyperparameters["ne_2neg_file"] ) # (cell_init_aligned, cell_init_profile, PE_matched, NE_matched) = ia.intracell_wrapper_init( @@ -228,4 +252,4 @@ def create_features(self): diag_1_names = ["diag_1_" + name for name in degradation_df.columns] values = {0: degradation_df.iloc[0].tolist() + degradation_df.iloc[1].tolist()} features_df = pd.DataFrame(values, index=diag_0_names+diag_1_names).T - self.features = features_df + self.features = features_df \ No newline at end of file diff --git a/beep/features/intracell_analysis_v2.py b/beep/features/intracell_analysis_v2.py deleted file mode 100644 index a88b44ee..00000000 --- a/beep/features/intracell_analysis_v2.py +++ /dev/null @@ -1,1320 +0,0 @@ -import numpy as np -import pandas as pd -from scipy.interpolate import interp1d -from scipy.spatial import distance -from scipy.optimize import differential_evolution - - -class IntracellAnalysisV2: - # IA constants - FC_UPPER_VOLTAGE = 4.20 - FC_LOWER_VOLTAGE = 2.70 - NE_UPPER_VOLTAGE = 0.01 - NE_LOWER_VOLTAGE = 1.50 - PE_UPPER_VOLTAGE = 4.30 - PE_LOWER_VOLTAGE = 2.86 - THRESHOLD = 4.84 * 0.0 - - def __init__(self, - pe_pristine_file, - ne_pristine_file, - cycle_type='rpt_0.2C', - step_type=0, - error_type='V-Q', - ne_2pos_file=None, - ne_2neg_file=None - ): - """ - Invokes the cell electrode analysis class. This is a class designed to fit the cell and electrode - parameters in order to determine changes of electrodes within the full cell from only full cell cycling data. - Args: - pe_pristine_file (str): file name for the half cell data of the pristine (uncycled) positive - electrode - ne_pristine_file (str): file name for the half cell data of the pristine (uncycled) negative - electrode - cycle_type (str): type of diagnostic cycle for the fitting - step_type (int): charge or discharge (0 for charge, 1 for discharge) - error_type (str): defines which error metric is to be used - ne_2neg_file (str): file name of the data for the negative component of the anode - ne_2pos_file (str): file name of the data for the positive component of the anode - """ - self.pe_pristine = pd.read_csv(pe_pristine_file, usecols=['SOC_aligned', 'Voltage_aligned']) - self.ne_1_pristine = pd.read_csv(ne_pristine_file, usecols=['SOC_aligned', 'Voltage_aligned']) - - if ne_2neg_file and ne_2pos_file: - self.ne_2_pristine_pos = pd.read_csv(ne_2pos_file) - self.ne_2_pristine_neg = pd.read_csv(ne_2neg_file) - else: - self.ne_2_pristine_pos = pd.DataFrame() - self.ne_2_pristine_neg = pd.DataFrame() - - if step_type == 0: - self.capacity_col = 'charge_capacity' - else: - self.capacity_col = 'discharge_capacity' - - self.cycle_type = cycle_type - self.step_type = step_type - self.error_type = error_type - - def process_beep_cycle_data_for_candidate_halfcell_analysis_ah(self, - cell_struct, - cycle_index): - """ - Ingests BEEP structured cycling data and cycle_index and returns - a Dataframe of evenly spaced capacity with corresponding voltage. - - Inputs: - cell_struct (MaccorDatapath): BEEP structured cycling data - cycle_index (int): cycle number at which to evaluate - - Outputs: - real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned (evenly spaced) - and Voltage_aligned - """ - - # filter the data down to the diagnostic type of interest - diag_type_cycles = cell_struct.diagnostic_data.loc[cell_struct.diagnostic_data['cycle_type'] == self.cycle_type] - real_cell_candidate_charge_profile = diag_type_cycles.loc[ - (diag_type_cycles.cycle_index == cycle_index) - & (diag_type_cycles.step_type == 0) # step_type = 0 is charge, 1 is discharge - & (diag_type_cycles.voltage < self.FC_UPPER_VOLTAGE) - & (diag_type_cycles[self.capacity_col] > 0)][['voltage', 'charge_capacity']] - - # renaming capacity,voltage column - real_cell_candidate_charge_profile['Q'] = real_cell_candidate_charge_profile['charge_capacity'] - - real_cell_candidate_charge_profile['Voltage'] = real_cell_candidate_charge_profile['voltage'] - real_cell_candidate_charge_profile.drop('voltage', axis=1, inplace=True) - - # interpolate voltage along evenly spaced capacity axis - q_vec = np.linspace(0, np.max(real_cell_candidate_charge_profile['Q']), 1001) - - real_cell_candidate_charge_profile_aligned = pd.DataFrame() - real_cell_candidate_charge_profile_interper = interp1d(real_cell_candidate_charge_profile['Q'], - real_cell_candidate_charge_profile['Voltage'], - bounds_error=False, - fill_value=( - self.FC_LOWER_VOLTAGE, self.FC_UPPER_VOLTAGE)) - real_cell_candidate_charge_profile_aligned['Voltage_aligned'] = real_cell_candidate_charge_profile_interper( - q_vec) - - real_cell_candidate_charge_profile_aligned['Q_aligned'] = q_vec - - return real_cell_candidate_charge_profile_aligned - - def _impose_electrode_scale(self, - pe_pristine=pd.DataFrame(), - ne_1_pristine=pd.DataFrame(), - ne_2_pristine_pos=pd.DataFrame(), - ne_2_pristine_neg=pd.DataFrame(), - lli=0.0, q_pe=0.0, q_ne=0.0, x_ne_2=0.0): - """ - Scales the reference electrodes according to specified capacities and - offsets their capacities according to lli. Blends negative electrode materials. - - Inputs: - pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive - electrode - ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative - electrode - ne_2_pos (Dataframe): half cell data for the positive component of the anode - ne_2_neg (Dataframe): half cell data for the negative component of the anode - lli (float): Loss of Lithium Inventory - capacity of the misalignment between - cathode and anode zero-capacity - q_pe (float): capacity of the positive electrode (cathode) - q_ne (float): capacity of the negative electrode (anode) - x_ne_2 (float): fraction of ne_2_pristine_pos or ne_2_pristine_neg - (positive or negative value, respectively) to ne_1_pristine - - Outputs: - pe_degraded (Dataframe): positive electrode with imposed capacity - scale to emulate degradation - ne_degraded (Dataframe): negative electrode with imposed capacity - scale and capacity offset to emulate degradation - """ - # Blend negative electrodes - ne_pristine = blend_electrodes(ne_1_pristine, ne_2_pristine_pos, ne_2_pristine_neg, x_ne_2) - - # rescaling pristine electrodes to q_pe and q_ne - pe_q_scaled = pe_pristine.copy() - pe_q_scaled['Q_aligned'] = (pe_q_scaled['SOC_aligned'] / 100) * q_pe - ne_q_scaled = ne_pristine.copy() - ne_q_scaled['Q_aligned'] = (ne_q_scaled['SOC_aligned'] / 100) * q_ne - - # translate pristine ne electrode with lli - ne_q_scaled['Q_aligned'] = ne_q_scaled['Q_aligned'] + lli - - # Re-interpolate to align dataframes for differencing - lower_q = np.min((np.min(pe_q_scaled['Q_aligned']), - np.min(ne_q_scaled['Q_aligned']))) - upper_q = np.max((np.max(pe_q_scaled['Q_aligned']), - np.max(ne_q_scaled['Q_aligned']))) - q_vec = np.linspace(lower_q, upper_q, 1001) - - # Actually aligning the electrode Q's - pe_pristine_interper = interp1d(pe_q_scaled['Q_aligned'], - pe_q_scaled['Voltage_aligned'], bounds_error=False) - pe_degraded = pe_q_scaled.copy() - pe_degraded['Q_aligned'] = q_vec - pe_degraded['Voltage_aligned'] = pe_pristine_interper(q_vec) - - ne_pristine_interper = interp1d(ne_q_scaled['Q_aligned'], - ne_q_scaled['Voltage_aligned'], bounds_error=False) - ne_degraded = ne_q_scaled.copy() - ne_degraded['Q_aligned'] = q_vec - ne_degraded['Voltage_aligned'] = ne_pristine_interper(q_vec) - - # Returning pe and ne degraded on an Ah basis - return pe_degraded, ne_degraded - - def halfcell_degradation_matching_ah(self, x, *params): - """ - Calls underlying functions to impose degradation through electrode - capacity scale and alignment through LLI. Modifies emulated full cell - data to be within full cell voltage range and calibrates (zeros) capacity - at the lowest permissible voltage. Interpolates real and emulated data onto - a common capacity axis. - - Inputs: - x (list): [LLI, q_pe, q_ne, x_ne_2] - *params: - pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive - electrode - ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative - electrode - ne_2_pos (Dataframe): half cell data for the positive component of the anode - ne_2_neg (Dataframe): half cell data for the negative component of the anode - real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned - (evenly spaced) and Voltage_aligned - Outputs: - pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled, - offset, and aligned along capacity - ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled, - offset, and aligned along capacity - - df_real_aligned (Dataframe): capacity and voltage interpolated evenly across - capacity for the real cell data - emulated_full_cell_aligned (Dataframe): capacity and voltage interpolated evenly - across capacity for the emulated cell data - """ - - lli = x[0] - q_pe = x[1] - q_ne = x[2] - x_ne_2 = x[3] - - (pe_pristine, - ne_1_pristine, - ne_2_pristine_pos, - ne_2_pristine_neg, - real_cell_candidate_charge_profile_aligned) = params - - # output degraded ne and pe (on a AH basis, with electrode alignment - # (NaNs for voltage, when no capacity actually at the corresponding capacity index)) - pe_out, ne_out = self._impose_electrode_scale(pe_pristine, ne_1_pristine, - ne_2_pristine_pos, ne_2_pristine_neg, - lli, q_pe, - q_ne, x_ne_2) - - # PE - NE = full cell voltage - emulated_full_cell_with_degradation = pd.DataFrame() - emulated_full_cell_with_degradation['Q_aligned'] = pe_out['Q_aligned'].copy() - emulated_full_cell_with_degradation['Voltage_aligned'] = pe_out['Voltage_aligned'] - ne_out['Voltage_aligned'] - - # Replace emulated full cell values outside of voltage range with NaN - emulated_full_cell_with_degradation['Voltage_aligned'].loc[ - emulated_full_cell_with_degradation['Voltage_aligned'] < self.FC_LOWER_VOLTAGE] = np.nan - emulated_full_cell_with_degradation['Voltage_aligned'].loc[ - emulated_full_cell_with_degradation['Voltage_aligned'] > self.FC_UPPER_VOLTAGE] = np.nan - - # Center the emulated full cell and half cell curves onto the same Q at which the real (degraded) - # capacity measurement started (self.FC_LOWER_VOLTAGE) - emulated_full_cell_with_degradation_zeroed = pd.DataFrame() - - emulated_full_cell_with_degradation_zeroed['Voltage_aligned'] = emulated_full_cell_with_degradation[ - 'Voltage_aligned'].copy() - - zeroing_value = emulated_full_cell_with_degradation['Q_aligned'].loc[ - np.nanargmin(emulated_full_cell_with_degradation['Voltage_aligned']) - ] - - emulated_full_cell_with_degradation_zeroed['Q_aligned'] = \ - (emulated_full_cell_with_degradation['Q_aligned'].copy() - zeroing_value) - - pe_out_zeroed = pe_out.copy() - pe_out_zeroed['Q_aligned'] = pe_out['Q_aligned'] - zeroing_value - - ne_out_zeroed = ne_out.copy() - ne_out_zeroed['Q_aligned'] = ne_out['Q_aligned'] - zeroing_value - - # Interpolate full cell profiles across same Q range - min_q = np.min( - real_cell_candidate_charge_profile_aligned['Q_aligned'].loc[ - ~real_cell_candidate_charge_profile_aligned['Voltage_aligned'].isna()]) - max_q = np.max( - real_cell_candidate_charge_profile_aligned['Q_aligned'].loc[ - ~real_cell_candidate_charge_profile_aligned['Voltage_aligned'].isna()]) - emulated_interper = interp1d(emulated_full_cell_with_degradation_zeroed['Q_aligned'].loc[ - ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()], - emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].loc[ - ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()], - bounds_error=False) - real_interper = interp1d( - real_cell_candidate_charge_profile_aligned['Q_aligned'].loc[ - ~real_cell_candidate_charge_profile_aligned['Voltage_aligned'].isna()], - real_cell_candidate_charge_profile_aligned['Voltage_aligned'].loc[ - ~real_cell_candidate_charge_profile_aligned['Voltage_aligned'].isna()], - bounds_error=False) - - q_vec = np.linspace(min_q, max_q, 1001) - - emulated_aligned = pd.DataFrame() - emulated_aligned['Q_aligned'] = q_vec - emulated_aligned['Voltage_aligned'] = emulated_interper(q_vec) - - real_aligned = pd.DataFrame() - real_aligned['Q_aligned'] = q_vec - real_aligned['Voltage_aligned'] = real_interper(q_vec) - - return pe_out_zeroed, ne_out_zeroed, real_aligned, emulated_aligned - - def get_dqdv_over_v_from_degradation_matching_ah(self, x, *params): - """ - This function imposes degradation scaling ,then outputs the dqdv representation of the emulated cell data. - - Inputs: - x (list): [LLI, q_pe, q_ne, x_ne_2] - *params: - pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive - electrode - ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative - electrode - ne_2_pos (Dataframe): half cell data for the positive component of the anode - ne_2_neg (Dataframe): half cell data for the negative component of the anode - real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned - (evenly spaced) and Voltage_aligned - - Outputs: - pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled, - offset, and aligned along capacity - ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled, - offset, and aligned along capacity - dq_dv_over_v_real (Dataframe): dqdv across voltage for the real cell data - dq_dv_over_v_emulated (Dataframe): dqdv across voltage for the emulated cell data - df_real_interped (Dataframe): capacity and voltage interpolated evenly across - capacity for the real cell data - emulated_full_cell_interped (Dataframe): capacity and voltage interpolated evenly - across capacity for the emulated cell data - """ - - pe_out_zeroed, ne_out_zeroed, df_real_interped, emulated_full_cell_interped = \ - self.halfcell_degradation_matching_ah(x, *params) - - # Calculate dqdv from full cell profiles - dq_dv_real = pd.DataFrame(np.gradient(df_real_interped['Q_aligned'], df_real_interped['Voltage_aligned']), - columns=['dQdV']).ewm(0.1).mean() - dq_dv_emulated = pd.DataFrame( - np.gradient(emulated_full_cell_interped['Q_aligned'], emulated_full_cell_interped['Voltage_aligned']), - columns=['dQdV']).ewm(0.1).mean() - - # Include original data - dq_dv_real['Q_aligned'] = df_real_interped['Q_aligned'] - dq_dv_real['Voltage_aligned'] = df_real_interped['Voltage_aligned'] - - dq_dv_emulated['Q_aligned'] = emulated_full_cell_interped['Q_aligned'] - dq_dv_emulated['Voltage_aligned'] = emulated_full_cell_interped['Voltage_aligned'] - - # Interpolate dQdV and Q over V, aligns real and emulated over V - voltage_vec = np.linspace(self.FC_LOWER_VOLTAGE, self.FC_UPPER_VOLTAGE, 1001) - - v_dq_dv_interper_real = interp1d(dq_dv_real['Voltage_aligned'].loc[~dq_dv_real['Voltage_aligned'].isna()], - dq_dv_real['dQdV'].loc[~dq_dv_real['Voltage_aligned'].isna()], - bounds_error=False, fill_value=0) - v_q_interper_real = interp1d(dq_dv_real['Voltage_aligned'].loc[~dq_dv_real['Voltage_aligned'].isna()], - dq_dv_real['Q_aligned'].loc[~dq_dv_real['Voltage_aligned'].isna()], - bounds_error=False, fill_value=(0, np.max(df_real_interped['Q_aligned']))) - - v_dq_dv_interper_emulated = interp1d(dq_dv_emulated['Voltage_aligned'].loc[ - ~dq_dv_emulated['Voltage_aligned'].isna()], - dq_dv_emulated['dQdV'].loc[~dq_dv_emulated['Voltage_aligned'].isna()], - bounds_error=False, fill_value=0) - v_q_interper_emulated = interp1d(dq_dv_emulated['Voltage_aligned'].loc[ - ~dq_dv_emulated['Voltage_aligned'].isna()], - dq_dv_emulated['Q_aligned'].loc[~dq_dv_emulated['Voltage_aligned'].isna()], - bounds_error=False, fill_value=(0, np.max(df_real_interped['Q_aligned']))) - - dq_dv_over_v_real = pd.DataFrame(v_dq_dv_interper_real(voltage_vec), columns=['dQdV']).fillna(0) - dq_dv_over_v_real['Q_aligned'] = v_q_interper_real(voltage_vec) - dq_dv_over_v_real['Voltage_aligned'] = voltage_vec - - dq_dv_over_v_emulated = pd.DataFrame(v_dq_dv_interper_emulated(voltage_vec), columns=['dQdV']).fillna(0) - dq_dv_over_v_emulated['Q_aligned'] = v_q_interper_emulated(voltage_vec) - dq_dv_over_v_emulated['Voltage_aligned'] = voltage_vec - - return (pe_out_zeroed, - ne_out_zeroed, - dq_dv_over_v_real, - dq_dv_over_v_emulated, - df_real_interped, - emulated_full_cell_interped) - - def get_dvdq_over_q_from_degradation_matching_ah(self, x, *params): - """ - This function imposes degradation scaling ,then outputs the dVdQ representation of the emulated cell data. - Inputs: - x (list): [LLI, q_pe, q_ne, x_ne_2] - *params: - pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive - electrode - ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative - electrode - ne_2_pos (Dataframe): half cell data for the positive component of the anode - ne_2_neg (Dataframe): half cell data for the negative component of the anode - real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned - (evenly spaced) and Voltage_aligned - Outputs: - pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled, - offset, and aligned along capacity - ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled, - offset, and aligned along capacity - dv_dq_real (Dataframe): dVdQ across capacity for the real cell data - dv_dq_emulated (Dataframe): dVdQ across capacity for the emulated cell data - df_real_interped (Dataframe): capacity and voltage interpolated evenly across - capacity for the real cell data - emulated_full_cell_interped (Dataframe): capacity and voltage interpolated evenly - across capacity for the emulated cell data - """ - - pe_out_zeroed, ne_out_zeroed, df_real_interped, emulated_full_cell_interped = \ - self.halfcell_degradation_matching_ah(x, *params) - - # Calculate dQdV from full cell profiles - dv_dq_real = pd.DataFrame(np.gradient(df_real_interped['Voltage_aligned'], df_real_interped['Q_aligned']), - columns=['dVdQ']).ewm(0.1).mean() - dv_dq_emulated = pd.DataFrame( - np.gradient(emulated_full_cell_interped['Voltage_aligned'], emulated_full_cell_interped['Q_aligned']), - columns=['dVdQ']).ewm(0.1).mean() - - # Include original data - dv_dq_real['Q_aligned'] = df_real_interped['Q_aligned'] - dv_dq_real['Voltage_aligned'] = df_real_interped['Voltage_aligned'] - - dv_dq_emulated['Q_aligned'] = emulated_full_cell_interped['Q_aligned'] - dv_dq_emulated['Voltage_aligned'] = emulated_full_cell_interped['Voltage_aligned'] - - # Q interpolation not needed, as interpolated over Q by default - - return (pe_out_zeroed, - ne_out_zeroed, - dv_dq_real, - dv_dq_emulated, - df_real_interped, - emulated_full_cell_interped) - - def get_v_over_q_from_degradation_matching_ah(self, x, *params): - """ - This function imposes degradation scaling ,then outputs the V-Q representation of the emulated cell data. - Inputs: - x (list): [LLI, q_pe, q_ne, x_ne_2] - *params: - pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive - electrode - ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative - electrode - ne_2_pos (Dataframe): half cell data for the positive component of the anode - ne_2_neg (Dataframe): half cell data for the negative component of the anode - real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned - (evenly spaced) and Voltage_aligned - Outputs: - pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled, - offset, and aligned along capacity - ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled, - offset, and aligned along capacity - df_real_interped (Dataframe): capacity and voltage interpolated evenly across - capacity for the real cell data - emulated_full_cell_interped (Dataframe): capacity and voltage interpolated evenly - across capacity for the emulated cell data - """ - (pe_out_zeroed, ne_out_zeroed, real_aligned, emulated_aligned) = \ - self.halfcell_degradation_matching_ah(x, *params) - - min_soc_full_cell = np.min(real_aligned.loc[~real_aligned.Voltage_aligned.isna()].Q_aligned) - max_soc_full_cell = np.max(real_aligned.loc[~real_aligned.Voltage_aligned.isna()].Q_aligned) - - soc_vec_full_cell = np.linspace(min_soc_full_cell, max_soc_full_cell, 1001) - - emulated_full_cell_interper = interp1d( - emulated_aligned.Q_aligned.loc[~real_aligned.Voltage_aligned.isna()], - emulated_aligned.Voltage_aligned.loc[~real_aligned.Voltage_aligned.isna()], - bounds_error=False) - real_full_cell_interper = interp1d(real_aligned.Q_aligned.loc[~real_aligned.Voltage_aligned.isna()], - real_aligned.Voltage_aligned.loc[~real_aligned.Voltage_aligned.isna()], - bounds_error=False) - - # Interpolate the emulated full-cell profile - emulated_full_cell_interped = pd.DataFrame() - emulated_full_cell_interped['Q_aligned'] = soc_vec_full_cell - emulated_full_cell_interped['Voltage_aligned'] = emulated_full_cell_interper(soc_vec_full_cell) - - # Interpolate the true full-cell profile - df_real_interped = emulated_full_cell_interped.copy() - df_real_interped['Q_aligned'] = soc_vec_full_cell - df_real_interped['Voltage_aligned'] = real_full_cell_interper(soc_vec_full_cell) - return pe_out_zeroed, ne_out_zeroed, df_real_interped, emulated_full_cell_interped - - def get_v_over_q_from_degradation_matching_ah_no_real(self, x, *params): - """ - This function imposes degradation scaling ,then outputs the V-Q representation of the - emulated cell data, in the absence of real cell data. - - Inputs: - x (list): [LLI, q_pe, q_ne, x_ne_2] - *params: - pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive - electrode - ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative - electrode - ne_2_pos (Dataframe): half cell data for the positive component of the anode - ne_2_neg (Dataframe): half cell data for the negative component of the anode - real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned - (evenly spaced) and Voltage_aligned - - Outputs: - pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled, - offset, and aligned along capacity - ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled, - offset, and aligned along capacity - emulated_full_cell_interped (Dataframe): capacity and voltage interpolated evenly - across capacity for the emulated cell data - - """ - (pe_out_zeroed, ne_out_zeroed, emulated_aligned) = \ - self.halfcell_degradation_matching_ah_no_real(x, *params) - - min_q_full_cell = np.min(emulated_aligned.loc[~emulated_aligned.Voltage_aligned.isna()].Q_aligned) - max_q_full_cell = np.max(emulated_aligned.loc[~emulated_aligned.Voltage_aligned.isna()].Q_aligned) - - q_vec_full_cell = np.linspace(min_q_full_cell, max_q_full_cell, 1001) - - emulated_full_cell_interper = interp1d( - emulated_aligned.Q_aligned.loc[~emulated_aligned.Voltage_aligned.isna()], - emulated_aligned.Voltage_aligned.loc[~emulated_aligned.Voltage_aligned.isna()], - bounds_error=False) - - # Interpolate the emulated full-cell profile - emulated_full_cell_interped = pd.DataFrame() - emulated_full_cell_interped['Q_aligned'] = q_vec_full_cell - emulated_full_cell_interped['Voltage_aligned'] = emulated_full_cell_interper(q_vec_full_cell) - - return pe_out_zeroed, ne_out_zeroed, emulated_full_cell_interped - - def halfcell_degradation_matching_ah_no_real(self, x, *params): - """ - Calls underlying functions to impose degradation through electrode - capacity scale and alignment through LLI. Modifies emulated full cell - data to be within full cell voltage range and calibrates (zeros) capacity - at the lowest permissible voltage. - - Inputs: - x (list): [LLI, q_pe, q_ne, x_ne_2] - *params: - pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive - electrode - ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative - electrode - ne_2_pos (Dataframe): half cell data for the positive component of the anode - ne_2_neg (Dataframe): half cell data for the negative component of the anode - real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned - (evenly spaced) and Voltage_aligned - - Outputs: - pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled, - offset, and aligned along capacity - ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled, - offset, and aligned along capacity - emulated_aligned (Dataframe): full cell data corresponding to the imposed degradation - """ - lli = x[0] - q_pe = x[1] - q_ne = x[2] - x_ne_2 = x[3] - - pe_pristine, ne_1_pristine, ne_2_pristine_pos, ne_2_pristine_neg = params - - pe_out, ne_out = self._impose_electrode_scale(pe_pristine, ne_1_pristine, - ne_2_pristine_pos, ne_2_pristine_neg, - lli, q_pe, - q_ne, - x_ne_2) - # outputs degraded ne and pe (on a AH basis, with electrode alignment (NaNs for voltage, when no overlap)) - - emulated_full_cell_with_degradation = pd.DataFrame() - emulated_full_cell_with_degradation['Q_aligned'] = pe_out['Q_aligned'].copy() - emulated_full_cell_with_degradation['Voltage_aligned'] = pe_out['Voltage_aligned'] - ne_out['Voltage_aligned'] - - # Replace emulated full cell values outside of voltage range with NaN - emulated_full_cell_with_degradation['Voltage_aligned'].loc[ - emulated_full_cell_with_degradation['Voltage_aligned'] < self.FC_LOWER_VOLTAGE] = np.nan - emulated_full_cell_with_degradation['Voltage_aligned'].loc[ - emulated_full_cell_with_degradation['Voltage_aligned'] > self.FC_UPPER_VOLTAGE] = np.nan - - # Center the emulated full cell and half cell curves onto the same Q at which the real (degraded) - # capacity measurement started (self.FC_LOWER_VOLTAGE) - emulated_full_cell_with_degradation_zeroed = pd.DataFrame() - - emulated_full_cell_with_degradation_zeroed['Voltage_aligned'] = emulated_full_cell_with_degradation[ - 'Voltage_aligned'] - - zeroing_value = emulated_full_cell_with_degradation['Q_aligned'].loc[ - np.nanargmin(emulated_full_cell_with_degradation['Voltage_aligned']) - ] - - emulated_full_cell_with_degradation_zeroed['Q_aligned'] = \ - (emulated_full_cell_with_degradation['Q_aligned'] - zeroing_value) - - pe_out_zeroed = pe_out.copy() - pe_out_zeroed['Q_aligned'] = pe_out['Q_aligned'] - zeroing_value - - ne_out_zeroed = ne_out.copy() - ne_out_zeroed['Q_aligned'] = ne_out['Q_aligned'] - zeroing_value - - # Interpolate full profiles across same Q range - min_q = np.min( - emulated_full_cell_with_degradation_zeroed['Q_aligned'].loc[ - ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()]) - max_q = np.max( - emulated_full_cell_with_degradation_zeroed['Q_aligned'].loc[ - ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()]) - - emulated_interper = interp1d(emulated_full_cell_with_degradation_zeroed['Q_aligned'].loc[ - ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()], - emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].loc[ - ~emulated_full_cell_with_degradation_zeroed['Voltage_aligned'].isna()], - bounds_error=False) - - q_vec = np.linspace(min_q, max_q, 1001) - - emulated_aligned = pd.DataFrame() - emulated_aligned['Q_aligned'] = q_vec - emulated_aligned['Voltage_aligned'] = emulated_interper(q_vec) - - return pe_out_zeroed, ne_out_zeroed, emulated_aligned - - def _get_error_from_degradation_matching_ah(self, x, *params): - """ - Wrapper function which selects the correct error sub routine and returns its error value. - - Inputs: - x (list): [LLI, q_pe, q_ne, x_ne_2] - *params: - pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive - electrode - ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative - electrode - ne_2_pos (Dataframe): half cell data for the positive component of the anode - ne_2_neg (Dataframe): half cell data for the negative component of the anode - real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned - (evenly spaced) and Voltage_aligned - - Outputs: - error value (float) - output of the specified error sub function - """ - error_type = self.error_type - if error_type == 'V-Q': - return self._get_error_from_degradation_matching_v_q(x, *params)[0] - elif error_type == 'dVdQ': - return self._get_error_from_degradation_matching_dvdq(x, *params)[0] - elif error_type == 'dQdV': - return self._get_error_from_degradation_matching_dqdv(x, *params)[0] - else: - return self._get_error_from_degradation_matching_v_q(x, *params)[0] - - def _get_error_from_degradation_matching_v_q(self, x, *params): - """ - Error function returning the mean standardized Euclidean distance of each point of the real curve to - the closest value on the emulated curve in the V-Q representation. - - Inputs: - x (list): [LLI, q_pe, q_ne, x_ne_2] - *params: - pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive - electrode - ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative - electrode - ne_2_pos (Dataframe): half cell data for the positive component of the anode - ne_2_neg (Dataframe): half cell data for the negative component of the anode - real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned - (evenly spaced) and Voltage_aligned - - Outputs: - error (float): output of the specified error sub function - error_vector (array): vector containingEuclidean distance of each point of the real curve to - the closest value on the emulated curve in the V-Q representation - xa (Dataframe): real full cell data used for error analysis - xb (Dataframe): emulated full cell data used for error analysis - """ - - try: - (pe_out_zeroed, ne_out_zeroed, real_aligned, emulated_aligned - ) = self.get_v_over_q_from_degradation_matching_ah(x, *params) - - xa = real_aligned.dropna() - xb = emulated_aligned.dropna() - error_matrix = distance.cdist(xa, xb, 'seuclidean') - error_vector = error_matrix.min(axis=1) - error = error_vector.mean() - except ValueError: - error = 100 - return error, None, None, None - return error, error_vector, xa, xb - - # Pairwise euclidean from premade dQdV - - def _get_error_from_degradation_matching_dqdv(self, x, *params): - """ - Error function returning the mean standardized Euclidean distance of each point of the real curve to - the closest value on the emulated curve in the dQdV representation. - - Inputs: - x (list): [LLI, q_pe, q_ne, x_ne_2] - *params: - pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive - electrode - ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative - electrode - ne_2_pos (Dataframe): half cell data for the positive component of the anode - ne_2_neg (Dataframe): half cell data for the negative component of the anode - real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned - (evenly spaced) and Voltage_aligned - - Outputs: - error (float): output of the specified error sub function - error_vector (array): vector containing Euclidean distance of each point of the real curve to - the closest value on the emulated curve in the dQdV representation - xa (Dataframe): real full cell data used for error analysis - xb (Dataframe): emulated full cell data used for error analysis - """ - - try: - # Call dQdV generating function - (pe_out_zeroed, - ne_out_zeroed, - dqdv_over_v_real, - dqdv_over_v_emulated, - df_real_interped, - emulated_full_cell_interped) = self.get_dqdv_over_v_from_degradation_matching_ah(x, *params) - - xa = dqdv_over_v_real[['Voltage_aligned', 'dQdV']].dropna() - xb = dqdv_over_v_emulated[['Voltage_aligned', 'dQdV']].dropna() - error_matrix = distance.cdist(xa, xb, 'seuclidean') - error_vector = error_matrix.min(axis=1) - error = error_vector.mean() - - except ValueError: - error = 100 - return error, None, None, None - return error, error_vector, xa, xb - - def _get_error_from_degradation_matching_dvdq(self, x, *params): - """ - Error function returning the mean standardized Euclidean distance of each point of the real curve to - the closest value on the emulated curve in the dVdQ representation. - - Inputs: - x (list): [LLI, q_pe, q_ne, x_ne_2] - *params: - pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive - electrode - ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative - electrode - ne_2_pos (Dataframe): half cell data for the positive component of the anode - ne_2_neg (Dataframe): half cell data for the negative component of the anode - real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned - (evenly spaced) and Voltage_aligned - - Outputs: - error (float): output of the specified error sub function - error_vector (array): vector containing Euclidean distance of each point of the real curve to - the closest value on the emulated curve in the dVdQ representation - xa (Dataframe): real full cell data used for error analysis - xb (Dataframe): emulated full cell data used for error analysis - """ - - try: - (pe_out_zeroed, - ne_out_zeroed, - dvdq_over_q_real, - dvdq_over_q_emulated, - df_real_interped, - emulated_full_cell_interped) = self.get_dvdq_over_q_from_degradation_matching_ah(x, *params) - - xa = dvdq_over_q_real[['Q_aligned', 'dVdQ']].dropna() - xb = dvdq_over_q_emulated[['Q_aligned', 'dVdQ']].dropna() - - # down-select to values with capacity more than 0.5 Ahr to eliminate high-slope region of dVdQ - xa = xa.loc[(xa.Q_aligned > 0.5)] - xb = xb.loc[(xb.Q_aligned > 0.5)] - - error_matrix = distance.cdist(xa, xb, 'seuclidean') - error_vector = error_matrix.min(axis=1) - error = error_vector.mean() - - except ValueError: - error = 100 - return error, None, None, None - return error, error_vector, xa, xb - - def _get_error_from_synthetic_fitting_ah(self, x, *params): - """ - Wrapper function which selects the correct error sub routine and returns its error value. - This function is specific to fitting synthetic data rather than real cycling data. - - Inputs: - x (list): [LLI, q_pe, q_ne, x_ne_2] - *params: - pe_pristine (Dataframe): half cell data of the pristine (uncycled) positive - electrode - ne_pristine (Dataframe): half cell data of the pristine (uncycled) negative - electrode - ne_2_pos (Dataframe): half cell data for the positive component of the anode - ne_2_neg (Dataframe): half cell data for the negative component of the anode - real_cell_candidate_charge_profile_aligned (Dataframe): columns Q_aligned - (evenly spaced) and Voltage_aligned - - Outputs: - error value (float) - output of the specified error sub function - """ - - error_type = self.error_type - - try: - if error_type == 'V-Q': - return self._get_error_from_degradation_matching_v_q(x, *params)[0] - elif error_type == 'dVdQ': - return self._get_error_from_degradation_matching_dvdq(x, *params)[0] - elif error_type == 'dQdV': - return self._get_error_from_degradation_matching_dvdq(x, *params)[0] - else: - return self._get_error_from_degradation_matching_v_q(x, *params)[0] - except RuntimeError: - print("Can't return error") - return 100 - - def intracell_values_wrapper_ah(self, - cycle_index, - cell_struct, - degradation_bounds=None - ): - """ - Wrapper function to solve capacity sizing and offset of reference electrodes to real full cell cycle data. - - Inputs: - cycle_index (int): the index of the cycle of interest of the structured real cycling data - cell_struct (MaccorDatapath): BEEP structured cycling data - - Outputs: - loss_dict (dict): dictionary with key of cycle index and entry of a list of - error, lli_opt, q_pe_opt, q_ne_opt, x_ne_2, Q_li - profiles_dict (dict): dictionary with key of cycle index and entry of a dictionary - containing various key/entry pairs of resulting from the fitting - """ - if degradation_bounds is None: - degradation_bounds = ((0, 3), # LLI - (2.5, 6.5), # q_pe - (2.5, 6.5), # q_ne - (1, 1), # (-1,1) x_ne_2 - ) - - real_cell_candidate_charge_profile_aligned = self.process_beep_cycle_data_for_candidate_halfcell_analysis_ah( - cell_struct, - cycle_index) - - degradation_optimization_result = differential_evolution(self._get_error_from_degradation_matching_ah, - degradation_bounds, - args=(self.pe_pristine, - self.ne_1_pristine, - self.ne_2_pristine_pos, - self.ne_2_pristine_neg, - real_cell_candidate_charge_profile_aligned - ), - strategy='best1bin', maxiter=100000, - popsize=15, tol=0.001, mutation=0.5, - recombination=0.7, - seed=1, - callback=None, disp=False, polish=True, - init='latinhypercube', - atol=0, updating='deferred', workers=-1, - constraints=() - ) - # print(degradation_optimization_result.x) #BVV - - (pe_out_zeroed, - ne_out_zeroed, - dqdv_over_v_real, - dqdv_over_v_emulated, - df_real_interped, - emulated_full_cell_interped) = self.get_dqdv_over_v_from_degradation_matching_ah( - degradation_optimization_result.x, - self.pe_pristine, - self.ne_1_pristine, - self.ne_2_pristine_pos, - self.ne_2_pristine_neg, - real_cell_candidate_charge_profile_aligned) - # - electrode_info_df = get_electrode_info_ah(pe_out_zeroed, ne_out_zeroed) - # - error = degradation_optimization_result.fun - lli_opt = degradation_optimization_result.x[0] - q_pe_opt = degradation_optimization_result.x[1] - q_ne_opt = degradation_optimization_result.x[2] - x_ne_2 = degradation_optimization_result.x[3] - - loss_dict = {cycle_index: np.append([error, lli_opt, q_pe_opt, q_ne_opt, - x_ne_2], - electrode_info_df.iloc[-1].values) - } - - profiles_per_cycle_dict = { - 'NE_zeroed': ne_out_zeroed, - 'PE_zeroed': pe_out_zeroed, - 'dQdV_over_v_real': dqdv_over_v_real, - 'dQdV_over_v_emulated': dqdv_over_v_emulated, - 'df_real_interped': df_real_interped, - 'emulated_full_cell_interped': emulated_full_cell_interped, - 'real_cell_candidate_charge_profile_aligned': real_cell_candidate_charge_profile_aligned - } - - profiles_dict = {cycle_index: profiles_per_cycle_dict} - - return loss_dict, profiles_dict - - def solve_emulated_degradation(self, - forward_simulated_profile, - degradation_bounds=None - ): - - """ - - - """ - - if degradation_bounds is None: - degradation_bounds = ((0, 3), # LLI - (2.5, 6.5), # q_pe - (2.5, 6.5), # q_ne - (1, 1), # (-1,1) x_ne_2 - ) - - degradation_optimization_result = differential_evolution(self._get_error_from_synthetic_fitting_ah, - degradation_bounds, - args=(self.pe_pristine, - self.ne_1_pristine, - self.ne_2_pristine_pos, - self.ne_2_pristine_neg, - forward_simulated_profile, - ), - strategy='best1bin', maxiter=100000, - popsize=15, tol=0.001, mutation=0.5, - recombination=0.7, - seed=1, - callback=None, disp=False, polish=True, - init='latinhypercube', - atol=0, updating='deferred', workers=-1, - constraints=() - ) - - return degradation_optimization_result - - -# TODO revisit this function -def blend_electrodes(electrode_1, electrode_2_pos, electrode_2_neg, x_2): - """ - Blends two electrode materials from their SOC-V profiles to form a blended electrode. - - Inputs: - electrode_1: Primary material in electrode, typically Gr. DataFrame supplied with SOC evenly spaced and voltage. - electrode_2: Secondary material in electrode, typically Si. DataFrame supplied with SOC evenly spaced and - voltage as an additional column. - x_2: Fraction of electrode_2 material's capacity (not mass). Supplied as scalar value. - - Outputs: - df_blended_soc_mod (Dataframe): blended electrode with SOC_aligned and Voltage_aligned columns - """ - if electrode_2_pos.empty: - df_blended = electrode_1 - return df_blended - - if electrode_2_neg.empty: - electrode_2 = electrode_2_pos - x_2 = np.abs(x_2) - elif x_2 > 0: - electrode_2 = electrode_2_pos - else: - electrode_2 = electrode_2_neg - x_2 = np.abs(x_2) - - electrode_1_interper = interp1d(electrode_1['Voltage_aligned'], electrode_1['SOC_aligned'], bounds_error=False, - fill_value='extrapolate') - electrode_2_interper = interp1d(electrode_2['Voltage_aligned'], electrode_2['SOC_aligned'], bounds_error=False, - fill_value='extrapolate') - - voltage_vec = np.linspace(np.min((np.min(electrode_1['Voltage_aligned']), - np.min(electrode_2['Voltage_aligned']))), - np.max((np.max(electrode_1['Voltage_aligned']), - np.max(electrode_2['Voltage_aligned']))), - 1001) - electrode_1_voltage_aligned = pd.DataFrame(electrode_1_interper(voltage_vec), columns=['SOC']) - electrode_2_voltage_aligned = pd.DataFrame(electrode_2_interper(voltage_vec), columns=['SOC']) - electrode_1_voltage_aligned['Voltage'] = voltage_vec - electrode_2_voltage_aligned['Voltage'] = voltage_vec - - df_blend_voltage_aligned = pd.DataFrame( - (1 - x_2) * electrode_1_voltage_aligned['SOC'] + x_2 * electrode_2_voltage_aligned['SOC'], columns=['SOC']) - df_blend_voltage_aligned['Voltage'] = electrode_1_voltage_aligned.merge(electrode_2_voltage_aligned, - on='Voltage')['Voltage'] - - df_blended_interper = interp1d(df_blend_voltage_aligned['SOC'], df_blend_voltage_aligned['Voltage'], - bounds_error=False) - soc_vec = np.linspace(0, 100, 1001) - - df_blended = pd.DataFrame(df_blended_interper(soc_vec), columns=['Voltage_aligned']) - df_blended['SOC_aligned'] = soc_vec - - # Modify NE to fully span 100% SOC within its valid voltage window - df_blended_soc_mod_interper = interp1d(df_blended['SOC_aligned'].loc[~df_blended['Voltage_aligned'].isna()], - df_blended['Voltage_aligned'].loc[~df_blended['Voltage_aligned'].isna()], - bounds_error=False) - soc_vec = np.linspace(np.min(df_blended['SOC_aligned'].loc[~df_blended['Voltage_aligned'].isna()]), - np.max(df_blended['SOC_aligned'].loc[~df_blended['Voltage_aligned'].isna()]), - 1001) - df_blended_soc_mod = pd.DataFrame(df_blended_soc_mod_interper(soc_vec), columns=['Voltage_aligned']) - df_blended_soc_mod['SOC_aligned'] = soc_vec / np.max(soc_vec) * 100 - return df_blended_soc_mod - - -def get_electrode_info_ah(pe_out_zeroed, ne_out_zeroed): - """ - Calculates a variety of half-cell metrics at various positions in the full-cell profile. - - Inputs: - pe_out_zeroed (Dataframe): cathode capacity and voltage columns scaled, - offset, and aligned along capacity - ne_out_zeroed (Dataframe): anode capacity and voltage columns scaled, - offset, and aligned along capacity - - Outputs: - electrode_info_df (Dataframe): dataframe containing a variety of half-cell metrics - at various positions in the emulated full-cell profile. - - pe_voltage_FC4p2V: voltage of the positive electrode (catahode) corresponding - to the full cell at 4.2V - ... - pe_voltage_FC2p7V: voltage of the positive electrode (catahode) corresponding - to the full cell at 2.7V - - pe_soc_FC4p2V: state of charge of the positive electrode corresponding - to the full cell at 4.2V - ... - pe_soc_FC2p7V: state of charge of the positive electrode corresponding - to the full cell at 2.7V - - ne_voltage_FC4p2V: voltage of the negative electrode (anode) corresponding - to the full cell at 4.2V - ... - ne_voltage_FC2p7V: voltage of the negative electrode (anode) corresponding - to the full cell at 2.7V - - ne_soc_FC4p2V: state of charge of the anode electrode corresponding - to the full cell at 4.2V - ... - ne_soc_FC2p7V: state of charge of the anode electrode corresponding - to the full cell at 2.7V - - Q_fc: capacity of the full cecll within the full cell voltage limits - q_pe: capacity of the cathode - q_ne: capacity of the anode [Ahr] - Q_li - """ - pe_minus_ne_zeroed = pd.DataFrame(pe_out_zeroed['Voltage_aligned'] - ne_out_zeroed['Voltage_aligned'], - columns=['Voltage_aligned']) - pe_minus_ne_zeroed['Q_aligned'] = pe_out_zeroed['Q_aligned'] - - electrode_info_df = pd.DataFrame(index=[0]) - - electrode_info_df['pe_voltage_FC4p2V'] = pe_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.2))].Voltage_aligned - electrode_info_df['pe_voltage_FC4p1V'] = pe_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.1))].Voltage_aligned - electrode_info_df['pe_voltage_FC4p0V'] = pe_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.0))].Voltage_aligned - electrode_info_df['pe_voltage_FC3p9V'] = pe_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.9))].Voltage_aligned - electrode_info_df['pe_voltage_FC3p8V'] = pe_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.8))].Voltage_aligned - electrode_info_df['pe_voltage_FC3p7V'] = pe_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.7))].Voltage_aligned - electrode_info_df['pe_voltage_FC3p6V'] = pe_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.6))].Voltage_aligned - electrode_info_df['pe_voltage_FC3p5V'] = pe_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.5))].Voltage_aligned - electrode_info_df['pe_voltage_FC3p4V'] = pe_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.4))].Voltage_aligned - electrode_info_df['pe_voltage_FC3p3V'] = pe_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.3))].Voltage_aligned - electrode_info_df['pe_voltage_FC3p2V'] = pe_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.2))].Voltage_aligned - electrode_info_df['pe_voltage_FC3p1V'] = pe_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.1))].Voltage_aligned - electrode_info_df['pe_voltage_FC3p0V'] = pe_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.0))].Voltage_aligned - electrode_info_df['pe_voltage_FC2p9V'] = pe_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.9))].Voltage_aligned - electrode_info_df['pe_voltage_FC2p8V'] = pe_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.8))].Voltage_aligned - electrode_info_df['pe_voltage_FC2p7V'] = pe_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.7))].Voltage_aligned - - electrode_info_df['pe_soc_FC4p2V'] = ( - (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.2))].Q_aligned - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) - ) # 4.2V - electrode_info_df['pe_soc_FC4p1V'] = ( - (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.1))].Q_aligned - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) - ) # 4.1V - electrode_info_df['pe_soc_FC4p0V'] = ( - (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.0))].Q_aligned - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) - ) # 4.0V - electrode_info_df['pe_soc_FC3p9V'] = ( - (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.9))].Q_aligned - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) - ) # 3.9V - electrode_info_df['pe_soc_FC3p8V'] = ( - (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.8))].Q_aligned - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) - ) # 3.8V - electrode_info_df['pe_soc_FC3p7V'] = ( - (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.7))].Q_aligned - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) - ) # 3.7V - electrode_info_df['pe_soc_FC3p6V'] = ( - (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.6))].Q_aligned - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) - ) # 3.6V - electrode_info_df['pe_soc_FC3p5V'] = ( - (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.5))].Q_aligned - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) - ) # 3.5V - electrode_info_df['pe_soc_FC3p4V'] = ( - (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.4))].Q_aligned - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) - ) # 3.4V - electrode_info_df['pe_soc_FC3p3V'] = ( - (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.3))].Q_aligned - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) - ) # 3.3V - electrode_info_df['pe_soc_FC3p2V'] = ( - (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.2))].Q_aligned - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) - ) # 3.2V - electrode_info_df['pe_soc_FC3p1V'] = ( - (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.1))].Q_aligned - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) - ) # 3.1V - electrode_info_df['pe_soc_FC3p0V'] = ( - (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.0))].Q_aligned - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) - ) # 3.0V - electrode_info_df['pe_soc_FC2p9V'] = ( - (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.9))].Q_aligned - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) - ) # 2.9V - electrode_info_df['pe_soc_FC2p8V'] = ( - (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.8))].Q_aligned - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) - ) # 2.8V - electrode_info_df['pe_soc_FC2p7V'] = ( - (pe_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.7))].Q_aligned - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - - np.min(pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()])) - ) # 2.7V - - electrode_info_df['ne_voltage_FC4p2V'] = ne_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.2))].Voltage_aligned - electrode_info_df['ne_voltage_FC4p1V'] = ne_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.1))].Voltage_aligned - electrode_info_df['ne_voltage_FC4p0V'] = ne_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.0))].Voltage_aligned - electrode_info_df['ne_voltage_FC3p9V'] = ne_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.9))].Voltage_aligned - electrode_info_df['ne_voltage_FC3p8V'] = ne_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.8))].Voltage_aligned - electrode_info_df['ne_voltage_FC3p7V'] = ne_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.7))].Voltage_aligned - electrode_info_df['ne_voltage_FC3p6V'] = ne_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.6))].Voltage_aligned - electrode_info_df['ne_voltage_FC3p5V'] = ne_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.5))].Voltage_aligned - electrode_info_df['ne_voltage_FC3p4V'] = ne_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.4))].Voltage_aligned - electrode_info_df['ne_voltage_FC3p3V'] = ne_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.3))].Voltage_aligned - electrode_info_df['ne_voltage_FC3p2V'] = ne_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.2))].Voltage_aligned - electrode_info_df['ne_voltage_FC3p1V'] = ne_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.1))].Voltage_aligned - electrode_info_df['ne_voltage_FC3p0V'] = ne_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.0))].Voltage_aligned - electrode_info_df['ne_voltage_FC2p9V'] = ne_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.9))].Voltage_aligned - electrode_info_df['ne_voltage_FC2p8V'] = ne_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.8))].Voltage_aligned - electrode_info_df['ne_voltage_FC2p7V'] = ne_out_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.7))].Voltage_aligned - - electrode_info_df['ne_soc_FC4p2V'] = ( - (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.2))].Q_aligned - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) - ) # 4.2V - electrode_info_df['ne_soc_FC4p1V'] = ( - (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.1))].Q_aligned - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) - ) # 4.1V - electrode_info_df['ne_soc_FC4p0V'] = ( - (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.0))].Q_aligned - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) - ) # 4.0V - electrode_info_df['ne_soc_FC3p9V'] = ( - (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.9))].Q_aligned - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) - ) # 3.9V - electrode_info_df['ne_soc_FC3p8V'] = ( - (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.8))].Q_aligned - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) - ) # 3Q_aligned.8V - electrode_info_df['ne_soc_FC3p7V'] = ( - (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.7))].Q_aligned - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) - ) # 3.7V - electrode_info_df['ne_soc_FC3p6V'] = ( - (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.6))].Q_aligned - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) - ) # 3.6V - electrode_info_df['ne_soc_FC3p5V'] = ( - (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.5))].Q_aligned - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) - ) # 3.5V - electrode_info_df['ne_soc_FC3p4V'] = ( - (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.4))].Q_aligned - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) - ) # 3.4V - electrode_info_df['ne_soc_FC3p3V'] = ( - (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.3))].Q_aligned - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) - ) # 3.3V - electrode_info_df['ne_soc_FC3p2V'] = ( - (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.2))].Q_aligned - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) - ) # 3.2V - electrode_info_df['ne_soc_FC3p1V'] = ( - (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.1))].Q_aligned - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) - ) # 3.1V - electrode_info_df['ne_soc_FC3p0V'] = ( - (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 3.0))].Q_aligned - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) - ) # 3.0V - electrode_info_df['ne_soc_FC2p9V'] = ( - (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.9))].Q_aligned - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) - ) # 2.9V - electrode_info_df['ne_soc_FC2p8V'] = ( - (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.8))].Q_aligned - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) - ) # 2.8V - electrode_info_df['ne_soc_FC2p7V'] = ( - (ne_out_zeroed.loc[np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 2.7))].Q_aligned - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) / ( - np.max(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - - np.min(ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()])) - ) # 2.7V - - electrode_info_df['Q_fc'] = pe_minus_ne_zeroed.loc[ - np.argmin(np.abs(pe_minus_ne_zeroed.Voltage_aligned - 4.20))].Q_aligned - - electrode_info_df['Q_pe'] = np.max( - pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - np.min( - pe_out_zeroed['Q_aligned'].loc[~pe_out_zeroed['Voltage_aligned'].isna()]) - - electrode_info_df['Q_ne'] = np.max( - ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - np.min( - ne_out_zeroed['Q_aligned'].loc[~ne_out_zeroed['Voltage_aligned'].isna()]) - - electrode_info_df['Q_li'] = np.max( - pe_minus_ne_zeroed['Q_aligned'].loc[~pe_minus_ne_zeroed.Voltage_aligned.isna()]) - np.min( - pe_minus_ne_zeroed['Q_aligned'].loc[~pe_minus_ne_zeroed.Voltage_aligned.isna()]) - - return electrode_info_df diff --git a/beep/features/matrix.py b/beep/features/matrix.py new file mode 100644 index 00000000..872c93e2 --- /dev/null +++ b/beep/features/matrix.py @@ -0,0 +1,490 @@ + +import copy +import hashlib +from typing import List, Union + +import pandas as pd +from monty.json import MSONable, MontyDecoder +from monty.serialization import loadfn, dumpfn + +from beep.features.featurizer import BEEPFeaturizer, BEEPPerCycleFeaturizer, BEEPEarlyCyclesFeaturizer + + +class BEEPFeatureMatrixError(BaseException): + """ Raise when there is a BEEP-specific problem with a dataset""" + pass + + +class BEEPFeatureMatrix(MSONable): + """ + Create an array composed of BEEPFeaturizer objects. + + The array may either be: + + PER-CYCLER-RUN, using BEEPEarlyCyclesFeaturizer. + One feature vector per cycler file, resulting in an array w. dimenions: + (n battery cycler files) x (k features) + + OR: + + PER-CYCLE, using BEEPPerCycleFeaturizer. + One feature vector per cycle, resulting in an array w. dimensions: + (n total cycles) x (k features) + + Sets of featurizer objects must exclusively belong to EITHER of these + two paradigms (base classes), but may not be mixed. + + So a set of featurizrs may be -per-cycler-file OR per-cycle, but not + both. + + Args: + beepfeaturizers ([BEEPFeaturizer]): A list of BEEPFeaturizer objects, + either ALL BEEPEarlyCyclesFeaturizer child objects OR ALL + BEEPPerCycleFeaturizer child objects. + + """ + + OP_DELIMITER = "::" + + def __init__( + self, + beepfeaturizers: List[Union[BEEPPerCycleFeaturizer, BEEPEarlyCyclesFeaturizer]] + ): + + if beepfeaturizers: + bfs_types_per_cycle = [bf.PER_CYCLE for bf in beepfeaturizers] + + # the array should be either all True or all False + if all(bfs_types_per_cycle): + self.per_cycle = True + elif not any(bfs_types_per_cycle): + self.per_cycle = False + else: + raise TypeError( + f"Featurizer types are mixed!\n" + f"BEEPFeatureMatrix can only use EITHER a set of ALL " + f"BEEPAllCyclesFeaturizers OR a set of ALL " + f"BEEPPerCycleFeaturizers.") + dfs_by_file = {bf.paths.get("structured", "no file found"): [] for bf in beepfeaturizers} + + unique_features = {} + for i, bf in enumerate(beepfeaturizers): + + if bf.features is None: + raise BEEPFeatureMatrixError( + f"BEEPFeaturizer {bf} has not created features") + else: + bfcn = bf.__class__.__name__ + + fname = bf.paths.get("structured", None) + if not fname: + raise BEEPFeatureMatrixError( + "Cannot join features automatically as no linking can be done " + "based on original structured filename." + ) + + + # Ensure no per-cycle featurizer is missing required special columns for merge + if self.per_cycle: + missing_special_columns = set() + for sf in BEEPPerCycleFeaturizer.SPECIAL_COLUMNS: + if sf not in bf.features.columns: + missing_special_columns.add(sf) + if missing_special_columns: + raise BEEPFeatureMatrixError( + f"Per cycle featurizer object missing special columns: {missing_special_columns}" + ) + + # Check for any possible feature collisions using identical featurizers + # on identical files + + # sort params for this featurizer obj by key + params = sorted(list(bf.hyperparameters.items()), key=lambda x: x[0]) + + # Prevent identical features from identical input files + # create a unique operation string for the application of this featurizer + # on a specific file, this op string will be the same as long as + # the featurizer class name, hyperparameters, and class are the same + + param_str = "-".join([f"{k}:{v}" for k, v in params]) + param_hash = hashlib.sha256( + param_str.encode("utf-8")).hexdigest() + + # Get an id for this featurizer operation (including hyperparameters) + # regardless of the file it is applied on + feature_op_id = f"{bfcn}{self.OP_DELIMITER}{param_hash}" + + # Get an id for this featurizer operation (including hyperparameters) + # on THIS SPECIFIC file. + file_feature_op_id = f"{fname}{self.OP_DELIMITER}{bfcn}{self.OP_DELIMITER}{param_hash}" + + # Get a unique id for every feature generated by a specific + # featurizer on a specific file. + this_file_feature_columns_ids = \ + [ + f"{file_feature_op_id}{self.OP_DELIMITER}{c}" for c + in bf.features.columns + ] + + # Check to make sure there are no duplicates of the exact same feature for + # the exact same featurizer with the exact same hyperparameters on the exact + # same file. + collisions = {c: f for c, f in unique_features.items() if + c in this_file_feature_columns_ids} + if collisions: + raise BEEPFeatureMatrixError( + f"Multiple features generated with identical classes and identical hyperparameters" + f" attempted to be joined into same dataset; \n" + f"{bfcn} features collide with existing: \n{collisions}" + ) + for c in this_file_feature_columns_ids: + unique_features[c] = bfcn + + # Create consistent scheme for naming features regardless of file + # Only rename non-special column names + df = copy.deepcopy(bf.features) + special_column_names = BEEPPerCycleFeaturizer.SPECIAL_COLUMNS if self.per_cycle else set() + consistent_column_names = [] + + for c in df.columns: + if c in special_column_names: + consistent_column_names.append(c) + else: + consistent_column_names.append(f"{c}{self.OP_DELIMITER}{feature_op_id}") + df.columns = consistent_column_names + + # ensure cycle_index and diag_pos are integers + if self.per_cycle: + for col in BEEPPerCycleFeaturizer.SPECIAL_COLUMNS: + df[col] = df[col].astype(int) + df["filename"] = [fname] * df.shape[0] + + else: + df.index = [fname] * df.shape[0] + df.index.rename("filename", inplace=True) + + dfs_by_file[fname].append(df) + + blocks = [] + self.dfs_by_file = dfs_by_file + indexing_cols = ["filename"] + list(BEEPPerCycleFeaturizer.SPECIAL_COLUMNS) + + # concat dfs by file across columns + for filename, dfs in dfs_by_file.items(): + if self.per_cycle: + rows = dfs[0] + for df in dfs[1:]: + rows = pd.merge( + rows, + df, + how="outer", + on=indexing_cols + ) + rows = rows.reset_index().sort_values(["cycle_index"]) + feature_cols = sorted([f for f in rows.columns if f not in indexing_cols]) + rows = rows[indexing_cols + feature_cols] + + else: + rows = pd.concat(dfs, axis=1) + rows = rows[sorted(rows.columns)] + + blocks.append(rows) + + # concat all dfs for all files across rows + self.matrix = pd.concat(blocks, axis=0) + + else: + self.matrix = None + + self.featurizers = beepfeaturizers + + def as_dict(self): + """Serialize a BEEPFeatureMatrix as a dictionary. + + Must not be loaded from legacy. + + Returns: + (dict): corresponding to dictionary for serialization. + + """ + + return { + "@module": self.__class__.__module__, + "@class": self.__class__.__name__, + + # Core parts of BEEPFeaturizer + "featurizers": [f.as_dict() for f in self.featurizers], + "matrix": self.matrix.to_dict("list"), + } + + @classmethod + def from_dict(cls, d): + """Create a BEEPFeatureMatrix object from a dictionary. + + Args: + d (dict): dictionary represenation. + + Returns: + beep.structure.ProcessedCyclerRun: deserialized ProcessedCyclerRun. + """ + # no need for original datapaths, as their ref paths should + # be in the subobjects + featurizers = [MontyDecoder().process_decoded(f) for f in + d["featurizers"]] + return cls(featurizers) + + @classmethod + def from_json_file(cls, filename): + """Load a structured run previously saved to file. + + .json.gz files are supported. + + Loads a BEEPFeatureMatrix from json. + + Can be used in combination with files serialized with BEEPFeatures.to_json_file. + + Args: + filename (str, Pathlike): a json file from a structured run, serialzed with to_json_file. + + Returns: + None + """ + return loadfn(filename) + + def to_json_file(self, filename): + """Save a BEEPFeatureMatrix to disk as a json. + + .json.gz files are supported. + + Not named from_json to avoid conflict with MSONable.from_json(*) + + Args: + filename (str, Pathlike): The filename to save the file to. + omit_raw (bool): If True, saves only structured (NOT RAW) data. + More efficient for saving/writing to disk. + + Returns: + None + """ + d = self.as_dict() + dumpfn(d, filename) + + +# class BEEPCycleFeatureMatrix(MSONable): +# """ +# Create an ((n battery cycler files) x (j cycles)) x (k features) array composed of +# m BEEPFeaturizer objects. +# +# Args: +# beepfeaturizers ([BEEPFeaturizer]): A list of BEEPFeaturizer objects +# +# """ +# +# OP_DELIMITER = "::" +# +# def __init__(self, beepfeaturizers: List[BEEPFeaturizer]): +# +# if beepfeaturizers: +# # initialize emtpy dict of file names +# dfs_by_file = {os.path.basename( +# bf.paths.get("structured", "no file found") +# )[0:-19]: pd.DataFrame( +# columns=['filename', 'cycle_index', 'diag_pos'] +# ) for bf in beepfeaturizers} +# # big_df_rows = {bf.__class__.__name__: [] for bf in beepfeaturizers} +# unique_features = {} +# for i, bf in enumerate(beepfeaturizers): +# if bf.features is None: +# raise BEEPFeatureMatrixError( +# f"BEEPFeaturizer {bf} has not created features") +# +# # elif bf.features.shape[0] != 1: +# # raise BEEPFeatureMatrixError(f"BEEPFeaturizer {bf} features are not 1-dimensional.") +# else: +# bfcn = bf.__class__.__name__ +# +# # fname = bf.paths.get("structured", None) +# fname = os.path.basename(bf.paths['structured'])[0:-19] +# if not fname: +# raise BEEPFeatureMatrixError( +# "Cannot join features automatically as no linking can be done " +# "based on original structured filename." +# ) +# +# # Check for any possible feature collisions using identical featurizers +# # on identical files +# +# # sort params for this featurizer obj by key +# params = sorted(list(bf.hyperparameters.items()), +# key=lambda x: x[0]) +# +# # Prevent identical features from identical input files +# # create a unique operation string for the application of this featurizer +# # on a specific file, this op string will be the same as long as +# # the featurizer class name, hyperparameters, and class are the same +# +# param_str = "-".join([f"{k}:{v}" for k, v in params]) +# param_hash = hashlib.sha256( +# param_str.encode("utf-8")).hexdigest() +# +# # Get an id for this featurizer operation (including hyperparameters) +# # regardless of the file it is applied on +# feature_op_id = f"{bfcn}{self.OP_DELIMITER}{param_hash}" +# +# # Get an id for this featurizer operation (including hyperparameters) +# # on THIS SPECIFIC file. +# file_feature_op_id = f"{fname}{self.OP_DELIMITER}{bfcn}{self.OP_DELIMITER}{param_hash}" +# +# # Get a unique id for every feature generated by a specific +# # featurizer on a specific file. +# this_file_feature_columns_ids = \ +# [ +# f"{file_feature_op_id}{self.OP_DELIMITER}{c}" for c +# in bf.features.columns +# ] +# +# # Check to make sure there are no duplicates of the exact same feature for +# # the exact same featurizer with the exact same hyperparameters on the exact +# # same file. +# collisions = {c: f for c, f in unique_features.items() if +# c in this_file_feature_columns_ids} +# if collisions: +# raise BEEPFeatureMatrixError( +# f"Multiple features generated with identical classes and identical hyperparameters" +# f" attempted to be joined into same dataset; \n" +# f"{bfcn} features collide with existing: \n{collisions}" +# ) +# for c in this_file_feature_columns_ids: +# unique_features[c] = bfcn +# +# # Create consistent scheme for naming features regardless of file +# df = copy.deepcopy(bf.features) +# consistent_column_names = [ +# f"{c}{self.OP_DELIMITER}{feature_op_id}" for c in +# df.columns] +# df.columns = consistent_column_names +# +# # df.index = [fname] * df.shape[0] +# # df.index.rename("filename", inplace=True) +# +# # create filename column to merge on +# df['filename'] = os.path.basename(bf.paths['structured'])[ +# 0:-19] +# +# # df = df.reset_index(drop=True) +# +# # remove hash from cycle_index and diag_pos column +# cycle_index_col = [col for col in df.columns if +# 'cycle_index' in col] +# df.rename(columns={cycle_index_col[0]: 'cycle_index'}, +# inplace=True) +# +# # remove hash from diag_pos column +# diag_pos_col = [col for col in df.columns if +# 'diag_pos' in col] +# df.rename(columns={diag_pos_col[0]: 'diag_pos'}, +# inplace=True) +# +# # ensure cycle_index and diag_pos are integers +# df['cycle_index'] = df['cycle_index'].astype(int) +# df['diag_pos'] = df['diag_pos'].astype(int) +# +# # append each BEEPFeaturizer df to the corresponding cell dict entry +# # dfs_by_file[fname].append(df) +# dfs_by_file[fname] = dfs_by_file[fname].merge( +# df, how='outer', +# on=['filename', 'cycle_index', 'diag_pos']).sort_values( +# 'cycle_index').reset_index(drop=True) +# # dfs_by_file[fname] = pd.concat( +# # [dfs_by_file[fname],df], +# # axis=1,join='outer',ignore_index=True, +# # keys=['filename']) +# # self.dfs_by_file = dfs_by_file +# # self.df = df +# # return None +# +# rows = [] +# self.matrix = pd.DataFrame() +# for filename, dfs in dfs_by_file.items(): +# # row = pd.concat([row,dfs], axis=1) +# # row = row[sorted(row.columns)] +# # rows.append(row) +# self.matrix = pd.concat([self.matrix, dfs], axis=0, +# ignore_index=True, +# join='outer') # , keys=['filename'] +# +# else: +# self.matrix = None +# +# self.featurizers = beepfeaturizers +# +# def as_dict(self): +# """Serialize a BEEPDatapath as a dictionary. +# +# Must not be loaded from legacy. +# +# Returns: +# (dict): corresponding to dictionary for serialization. +# +# """ +# +# return { +# "@module": self.__class__.__module__, +# "@class": self.__class__.__name__, +# +# # Core parts of BEEPFeaturizer +# "featurizers": [f.as_dict() for f in self.featurizers], +# "matrix": self.matrix.to_dict("list"), +# } +# +# @classmethod +# def from_dict(cls, d): +# """Create a BEEPDatapath object from a dictionary. +# +# Args: +# d (dict): dictionary represenation. +# +# Returns: +# beep.structure.ProcessedCyclerRun: deserialized ProcessedCyclerRun. +# """ +# # no need for original datapaths, as their ref paths should +# # be in the subobjects +# featurizers = [MontyDecoder().process_decoded(f) for f in +# d["featurizers"]] +# return cls(featurizers) +# +# @classmethod +# def from_json_file(cls, filename): +# """Load a structured run previously saved to file. +# +# .json.gz files are supported. +# +# Loads a BEEPFeatureMatrix from json. +# +# Can be used in combination with files serialized with BEEPFeatures.to_json_file. +# +# Args: +# filename (str, Pathlike): a json file from a structured run, serialzed with to_json_file. +# +# Returns: +# None +# """ +# return loadfn(filename) +# +# def to_json_file(self, filename): +# """Save a BEEPFeatureMatrix to disk as a json. +# +# .json.gz files are supported. +# +# Not named from_json to avoid conflict with MSONable.from_json(*) +# +# Args: +# filename (str, Pathlike): The filename to save the file to. +# omit_raw (bool): If True, saves only structured (NOT RAW) data. +# More efficient for saving/writing to disk. +# +# Returns: +# None +# """ +# d = self.as_dict() +# dumpfn(d, filename) \ No newline at end of file diff --git a/beep/features/per_cycle/__init__.py b/beep/features/per_cycle/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/beep/features/per_cycle/diagnostic.py b/beep/features/per_cycle/diagnostic.py new file mode 100644 index 00000000..f4a1b0ef --- /dev/null +++ b/beep/features/per_cycle/diagnostic.py @@ -0,0 +1,177 @@ + +import pandas as pd + +from beep import PROTOCOL_PARAMETERS_DIR +from beep.features import helper_functions +from functools import reduce + +from beep.features.featurizer import BEEPPerCycleFeaturizer + + +class DiagnosticFeaturesPerCycle(BEEPPerCycleFeaturizer): + """ + This class stores fractional levels of degradation in discharge capacity and discharge energy + relative to the first cycle at each diagnostic cycle, grouped by diagnostic cycle type. + + name (str): predictor object name. + X (pandas.DataFrame): features in DataFrame format. + metadata (dict): information about the conditions, data + and code used to produce features + + Hyperparameters: + parameters_dir (str): Full path to directory of parameters to analyse the + diagnostic cycles + """ + DEFAULT_HYPERPARAMETERS = { + "parameters_dir": PROTOCOL_PARAMETERS_DIR, + "nominal_capacity": 4.84, + + } + + def validate(self): + """ + This function determines if the input data has the necessary attributes for + creation of this feature class. It should test for all of the possible reasons + that feature generation would fail for this particular input data. + + Args: + processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run + params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object + gets featurized. These could be filters for column or row operations + Returns: + bool: True/False indication of ability to proceed with feature generation + """ + return helper_functions.check_diagnostic_validation(self.datapath) + + def create_features(self): + """ + Generates diagnostic-property features from processed cycler run, including values for n*x method + Args: + self.datapath (beep.structure.ProcessedCyclerRun): data from cycler run + params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object + gets featurized. These could be filters for column or row operations + parameters_path (str): Root directory storing project parameter files. + + Returns: + pd.DataFrame: cycle_index, RPT discharge capacities and energies, aging cycle discharge capacity and energy, + equivalent full cycles of aging cycle discharge, cumulative discharge throughput. + for each diagnostic cycle of the cell + """ + + parameters_path = self.hyperparameters["parameters_dir"] + + # RPT discharge capacities + data_rpt_02C = self.datapath.diagnostic_data.loc[ + self.datapath.diagnostic_data.cycle_type == 'rpt_0.2C'] + Q_rpt_02C = data_rpt_02C.groupby('cycle_index')[ + ['discharge_capacity', 'discharge_energy']].max().reset_index( + drop=False) + Q_rpt_02C.rename( + columns={'discharge_capacity': 'rpt_0.2C_discharge_capacity', + 'discharge_energy': 'rpt_0.2C_discharge_energy'}, + inplace=True) + Q_rpt_02C = Q_rpt_02C.reset_index(drop=False).rename( + columns={'index': 'diag_pos'}) + + rpt_02C_cycles = data_rpt_02C.cycle_index.unique() # for referencing last regular cycle before diagnostic + + data_rpt_1C = self.datapath.diagnostic_data.loc[ + self.datapath.diagnostic_data.cycle_type == 'rpt_1C'] + Q_rpt_1C = data_rpt_1C.groupby('cycle_index')[ + ['discharge_capacity', 'discharge_energy']].max().reset_index( + drop=False) + Q_rpt_1C.rename( + columns={'discharge_capacity': 'rpt_1C_discharge_capacity', + 'discharge_energy': 'rpt_1C_discharge_energy'}, + inplace=True) + Q_rpt_1C = Q_rpt_1C.reset_index(drop=False).rename( + columns={'index': 'diag_pos'}) + + data_rpt_2C = self.datapath.diagnostic_data.loc[ + self.datapath.diagnostic_data.cycle_type == 'rpt_2C'] + Q_rpt_2C = data_rpt_2C.groupby('cycle_index')[ + ['discharge_capacity', 'discharge_energy']].max().reset_index( + drop=False) + Q_rpt_2C.rename( + columns={'discharge_capacity': 'rpt_2C_discharge_capacity', + 'discharge_energy': 'rpt_2C_discharge_energy'}, + inplace=True) + Q_rpt_2C = Q_rpt_2C.reset_index(drop=False).rename( + columns={'index': 'diag_pos'}) + + # cumuative discharge throughput + aging_df = self.datapath.structured_summary[ + ['cycle_index', 'charge_throughput', 'energy_throughput', + 'energy_efficiency', 'charge_duration', 'CV_time', 'CV_current', + 'energy_efficiency']] + aging_df = aging_df.loc[aging_df.cycle_index.isin(rpt_02C_cycles - 3)] + + cumulative_discharge_throughput = aging_df[ + ['cycle_index', 'charge_throughput']].rename( + columns={'charge_throughput': 'discharge_throughput'}).reset_index( + drop=True) + cumulative_discharge_throughput = cumulative_discharge_throughput.reset_index( + drop=False).rename(columns={'index': 'diag_pos'}) + + cumulative_energy_throughput = aging_df[ + ['cycle_index', 'energy_throughput']].reset_index(drop=True) + cumulative_energy_throughput = cumulative_energy_throughput.reset_index( + drop=False).rename(columns={'index': 'diag_pos'}) + + equivalent_full_cycles = cumulative_discharge_throughput.copy() + equivalent_full_cycles.rename( + columns={'discharge_throughput': 'equivalent_full_cycles'}, + inplace=True) + equivalent_full_cycles['equivalent_full_cycles'] = \ + equivalent_full_cycles['equivalent_full_cycles'] / self.hyperparameters[ + 'nominal_capacity'] + + # Q_aging_pre_diag - discharge capacity of aging cycle before diagnostic + Q_aging_pre_diag = self.datapath.structured_data.groupby('cycle_index')[ + 'discharge_capacity'].max().loc[rpt_02C_cycles[1:] - 3].reset_index( + drop=False) # ignore first diagnostic, adjust cycle index to Q_aging_pre_diag + Q_aging_pre_diag.rename( + columns={'discharge_capacity': 'Q_aging_pre_diag'}, inplace=True) + Q_aging_pre_diag = Q_aging_pre_diag.reset_index( + drop=False).rename(columns={'index': 'diag_pos'}) + Q_aging_pre_diag['diag_pos'] = Q_aging_pre_diag[ + 'diag_pos'] + 1 # since, first diag is ignored, add one to diag_pos + + # Q_aging_post_diag - discharge capacity of aging cycle after diagnostic + Q_aging_post_diag = \ + self.datapath.structured_data.groupby('cycle_index')[ + 'discharge_capacity'].max().loc[rpt_02C_cycles + 3].reset_index( + drop=False) # does not ignore first diag since Q_aging exists after first diag + Q_aging_post_diag.rename( + columns={'discharge_capacity': 'Q_aging_post_diag'}, inplace=True) + Q_aging_post_diag = Q_aging_post_diag.reset_index( + drop=False).rename(columns={'index': 'diag_pos'}) + + # Diagnostic time + diagnostic_time = data_rpt_02C.groupby('cycle_index')[ + 'test_time'].min().reset_index(drop=False).rename( + columns={'test_time': 'diagnostic_time'}) + diagnostic_time = diagnostic_time.reset_index( + drop=False).rename(columns={'index': 'diag_pos'}) + + # Combine dataframes + df_list = [Q_rpt_02C, Q_rpt_1C, Q_rpt_2C, + cumulative_discharge_throughput, + cumulative_energy_throughput, + equivalent_full_cycles, + Q_aging_pre_diag, + Q_aging_post_diag, + diagnostic_time] + + for df in df_list: + df['cycle_index'] = df['cycle_index'].copy().astype(int) + df['diag_pos'] = df['diag_pos'].copy().astype(int) + + cycle_features = reduce( + lambda x, y: pd.merge(x, y, on=['cycle_index', 'diag_pos'], + how='outer'), df_list) + self.features = cycle_features.sort_values('cycle_index').reset_index( + drop=True) + + + diff --git a/beep/features/per_cycle/hppc.py b/beep/features/per_cycle/hppc.py new file mode 100644 index 00000000..d95f4c3c --- /dev/null +++ b/beep/features/per_cycle/hppc.py @@ -0,0 +1,80 @@ +import pandas as pd + +from beep import PROTOCOL_PARAMETERS_DIR +from beep.features import helper_functions + +from beep.features.featurizer import BEEPPerCycleFeaturizer + + + +class HPPCResistanceVoltagePerCycle(BEEPPerCycleFeaturizer): + DEFAULT_HYPERPARAMETERS = { + "test_time_filter_sec": 1000000, + "cycle_index_filter": 6, + "soc_window": 8, + "parameters_path": PROTOCOL_PARAMETERS_DIR + } + + def validate(self): + val, msg = helper_functions.check_diagnostic_validation(self.datapath) + if val: + conditions = [] + conditions.append( + any( + [ + "hppc" in x + for x in + self.datapath.diagnostic_summary.cycle_type.unique() + ] + ) + ) + if all(conditions): + return True, None + else: + return False, "HPPC conditions not met for this cycler run" + else: + return val, msg + + def create_features(self): + # Filter out low cycle numbers at the end of the test, corresponding to the "final" diagnostic + self.datapath.diagnostic_data = self.datapath.diagnostic_data[ + ~((self.datapath.diagnostic_data.test_time > self.hyperparameters[ + 'test_time_filter_sec']) & + (self.datapath.diagnostic_data.cycle_index < self.hyperparameters[ + 'cycle_index_filter'])) + ] + self.datapath.diagnostic_data = self.datapath.diagnostic_data.groupby( + ["cycle_index", "step_index", "step_index_counter"] + ).filter(lambda x: ~x["test_time"].isnull().all()) + + # Only hppc_resistance_features are able to be calculated without error. + # Xiao Cui should be pulled in to understand the issue with the others features. + + # diffusion features + # diffusion_features = featurizer_helpers.get_diffusion_cycle_features( + # self.datapath, + # ) + + # hppc resistance features + hppc_resistance_features = helper_functions.get_hppc_resistance_cycle_features( + self.datapath, + ) + + # the variance of ocv features + # hppc_ocv_features = featurizer_helpers.get_hppc_ocv_cycle_features( + # self.datapath, + # ) + + # the v_diff features + # v_diff = featurizer_helpers.get_v_diff_cycle_features( + # self.datapath, + # self.hyperparameters["soc_window"], + # self.hyperparameters["parameters_path"] + # ) + + # merge everything together as a final result dataframe + self.features = pd.concat( + [hppc_resistance_features, + # hppc_ocv_features, + # v_diff, #diffusion_features + ], axis=1) diff --git a/beep/features/per_cycle/protocol.py b/beep/features/per_cycle/protocol.py new file mode 100644 index 00000000..63c78926 --- /dev/null +++ b/beep/features/per_cycle/protocol.py @@ -0,0 +1,64 @@ + +from beep import PROTOCOL_PARAMETERS_DIR +from beep.features import helper_functions +from beep.utils.parameters_lookup import get_protocol_parameters + +from beep.features.featurizer import BEEPPerCycleFeaturizer + + +class CyclingProtocolPerCycle(BEEPPerCycleFeaturizer): + """ + This class stores information about the charging protocol used + name (str): predictor object name. + X (pandas.DataFrame): features in DataFrame format. + metadata (dict): information about the conditions, data + and code used to produce features + Hyperparameters: + parameters_dir (str): Full path to directory of charging protocol parameters + quantities ([str]): list of parameters to return + """ + DEFAULT_HYPERPARAMETERS = { + "parameters_dir": PROTOCOL_PARAMETERS_DIR, + "quantities": ["charge_constant_current_1", "charge_constant_current_2", + "charge_cutoff_voltage", "charge_constant_voltage_time", + "discharge_constant_current", + "discharge_cutoff_voltage"], + } + + def validate(self): + """ + This function determines if the input data has the necessary attributes for + creation of this feature class. It should test for all of the possible reasons + that feature generation would fail for this particular input data. + Args: + processed_cycler_run (beep.structure.ProcessedCyclerRun): data from cycler run + params_dict (dict): dictionary of parameters governing how the ProcessedCyclerRun object + gets featurized. These could be filters for column or row operations + Returns: + bool: True/False indication of ability to proceed with feature generation + """ + if not ( + 'raw' in self.datapath.paths.keys() or 'structured' in self.datapath.paths.keys()): + message = "datapath paths not set, unable to fetch charging protocol" + return False, message + else: + return helper_functions.check_diagnostic_validation(self.datapath) + + def create_features(self): + """ + Fetches charging protocol features + """ + + parameters_path = self.hyperparameters["parameters_dir"] + file_path = self.datapath.paths[ + 'raw'] if 'raw' in self.datapath.paths.keys() else \ + self.datapath.paths['structured'] + + parameters, _ = get_protocol_parameters(file_path, parameters_path) + + parameters = parameters[self.hyperparameters["quantities"]] + parameters['cycle_index'] = int( + 0) # create a cycle index column for merging with other featurizers + parameters['diag_pos'] = int( + 0) # create a diag_pos column for merging with other featurizers + self.features = parameters \ No newline at end of file diff --git a/beep/features/tests/test_features.py b/beep/features/tests/test_features.py index 59171edb..9423e331 100644 --- a/beep/features/tests/test_features.py +++ b/beep/features/tests/test_features.py @@ -29,7 +29,7 @@ from beep.structure.maccor import MaccorDatapath from beep.structure.cli import auto_load_processed, auto_load -from beep.features import featurizer_helpers +from beep.features import helper_functions from beep.utils import parameters_lookup from monty.serialization import dumpfn, loadfn from monty.tempfile import ScratchDir @@ -270,9 +270,9 @@ def test_get_fractional_quantity_remaining_nx(self): structured_datapath.structured_summary = structured_datapath.structured_summary[ ~structured_datapath.structured_summary.cycle_index.isin(structured_datapath.diagnostic_summary.cycle_index)] - sum_diag = featurizer_helpers.get_fractional_quantity_remaining_nx(structured_datapath, - metric="discharge_energy", - diagnostic_cycle_type="hppc") + sum_diag = helper_functions.get_fractional_quantity_remaining_nx(structured_datapath, + metric="discharge_energy", + diagnostic_cycle_type="hppc") # print(sum_diag["normalized_regular_throughput"]) self.assertEqual(len(sum_diag.index), 16) self.assertEqual(sum_diag.cycle_index.max(), 1507) @@ -284,9 +284,9 @@ def test_get_fractional_quantity_remaining_nx(self): self.assertEqual(sum_diag['diagnostic_interval'].iloc[0], 100) self.assertEqual(sum_diag['epoch_time'].iloc[0], 1576641695) - sum_diag = featurizer_helpers.get_fractional_quantity_remaining_nx(structured_datapath, - metric="discharge_energy", - diagnostic_cycle_type="rpt_1C") + sum_diag = helper_functions.get_fractional_quantity_remaining_nx(structured_datapath, + metric="discharge_energy", + diagnostic_cycle_type="rpt_1C") self.assertEqual(len(sum_diag.index), 16) self.assertEqual(sum_diag.cycle_index.max(), 1509) self.assertEqual(np.around(sum_diag["initial_regular_throughput"].iloc[0], 3), np.around(237.001769, 3)) @@ -301,9 +301,9 @@ def test_get_fractional_quantity_remaining_nx(self): ) structured_datapath = auto_load_processed(processed_cycler_run_path_2) - sum_diag = featurizer_helpers.get_fractional_quantity_remaining_nx(structured_datapath, - metric="discharge_energy", - diagnostic_cycle_type="hppc") + sum_diag = helper_functions.get_fractional_quantity_remaining_nx(structured_datapath, + metric="discharge_energy", + diagnostic_cycle_type="hppc") self.assertEqual(len(sum_diag.index), 3) self.assertEqual(sum_diag.cycle_index.max(), 242) self.assertEqual(np.around(sum_diag["initial_regular_throughput"].iloc[0], 3), np.around(331.428, 3)) @@ -329,7 +329,7 @@ def test_get_v_diff(self): with ScratchDir("."): # processed_cycler_run_path_1 structured_datapath = auto_load_processed(processed_cycler_run_path_1) - v_vars_df = featurizer_helpers.get_v_diff(structured_datapath, 1, 8) + v_vars_df = helper_functions.get_v_diff(structured_datapath, 1, 8) # print(v_vars_df) self.assertEqual(np.round(v_vars_df.iloc[0]['var_v_diff'], decimals=8), np.round(0.00472705, decimals=8)) @@ -344,7 +344,7 @@ def test_get_v_diff(self): # processed_cycler_run_path_2 structured_datapath = auto_load_processed(processed_cycler_run_path_2) - v_vars_df = featurizer_helpers.get_v_diff(structured_datapath, 1, 8) + v_vars_df = helper_functions.get_v_diff(structured_datapath, 1, 8) # print(v_vars_df) self.assertEqual(np.round(v_vars_df.iloc[0]['var_v_diff'], decimals=8), np.round(2.664e-05, decimals=8)) @@ -359,7 +359,7 @@ def test_get_v_diff(self): # processed_cycler_run_path_3 structured_datapath = auto_load_processed(processed_cycler_run_path_3) - v_vars_df = featurizer_helpers.get_v_diff(structured_datapath, 1, 8) + v_vars_df = helper_functions.get_v_diff(structured_datapath, 1, 8) # print(v_vars_df) self.assertEqual(np.round(v_vars_df.iloc[0]['var_v_diff'], decimals=8), np.round(4.82e-06, decimals=8)) @@ -374,7 +374,7 @@ def test_get_v_diff(self): # processed_cycler_run_path_4 structured_datapath = auto_load_processed(processed_cycler_run_path_4) - v_vars_df = featurizer_helpers.get_v_diff(structured_datapath, 1, 8) + v_vars_df = helper_functions.get_v_diff(structured_datapath, 1, 8) # print(v_vars_df) self.assertEqual(np.round(v_vars_df.iloc[0]['var_v_diff'], decimals=8), np.round(9.71e-06, decimals=8)) @@ -392,7 +392,7 @@ def test_get_hppc_ocv(self): TEST_FILE_DIR, "PreDiag_000240_000227_truncated_structure.json" ) structured_datapath = auto_load_processed(structured_datapath_loc) - hppc_ocv_features = featurizer_helpers.get_hppc_ocv(structured_datapath, 1) + hppc_ocv_features = helper_functions.get_hppc_ocv(structured_datapath, 1) self.assertAlmostEqual(hppc_ocv_features['var_ocv'].iloc[0], 0.000016, 6) self.assertAlmostEqual(hppc_ocv_features['min_ocv'].iloc[0], -0.001291, 6) self.assertAlmostEqual(hppc_ocv_features['mean_ocv'].iloc[0], 0.002221, 6) @@ -425,9 +425,9 @@ def test_get_step_index(self): parameter_row["capacity_nominal"].iloc[0], 2) # print(step, median_crate, duration) - step_ind = featurizer_helpers.get_step_index(structured_datapath, - cycle_type="hppc", - diag_pos=0) + step_ind = helper_functions.get_step_index(structured_datapath, + cycle_type="hppc", + diag_pos=0) self.assertEqual(len(step_ind.values()), 6) print([step_ind["hppc_long_rest"], step_ind["hppc_discharge_pulse"], @@ -443,9 +443,9 @@ def test_get_step_index(self): 'hppc_charge_pulse': 14, 'hppc_discharge_to_next_soc': 15 }) - step_ind = featurizer_helpers.get_step_index(structured_datapath, - cycle_type="hppc", - diag_pos=1) + step_ind = helper_functions.get_step_index(structured_datapath, + cycle_type="hppc", + diag_pos=1) self.assertEqual(len(step_ind.values()), 6) self.assertEqual(step_ind, { 'hppc_charge_to_soc': 41, @@ -465,9 +465,9 @@ def test_get_step_index_2(self): _, protocol_name = os.path.split(structured_datapath.metadata.protocol) parameter_row, _ = parameters_lookup.get_protocol_parameters(protocol_name, parameters_path=parameters_path) - step_ind = featurizer_helpers.get_step_index(structured_datapath, - cycle_type="hppc", - diag_pos=0) + step_ind = helper_functions.get_step_index(structured_datapath, + cycle_type="hppc", + diag_pos=0) self.assertEqual(len(step_ind.values()), 7) self.assertEqual(step_ind, { @@ -479,9 +479,9 @@ def test_get_step_index_2(self): 'hppc_discharge_to_next_soc': 15, 'hppc_final_discharge': 17 }) - step_ind = featurizer_helpers.get_step_index(structured_datapath, - cycle_type="hppc", - diag_pos=1) + step_ind = helper_functions.get_step_index(structured_datapath, + cycle_type="hppc", + diag_pos=1) self.assertEqual(len(step_ind.values()), 7) self.assertEqual(step_ind, { 'hppc_charge_to_soc': 41, @@ -492,24 +492,24 @@ def test_get_step_index_2(self): 'hppc_discharge_to_next_soc': 47, 'hppc_final_discharge': 49 }) - step_ind = featurizer_helpers.get_step_index(structured_datapath, cycle_type="reset", diag_pos=0) + step_ind = helper_functions.get_step_index(structured_datapath, cycle_type="reset", diag_pos=0) self.assertEqual(step_ind, {'reset_charge': 5, 'reset_discharge': 6}) - step_ind = featurizer_helpers.get_step_index(structured_datapath, cycle_type="reset", diag_pos=1) + step_ind = helper_functions.get_step_index(structured_datapath, cycle_type="reset", diag_pos=1) self.assertEqual(step_ind, {'reset_charge': 38, 'reset_discharge': 39}) - step_ind = featurizer_helpers.get_step_index(structured_datapath, cycle_type="rpt_0.2C", diag_pos=0) + step_ind = helper_functions.get_step_index(structured_datapath, cycle_type="rpt_0.2C", diag_pos=0) self.assertEqual(step_ind, {'rpt_0.2C_charge': 19, 'rpt_0.2C_discharge': 20}) - step_ind = featurizer_helpers.get_step_index(structured_datapath, cycle_type="rpt_0.2C", diag_pos=1) + step_ind = helper_functions.get_step_index(structured_datapath, cycle_type="rpt_0.2C", diag_pos=1) self.assertEqual(step_ind, {'rpt_0.2C_charge': 51, 'rpt_0.2C_discharge': 52}) - step_ind = featurizer_helpers.get_step_index(structured_datapath, cycle_type="rpt_1C", diag_pos=0) + step_ind = helper_functions.get_step_index(structured_datapath, cycle_type="rpt_1C", diag_pos=0) self.assertEqual(step_ind, {'rpt_1C_charge': 22, 'rpt_1C_discharge': 23}) - step_ind = featurizer_helpers.get_step_index(structured_datapath, cycle_type="rpt_1C", diag_pos=1) + step_ind = helper_functions.get_step_index(structured_datapath, cycle_type="rpt_1C", diag_pos=1) self.assertEqual(step_ind, {'rpt_1C_charge': 54, 'rpt_1C_discharge': 55}) - step_ind = featurizer_helpers.get_step_index(structured_datapath, cycle_type="rpt_2C", diag_pos=0) + step_ind = helper_functions.get_step_index(structured_datapath, cycle_type="rpt_2C", diag_pos=0) self.assertEqual(step_ind, {'rpt_2C_charge': 25, 'rpt_2C_discharge': 26}) - step_ind = featurizer_helpers.get_step_index(structured_datapath, cycle_type="rpt_2C", diag_pos=1) + step_ind = helper_functions.get_step_index(structured_datapath, cycle_type="rpt_2C", diag_pos=1) self.assertEqual(step_ind, {'rpt_2C_charge': 57, 'rpt_2C_discharge': 58}) def test_get_step_index_3(self): @@ -517,7 +517,7 @@ def test_get_step_index_3(self): TEST_FILE_DIR, "PredictionDiagnostics_000136_00002D_truncated_structure.json" ) structured_datapath = auto_load_processed(structured_datapath_loc) - step_ind = featurizer_helpers.get_step_index(structured_datapath, cycle_type="hppc", diag_pos=0) + step_ind = helper_functions.get_step_index(structured_datapath, cycle_type="hppc", diag_pos=0) self.assertEqual(len(step_ind.values()), 6) def test_get_diffusion_coeff(self): @@ -526,7 +526,7 @@ def test_get_diffusion_coeff(self): TEST_FILE_DIR, "PreDiag_000240_000227_truncated_structure.json" ) structured_datapath = auto_load_processed(structured_datapath_loc) - diffusion_df = featurizer_helpers.get_diffusion_coeff(structured_datapath, 1) + diffusion_df = helper_functions.get_diffusion_coeff(structured_datapath, 1) print(np.round(diffusion_df.iloc[0].to_list(), 3)) self.assertEqual(np.round(diffusion_df.iloc[0].to_list(), 3)[0], -0.016) self.assertEqual(np.round(diffusion_df.iloc[0].to_list(), 3)[5], -0.011)