From b3435aa7347ffeb9959d5f59c3973bcdae7b108e Mon Sep 17 00:00:00 2001
From: Matteo Tiberti <matteo.tiberti@gmail.com>
Date: Mon, 13 Jan 2025 11:56:36 +0100
Subject: [PATCH 1/4] [WIP] partial implementation of st. dev. columns

---
 mavisp/methods.py | 122 +++++++++++++++++++++++++++-------------------
 mavisp/modules.py |  16 ++----
 2 files changed, 77 insertions(+), 61 deletions(-)

diff --git a/mavisp/methods.py b/mavisp/methods.py
index d5c7a9b..33cd0bb 100644
--- a/mavisp/methods.py
+++ b/mavisp/methods.py
@@ -99,6 +99,8 @@ class MutateXBinding(Method):
     target_chain       = 'A'
     measure = "Binding with"
     complex_status = "heterodimer"
+    averages_filename = 'energies.csv'
+    stds_filename = 'energies_std.csv'
 
     def __init__(self, version, complex_status=None):
 
@@ -109,6 +111,57 @@ def __init__(self, version, complex_status=None):
 
         self.interactors = []
 
+    def _parse_mutatex_energy_file(self, fname, data_type, interactor):
+
+        try:
+            df = pd.read_csv(fname)
+        except Exception as e:
+            this_error = f"Exception {type(e).__name__} occurred when parsing the MutateX csv file. Arguments:{e.args}"
+            raise MAVISpMultipleError(warning=warnings,
+                                      critical=[MAVISpCriticalError(this_error)])
+
+        # create residue column
+        df['residue'] = df['WT residue type'] + df['Residue #'].astype(str)
+
+        # detect and handle homodimer case
+        chains = set(df['chain ID'].unique())
+
+        if self.target_chain in chains:
+            df = df[ df['chain ID'] == self.target_chain ]
+
+        elif set(df['chain ID'].unique()) != self.homodimer_chains:
+            message = "chain ID in FoldX energy file must be either A or B (heterodimer case) or AB (homodimer case)"
+            raise MAVISpMultipleError(critical=[MAVISpCriticalError(message)],
+                                        warning=[])
+
+        df = df.drop(['WT residue type', 'Residue #', 'chain ID'], axis=1)
+
+        # stack remaining columns
+        df = df.set_index('residue')
+        df = df.stack()
+        df = df.reset_index()
+
+        # create mutation column
+        df['mutations'] = df['residue'] + df['level_1']
+        df = df.set_index('mutations')
+
+        # drop now useless columns, rename
+        df = df.drop(['residue', 'level_1'], axis=1)
+
+        # handle space around measure
+        if self.measure == "":
+            measure = ""
+        else:
+            measure = f"{self.measure} "
+
+        # handle data type
+        if data_type is None or data_type == '':
+            colname =  f"{self.type} ({measure}{interactor}, {self.complex_status}, {self.version}, {self.unit})"
+        else:
+            colname =  f"{self.type} ({measure}{interactor}, {self.complex_status}, {self.version}, {self.unit}, {data_type})"
+
+        return df.rename(columns={0 : colname})
+
     def parse(self, dir_path):
 
         warnings = []
@@ -128,63 +181,30 @@ def parse(self, dir_path):
 
             mutatex_files = os.listdir(interactor_dir)
 
-            if len(mutatex_files) != 1:
-                raise MAVISpMultipleError(critical=[MAVISpCriticalError(f"zero or multiple files found in {interactor_dir}; exactly one expected")],
-                                        warning=warnings)
-
-            mutatex_file = mutatex_files[0]
-
-            try:
-                df = pd.read_csv(os.path.join(interactor_dir, mutatex_file))
-            except Exception as e:
-                this_error = f"Exception {type(e).__name__} occurred when parsing the MutateX csv file. Arguments:{e.args}"
+            if self.averages_filename not in mutatex_files:
+                this_error = f"energies.csv file not found in {interactor_dir}"
                 raise MAVISpMultipleError(warning=warnings,
-                                        critical=[MAVISpCriticalError(this_error)])
-
-            # create residue column
-            df['residue'] = df['WT residue type'] + df['Residue #'].astype(str)
-
-            # detect and handle homodimer case
-            chains = set(df['chain ID'].unique())
-
-            if self.target_chain in chains:
-                df = df[ df['chain ID'] == self.target_chain ]
-
-            elif set(df['chain ID'].unique()) != self.homodimer_chains:
-                message = "chain ID in FoldX energy file must be either A or B (heterodimer case) or AB (homodimer case)"
-                raise MAVISpMultipleError(critical=[MAVISpCriticalError(message)],
-                                          warning=[])
-
-            df = df.drop(['WT residue type', 'Residue #', 'chain ID'], axis=1)
-
-            # stack remaining columns
-            df = df.set_index('residue')
-            df = df.stack()
-            df = df.reset_index()
-
-            # create mutation column
-            df['mutations'] = df['residue'] + df['level_1']
-            df = df.set_index('mutations')
+                                            critical=[MAVISpCriticalError(this_error)])
 
-            # drop now useless columns, rename
-            df = df.drop(['residue', 'level_1'], axis=1)
+            averages_df = self._parse_mutatex_energy_file(os.path.join(interactor_dir, self.averages_filename), '', interactor)
 
-            # handle space around measure
-            if self.measure == "":
-                measure = ""
+            if self.stds_filename in mutatex_files:
+                stds_df = self._parse_mutatex_energy_file(os.path.join(interactor_dir, self.stds_filename), 'st. dev.', interactor)
             else:
-                measure = f"{self.measure} "
-
-            df = df.rename(columns={0 : f"{self.type} ({measure}{interactor}, {self.complex_status}, {self.version}, {self.unit})"})
+                warnings.append(MAVISpWarningError("standard deviation file not found for MutateX data"))
+                stds_df = None
 
             if all_data is None:
-                all_data = df
+                all_data = averages_df
             else:
-                all_data = all_data.join(df, how='outer')
+                all_data = all_data.join(averages_df, how='outer')
+
+            if stds_df is not None:
+                all_data = all_dat.join(stds_df, how='outer')
 
         return all_data, warnings
 
-class MutateXDNABinding(Method):
+class MutateXDNABinding(MutateXBinding):
 
     unit = "kcal/mol"
     type = "Local Int. With DNA"
@@ -194,8 +214,6 @@ class MutateXDNABinding(Method):
     measure = ""
     complex_status = "heterodimer"
 
-    parse = MutateXBinding.parse
-
 class RosettaDDGPredictionStability(Method):
 
     unit = "kcal/mol"
@@ -349,6 +367,7 @@ def parse(self, dir_path):
 
                 rosetta_file = rosetta_files[0]
                 mutation_data = self._parse_aggregate_csv(os.path.join(interactor_dir, rosetta_file), warnings)
+                mutation_data_std = None
 
             elif len(rosetta_files) > 1 and all( [ os.path.isdir(os.path.join(interactor_dir, f)) for f in rosetta_files] ):
                 mutation_data = None
@@ -371,6 +390,7 @@ def parse(self, dir_path):
                         mutation_data = mutation_data.join(conformer_data)
 
                 mutation_data = pd.DataFrame(mutation_data.mean(axis=1), columns=['total_score'])
+                mutation_data_std = pd.DataFrame(mutation_data.std(axis=1), columns=['total_score'])
 
             else:
                 text = f"dataset {interactor_dir} was not either a single files, or multiple directories containing one file"
@@ -384,6 +404,10 @@ def parse(self, dir_path):
             else:
                 all_data = all_data.join(mutation_data, how='outer')
 
+            if mutation_data_std is not None:
+                mutation_data_std = mutation_data_std.rename(columns={'total_score':f'{self.type} (Binding with {interactor}, {self.complex_status}, {self.version}, {self.unit}, st. dev.)'})
+                all_data = all_data.join(mutation_data_std, how='outer')
+
         return all_data, warnings
 
 class AlloSigma(Method):
diff --git a/mavisp/modules.py b/mavisp/modules.py
index afabd52..95545c0 100644
--- a/mavisp/modules.py
+++ b/mavisp/modules.py
@@ -405,8 +405,7 @@ def ingest(self, mutations):
         except MAVISpMultipleError as e:
             if len(e.critical) > 0:
                 raise
-        else:
-            e = None
+            warnings += e.warning
 
         module_dir_files = os.listdir(os.path.join(self.data_dir, self.module_dir))
 
@@ -425,12 +424,9 @@ def ingest(self, mutations):
 
         self.data = self.data.drop(columns=['res_num', 'sas_sc_rel'])
 
-        if e is None and len(warnings) > 0:
+        if len(warnings) > 0:
             raise MAVISpMultipleError(warning=warnings,
                                       critical=[])
-        elif len(warnings) > 0:
-            e.warning.extend(warnings)
-            raise e
 
     def _generate_local_interactions_classification(self, row, ci, stab_co=1.0):
 
@@ -507,8 +503,7 @@ def ingest(self, mutations):
         except MAVISpMultipleError as e:
             if len(e.critical) > 0:
                 raise
-        else:
-            e = None
+            warnings += e.warning
 
         rsa = self._parse_sas(os.path.join(self.data_dir, self.module_dir, self.sas_filename), warnings)
 
@@ -525,12 +520,9 @@ def ingest(self, mutations):
 
         self.data = self.data.drop(columns=['res_num', 'sas_sc_rel'])
 
-        if e is None and len(warnings) > 0:
+        if len(warnings) > 0:
             raise MAVISpMultipleError(warning=warnings,
                                       critical=[])
-        elif len(warnings) > 0:
-            e.warning.extend(warnings)
-            raise e
 
     def _generate_local_interactions_DNA_classification(self, row, ci, stab_co=1.0):
 

From e532801790ffd0c5f99cc1b83574720676f8ad90 Mon Sep 17 00:00:00 2001
From: Konstantina Gkopi <s243692@dtu.dk>
Date: Sun, 14 Dec 2025 14:14:36 +0100
Subject: [PATCH 2/4] Added st. dev. column for MutateXBinding and
 RosettaDDGPredictionBinding

---
 mavisp/methods.py | 191 ++++++++++++++++++++++++++++++----------------
 1 file changed, 126 insertions(+), 65 deletions(-)

diff --git a/mavisp/methods.py b/mavisp/methods.py
index b2c6fa5..66c549d 100644
--- a/mavisp/methods.py
+++ b/mavisp/methods.py
@@ -91,17 +91,18 @@ def parse(self, dir_path):
 
         return averages_df, stds_df, warnings
 
-class MutateXBinding(Method):
-
+class MutateXBinding(Method): 
+    #a parsing class; produces binding ΔΔG data
+    
     unit = "kcal/mol"
     type = "Local Int."
-    heterodimer_chains = set(['A'])
-    homodimer_chains   = set(['AB'])
+    heterodimer_chains = set(['A']) # If chain A exists → keep A only.
+    homodimer_chains   = set(['AB']) # If chain A does NOT exist → the ONLY valid alternative is homodimer AB.
     target_chain       = 'A'
     measure = "Binding with"
-    complex_status = "heterodimer"
     averages_filename = 'energies.csv'
     stds_filename = 'energies_std.csv'
+    complex_status = "heterodimer"
 
     def __init__(self, version, complex_status=None):
 
@@ -112,7 +113,9 @@ def __init__(self, version, complex_status=None):
 
         self.interactors = []
 
-    def _parse_mutatex_energy_file(self, fname, data_type, interactor):
+    # data_type is either '' or 'st. dev.'
+    def _parse_mutatex_binding_file(self, fname, interactor, data_type):
+        """Parse a single MutateX binding file (average or std)."""
 
         try:
             df = pd.read_csv(fname)
@@ -120,90 +123,98 @@ def _parse_mutatex_energy_file(self, fname, data_type, interactor):
             this_error = f"Exception {type(e).__name__} occurred when parsing the MutateX csv file. Arguments:{e.args}"
             raise MAVISpMultipleError(warning=warnings,
                                       critical=[MAVISpCriticalError(this_error)])
-
-        # create residue column
+            
+        # Create residue column
         df['residue'] = df['WT residue type'] + df['Residue #'].astype(str)
 
-        # detect and handle homodimer case
+        # Detect and handle homodimer case
         chains = set(df['chain ID'].unique())
-
+        
+        # If chain A exists, KEEP ONLY rows where chain ID == 'A'.  
         if self.target_chain in chains:
             df = df[ df['chain ID'] == self.target_chain ]
-
+        # If chain A does NOT exist, the ONLY valid alternative is homodimer AB.
         elif set(df['chain ID'].unique()) != self.homodimer_chains:
             message = "chain ID in FoldX energy file must be either A or B (heterodimer case) or AB (homodimer case)"
             raise MAVISpMultipleError(critical=[MAVISpCriticalError(message)],
-                                        warning=[])
+                                      warning=[])
 
+        # Drop unnecessary columns
         df = df.drop(['WT residue type', 'Residue #', 'chain ID'], axis=1)
 
-        # stack remaining columns
-        df = df.set_index('residue')
-        df = df.stack()
-        df = df.reset_index()
+        # Stack remaining columns
+        df = df.set_index('residue') # set 'residue' column as index
+        df = df.stack() # rotates columns downward and makes the dataframe long-format (level_1 contains the original column names and 0 contains the values)
+        df = df.reset_index() # reset index to turn the index into a column
 
-        # create mutation column
-        df['mutations'] = df['residue'] + df['level_1']
-        df = df.set_index('mutations')
+        # Create mutation column
+        df['mutations'] = df['residue'] + df['level_1'] # concatenate 'residue' and 'level_1' columns to create 'mutations' column
+        df = df.set_index('mutations') # set 'mutations' column as index
 
-        # drop now useless columns, rename
+        # Drop now useless columns, rename
         df = df.drop(['residue', 'level_1'], axis=1)
 
         # handle space around measure
         if self.measure == "":
-            measure = ""
+          measure = ""
         else:
-            measure = f"{self.measure} "
+          measure = f"{self.measure} "
 
-        # handle data type
+        # rename column Local Int. (Binding with B, heterodimer, FoldX5, kcal/mol)
         if data_type is None or data_type == '':
-            colname =  f"{self.type} ({measure}{interactor}, {self.complex_status}, {self.version}, {self.unit})"
+            colname =  f"{self.type} ({self.measure} {interactor}, {self.complex_status}, {self.version}, {self.unit})"
         else:
-            colname =  f"{self.type} ({measure}{interactor}, {self.complex_status}, {self.version}, {self.unit}, {data_type})"
-
-        return df.rename(columns={0 : colname})
+            colname =  f"{self.type} ({self.measure} {interactor}, {self.complex_status}, {self.version}, {self.unit}, {data_type})"
+        
+        return df.rename(columns={0 : colname}) # rename the sinle column named 0 to the formatted name
 
     def parse(self, dir_path):
-
+        """ reads the MutateX output files (energies.csv + energies_std.csv) for each interactor, converts them into mutation-indexed dataframes, and returns them to the Local interaction module."""
+        
         warnings = []
+        all_data = None
 
-        interactors = os.listdir(dir_path)
-        self.interactors = interactors
+        interactors = os.listdir(dir_path) #list of subfolders in dir_path
+        self.interactors = interactors #store interactors in the instance variable
 
         if len(interactors) == 0:
             raise MAVISpMultipleError(critical=[MAVISpCriticalError("no interactor folders found")],
                                       warning=warnings)
 
-        all_data = None
-
         for interactor in interactors:
-
+            
             interactor_dir = os.path.join(dir_path, interactor)
-
             mutatex_files = os.listdir(interactor_dir)
 
+            # expect energies.csv file per interactor
             if self.averages_filename not in mutatex_files:
                 this_error = f"energies.csv file not found in {interactor_dir}"
                 raise MAVISpMultipleError(warning=warnings,
-                                            critical=[MAVISpCriticalError(this_error)])
-
-            averages_df = self._parse_mutatex_energy_file(os.path.join(interactor_dir, self.averages_filename), '', interactor)
+                                          critical=[MAVISpCriticalError(this_error)])   
+            
+            # Parse averages file
+            averages_df = self._parse_mutatex_binding_file(os.path.join(interactor_dir, self.averages_filename), interactor, '')
 
+            # Parse stds file if it exists
             if self.stds_filename in mutatex_files:
-                stds_df = self._parse_mutatex_energy_file(os.path.join(interactor_dir, self.stds_filename), 'st. dev.', interactor)
+                stds_df = self._parse_mutatex_binding_file(os.path.join(interactor_dir, self.stds_filename), interactor, 'st. dev.')
             else:
-                warnings.append(MAVISpWarningError("standard deviation file not found for MutateX data"))
+                warnings.append(MAVISpWarningError("standard deviation file not found for MutateX binding data"))
                 stds_df = None
-
+        
+            # Combine averages and stds data
+            if stds_df is not None:
+                interactor_data = averages_df.join(stds_df, how='outer')
+            else:
+                interactor_data = averages_df
+            
+            # Combine data across interactors
             if all_data is None:
-                all_data = averages_df
+                all_data = interactor_data
             else:
-                all_data = all_data.join(averages_df, how='outer')
+                all_data = all_data.join(interactor_data, how='outer')
 
-            if stds_df is not None:
-                all_data = all_dat.join(stds_df, how='outer')
-
-        return all_data, warnings
+        return all_data, warnings  
 
 class MutateXDNABinding(MutateXBinding):
 
@@ -329,7 +340,6 @@ def parse(self, dir_path):
         return avg_mutation_data, std_mutation_data, warnings
 
 class RosettaDDGPredictionBinding(Method):
-
     unit = "kcal/mol"
     type = "Local Int."
     chain = 'A'
@@ -350,8 +360,8 @@ def parse(self, dir_path):
 
         warnings = []
 
-        interactors = os.listdir(dir_path)
-        self.interactors = interactors
+        interactors = os.listdir(dir_path) 
+        self.interactors = interactors 
 
         if len(interactors) == 0:
             raise MAVISpMultipleError(critical=[MAVISpCriticalError("no interactor folders found")],
@@ -361,55 +371,106 @@ def parse(self, dir_path):
 
         for interactor in interactors:
 
-            interactor_dir = os.path.join(dir_path, interactor)
-            rosetta_files = os.listdir(interactor_dir)
-
+            interactor_dir = os.path.join(dir_path, interactor) 
+            rosetta_files = os.listdir(interactor_dir) 
+            
+            # Identify the correct files
+            agg_file = None
+            struct_file = None
+            
+            for f in rosetta_files:
+                if f.endswith('_aggregate.csv'):
+                    agg_file = os.path.join(interactor_dir, f)
+                elif f.endswith('_structures.csv'):
+                    struct_file = os.path.join(interactor_dir, f)
+            
+
+            # Expect either a single file or multiple directories containing one file each or agg_file + struct_file
             if len(rosetta_files) == 1 and os.path.isfile(os.path.join(interactor_dir, rosetta_files[0])):
-
+                
                 rosetta_file = rosetta_files[0]
+                # Parse single aggregate CSV file
                 mutation_data = self._parse_aggregate_csv(os.path.join(interactor_dir, rosetta_file), warnings)
-                mutation_data_std = None
 
+            # Multiple directories containing one file each
             elif len(rosetta_files) > 1 and all( [ os.path.isdir(os.path.join(interactor_dir, f)) for f in rosetta_files] ):
                 mutation_data = None
-                for c, conformer_dir in enumerate(rosetta_files):
+                for c, conformer_dir in enumerate(rosetta_files): 
 
-                    conformer_files = os.listdir(os.path.join(interactor_dir, conformer_dir))
+                    conformer_files = os.listdir(os.path.join(interactor_dir, conformer_dir)) 
 
                     if len(conformer_files) != 1:
                         text = "only one file per conformer is supported for RosettaDDGPrediction"
                         raise MAVISpMultipleError(critical=[MAVISpCriticalError(text)],
                                                   warning=warnings)
-
-                    conformer_data = self._parse_aggregate_csv(os.path.join(interactor_dir, conformer_dir, conformer_files[0]), warnings)
-
+                    
+                    conformer_data = self._parse_aggregate_csv(os.path.join(interactor_dir, conformer_dir, conformer_files[0]), warnings) 
                     conformer_data = conformer_data.rename(columns={'total_score' : f'total_score_{c}'})
 
                     if mutation_data is None:
                         mutation_data = conformer_data
                     else:
                         mutation_data = mutation_data.join(conformer_data)
-
+                        
+                # Average total_score across conformers
                 mutation_data = pd.DataFrame(mutation_data.mean(axis=1), columns=['total_score'])
-                mutation_data_std = pd.DataFrame(mutation_data.std(axis=1), columns=['total_score'])
+
+            elif agg_file is not None:
+                mutation_data = self._parse_aggregate_csv(agg_file, warnings)
+
+                # Parse struct_file exists
+                if struct_file is not None:
+                    std_df = self._parse_structure_csv(struct_file, warnings)
+
+                    if std_df is not None:
+                        std_df.columns = [
+                            f"{self.type} (Binding with {interactor}, {self.complex_status}, {self.version}, {self.unit}, st. dev.)"
+                        ]
+
+                    mutation_data = mutation_data.join(std_df, how="outer")
 
             else:
                 text = f"dataset {interactor_dir} was not either a single files, or multiple directories containing one file"
                 raise MAVISpMultipleError(critical=[MAVISpCriticalError(text)],
                                             warning=warnings)
 
+            
             mutation_data = mutation_data.rename(columns={'total_score':f'{self.type} (Binding with {interactor}, {self.complex_status}, {self.version}, {self.unit})'})
 
             if all_data is None:
                 all_data = mutation_data
             else:
                 all_data = all_data.join(mutation_data, how='outer')
+            
+        # return the combined data for all interactors
+        return all_data, warnings
 
-            if mutation_data_std is not None:
-                mutation_data_std = mutation_data_std.rename(columns={'total_score':f'{self.type} (Binding with {interactor}, {self.complex_status}, {self.version}, {self.unit}, st. dev.)'})
-                all_data = all_data.join(mutation_data_std, how='outer')
 
-        return all_data, warnings
+    def _parse_structure_csv(self, csvf, warnings):
+        """Parse the RosettaDDGPrediction binding structure CSV file."""
+        try:
+            df = pd.read_csv(csvf)
+        except Exception as e:
+            this_error = f"Exception {type(e).__name__} while reading structure CSV: {e.args}"
+            raise MAVISpMultipleError(
+                warning=warnings,
+                critical=[MAVISpCriticalError(this_error)]
+            )
+
+        #keep only ddg rows
+        df = df[df["state"] == "ddg"]
+
+        if df.empty:
+            warnings.append(f"{csvf}: no ddg rows found")
+            return None
+
+        #group by mutation and compute stdev of total_score
+        std_series = df.groupby("mutation_label")["total_score"].std()
+
+        #turn into DataFrame
+        std_df = std_series.to_frame(name ="total_score")
+
+        return std_df
 
 class AlloSigma(Method):
 

From 6accee5e0158d48c4fc67c13c3c7df42d23dfb83 Mon Sep 17 00:00:00 2001
From: Matteo Tiberti <matteo.tiberti@gmail.com>
Date: Tue, 6 Jan 2026 12:45:04 +0100
Subject: [PATCH 3/4] first complete implementation of st. dev. columns for
 simple mode

---
 mavisp/methods.py | 91 ++++++++++++++++++++++-------------------------
 1 file changed, 42 insertions(+), 49 deletions(-)

diff --git a/mavisp/methods.py b/mavisp/methods.py
index 66c549d..781886e 100644
--- a/mavisp/methods.py
+++ b/mavisp/methods.py
@@ -96,8 +96,8 @@ class MutateXBinding(Method):
     
     unit = "kcal/mol"
     type = "Local Int."
-    heterodimer_chains = set(['A']) # If chain A exists → keep A only.
-    homodimer_chains   = set(['AB']) # If chain A does NOT exist → the ONLY valid alternative is homodimer AB.
+    heterodimer_chains = set(['A'])
+    homodimer_chains   = set(['AB'])
     target_chain       = 'A'
     measure = "Binding with"
     averages_filename = 'energies.csv'
@@ -114,8 +114,8 @@ def __init__(self, version, complex_status=None):
         self.interactors = []
 
     # data_type is either '' or 'st. dev.'
-    def _parse_mutatex_binding_file(self, fname, interactor, data_type):
-        """Parse a single MutateX binding file (average or std)."""
+    def _parse_mutatex_binding_energy_file(self, fname, interactor, data_type):
+        """Parse a single MutateX binding energy file (average or std)."""
 
         try:
             df = pd.read_csv(fname)
@@ -156,15 +156,17 @@ def _parse_mutatex_binding_file(self, fname, interactor, data_type):
 
         # handle space around measure
         if self.measure == "":
-          measure = ""
+            measure = ""
         else:
-          measure = f"{self.measure} "
+            measure = f"{self.measure} "
 
         # rename column Local Int. (Binding with B, heterodimer, FoldX5, kcal/mol)
         if data_type is None or data_type == '':
-            colname =  f"{self.type} ({self.measure} {interactor}, {self.complex_status}, {self.version}, {self.unit})"
+            data_type = ""
         else:
-            colname =  f"{self.type} ({self.measure} {interactor}, {self.complex_status}, {self.version}, {self.unit}, {data_type})"
+            data_type = f", {data_type}"
+
+        colname = f"{self.type} ({measure}{interactor}, {self.complex_status}, {self.version}, {self.unit}{data_type})"
         
         return df.rename(columns={0 : colname}) # rename the sinle column named 0 to the formatted name
 
@@ -174,8 +176,8 @@ def parse(self, dir_path):
         warnings = []
         all_data = None
 
-        interactors = os.listdir(dir_path) #list of subfolders in dir_path
-        self.interactors = interactors #store interactors in the instance variable
+        interactors = os.listdir(dir_path) # list of subfolders in dir_path
+        self.interactors = interactors # store interactors in the instance variable
 
         if len(interactors) == 0:
             raise MAVISpMultipleError(critical=[MAVISpCriticalError("no interactor folders found")],
@@ -193,13 +195,13 @@ def parse(self, dir_path):
                                           critical=[MAVISpCriticalError(this_error)])   
             
             # Parse averages file
-            averages_df = self._parse_mutatex_binding_file(os.path.join(interactor_dir, self.averages_filename), interactor, '')
+            averages_df = self._parse_mutatex_binding_energy_file(os.path.join(interactor_dir, self.averages_filename), interactor, '')
 
             # Parse stds file if it exists
             if self.stds_filename in mutatex_files:
-                stds_df = self._parse_mutatex_binding_file(os.path.join(interactor_dir, self.stds_filename), interactor, 'st. dev.')
+                stds_df = self._parse_mutatex_binding_energy_file(os.path.join(interactor_dir, self.stds_filename), interactor, 'st. dev.')
             else:
-                warnings.append(MAVISpWarningError("standard deviation file not found for MutateX binding data"))
+                warnings.append(MAVISpWarningError("standard deviation file not found for MutateX binding energy data"))
                 stds_df = None
         
             # Combine averages and stds data
@@ -339,13 +341,14 @@ def parse(self, dir_path):
 
         return avg_mutation_data, std_mutation_data, warnings
 
-class RosettaDDGPredictionBinding(Method):
+class RosettaDDGPredictionBinding(RosettaDDGPredictionStability):
+
     unit = "kcal/mol"
     type = "Local Int."
     chain = 'A'
     complex_status = 'heterodimer'
-
-    _parse_aggregate_csv = RosettaDDGPredictionStability._parse_aggregate_csv
+    aggregate_fname = 'ddg_mutations_aggregate.csv'
+    structures_fname = 'ddg_mutations_structures.csv'
 
     def __init__(self, version, complex_status=None):
 
@@ -365,7 +368,7 @@ def parse(self, dir_path):
 
         if len(interactors) == 0:
             raise MAVISpMultipleError(critical=[MAVISpCriticalError("no interactor folders found")],
-                                        warning=[])
+                                      warning=[])
 
         all_data = None
 
@@ -378,20 +381,28 @@ def parse(self, dir_path):
             agg_file = None
             struct_file = None
             
-            for f in rosetta_files:
-                if f.endswith('_aggregate.csv'):
-                    agg_file = os.path.join(interactor_dir, f)
-                elif f.endswith('_structures.csv'):
-                    struct_file = os.path.join(interactor_dir, f)
-            
-
-            # Expect either a single file or multiple directories containing one file each or agg_file + struct_file
-            if len(rosetta_files) == 1 and os.path.isfile(os.path.join(interactor_dir, rosetta_files[0])):
-                
+            # Expect either a single file or multiple directories containing one file each
+            if len(rosetta_files) == 1 and os.path.isfile(os.path.join(interactor_dir, rosetta_files[0])) and rosetta_files[0] == self.aggregate_fname:
                 rosetta_file = rosetta_files[0]
                 # Parse single aggregate CSV file
                 mutation_data = self._parse_aggregate_csv(os.path.join(interactor_dir, rosetta_file), warnings)
 
+            # or structures file (which we can use to calculate average and stdev)
+            elif (len(rosetta_files) == 1 and\
+                    os.path.isfile(os.path.join(interactor_dir, rosetta_files[0])) and \
+                    rosetta_files[0] == self.structures_fname)    or\
+                 (set(rosetta_files) == set([self.aggregate_fname, self.structures_fname]) and\
+                    os.path.isfile(os.path.join(interactor_dir, rosetta_files[0])) and\
+                    os.path.isfile(os.path.join(interactor_dir, rosetta_files[1]))):
+
+                if len(rosetta_files) == 2:
+                    warnings.append(MAVISpWarningError(f"for {interactor}, both Rosetta aggregate and structures file were found; the aggregate file will be ignored"))
+
+                mutation_data, std_df = self._parse_structure_csv(os.path.join(interactor_dir, self.structures_fname), warnings)
+
+                std_df = std_df.rename(columns={'total_score' : f"{self.type} (Binding with {interactor}, {self.complex_status}, {self.version}, {self.unit}, st. dev.)"})
+                mutation_data = mutation_data.join(std_df, how="outer")
+
             # Multiple directories containing one file each
             elif len(rosetta_files) > 1 and all( [ os.path.isdir(os.path.join(interactor_dir, f)) for f in rosetta_files] ):
                 mutation_data = None
@@ -415,26 +426,11 @@ def parse(self, dir_path):
                 # Average total_score across conformers
                 mutation_data = pd.DataFrame(mutation_data.mean(axis=1), columns=['total_score'])
 
-            elif agg_file is not None:
-                mutation_data = self._parse_aggregate_csv(agg_file, warnings)
-
-                # Parse struct_file exists
-                if struct_file is not None:
-                    std_df = self._parse_structure_csv(struct_file, warnings)
-
-                    if std_df is not None:
-                        std_df.columns = [
-                            f"{self.type} (Binding with {interactor}, {self.complex_status}, {self.version}, {self.unit}, st. dev.)"
-                        ]
-
-                    mutation_data = mutation_data.join(std_df, how="outer")
-
             else:
-                text = f"dataset {interactor_dir} was not either a single files, or multiple directories containing one file"
+                text = f"dataset {interactor_dir} did not contain an expected folder structure"
                 raise MAVISpMultipleError(critical=[MAVISpCriticalError(text)],
                                             warning=warnings)
 
-            
             mutation_data = mutation_data.rename(columns={'total_score':f'{self.type} (Binding with {interactor}, {self.complex_status}, {self.version}, {self.unit})'})
 
             if all_data is None:
@@ -445,7 +441,6 @@ def parse(self, dir_path):
         # return the combined data for all interactors
         return all_data, warnings
 
-
     def _parse_structure_csv(self, csvf, warnings):
         """Parse the RosettaDDGPrediction binding structure CSV file."""
         try:
@@ -460,17 +455,15 @@ def _parse_structure_csv(self, csvf, warnings):
         #keep only ddg rows
         df = df[df["state"] == "ddg"]
 
-        if df.empty:
-            warnings.append(f"{csvf}: no ddg rows found")
-            return None
-
         #group by mutation and compute stdev of total_score
         std_series = df.groupby("mutation_label")["total_score"].std()
+        average_series = df.groupby("mutation_label")["total_score"].mean()
 
         #turn into DataFrame
         std_df = std_series.to_frame(name ="total_score")
+        average_df = average_series.to_frame(name ="total_score")
 
-        return std_df
+        return average_df, std_df
 
 class AlloSigma(Method):
 

From e6c9b5bd2232bce6405d90d0a08797ad5d61ade7 Mon Sep 17 00:00:00 2001
From: Matteo Tiberti <matteo.tiberti@gmail.com>
Date: Tue, 6 Jan 2026 13:31:21 +0100
Subject: [PATCH 4/4] moved location of function definition

---
 mavisp/methods.py | 53 +++++++++++++++++++++++++----------------------
 1 file changed, 28 insertions(+), 25 deletions(-)

diff --git a/mavisp/methods.py b/mavisp/methods.py
index 781886e..f14dbd4 100644
--- a/mavisp/methods.py
+++ b/mavisp/methods.py
@@ -171,7 +171,10 @@ def _parse_mutatex_binding_energy_file(self, fname, interactor, data_type):
         return df.rename(columns={0 : colname}) # rename the sinle column named 0 to the formatted name
 
     def parse(self, dir_path):
-        """ reads the MutateX output files (energies.csv + energies_std.csv) for each interactor, converts them into mutation-indexed dataframes, and returns them to the Local interaction module."""
+        """
+        reads the MutateX output files (energies.csv + energies_std.csv) for each interactor,
+        converts them into mutation-indexed dataframes, and returns them to the Local interaction module.
+        """
         
         warnings = []
         all_data = None
@@ -359,6 +362,30 @@ def __init__(self, version, complex_status=None):
 
         self.interactors = []
 
+    def _parse_structure_csv(self, csvf, warnings):
+        """Parse the RosettaDDGPrediction binding structure CSV file.
+
+        Returns (average_df, std_df): mean and st. dev. of the "ddg" total_score per
+        mutation_label, each as a DataFrame with a single "total_score" column. Raises
+        MAVISpMultipleError if the file cannot be read or lacks the required columns.
+        """
+        try:
+            df = pd.read_csv(csvf)
+            # keep only ddg rows and group them by mutation; column lookups stay inside
+            # the try block so that a malformed file is reported as a critical error
+            scores = df[df["state"] == "ddg"].groupby("mutation_label")["total_score"]
+        except Exception as e:
+            this_error = f"Exception {type(e).__name__} while reading structure CSV: {e.args}"
+            raise MAVISpMultipleError(
+                warning=warnings,
+                critical=[MAVISpCriticalError(this_error)]
+            ) from e
+
+        # one groupby feeds both statistics; st. dev. is NaN for single-row groups
+        average_df = scores.mean().to_frame(name="total_score")
+        std_df = scores.std().to_frame(name="total_score")
+        return average_df, std_df
+
     def parse(self, dir_path):
 
         warnings = []
@@ -441,30 +468,6 @@ def parse(self, dir_path):
         # return the combined data for all interactors
         return all_data, warnings
 
-    def _parse_structure_csv(self, csvf, warnings):
-        """Parse the RosettaDDGPrediction binding structure CSV file."""
-        try:
-            df = pd.read_csv(csvf)
-        except Exception as e:
-            this_error = f"Exception {type(e).__name__} while reading structure CSV: {e.args}"
-            raise MAVISpMultipleError(
-                warning=warnings,
-                critical=[MAVISpCriticalError(this_error)]
-            )
-
-        #keep only ddg rows
-        df = df[df["state"] == "ddg"]
-
-        #group by mutation and compute stdev of total_score
-        std_series = df.groupby("mutation_label")["total_score"].std()
-        average_series = df.groupby("mutation_label")["total_score"].mean()
-
-        #turn into DataFrame
-        std_df = std_series.to_frame(name ="total_score")
-        average_df = average_series.to_frame(name ="total_score")
-
-        return average_df, std_df
-
 class AlloSigma(Method):
 
     name = "AlloSigma"