diff --git a/DFconverter.py b/DFconverter.py
deleted file mode 100644
index 5cb2f23..0000000
--- a/DFconverter.py
+++ /dev/null
@@ -1,22 +0,0 @@
-from hipe4ml.tree_handler import TreeHandler
-import tomli
-import sys
-
-
-def convertDF(input_file):
-    """
-    Opens input file in toml format, retrives signal, background and deploy data
-    like TreeHandler objects
-
-    Parameters
-    ------------------------------------------------
-    df: str
-        input toml file
-    """
-    with open(str(input_file), encoding="utf-8") as inp_file:
-        inp_dict = tomli.load(inp_file)
-    signal = TreeHandler(inp_dict["signal"]["path"], inp_dict["signal"]["tree"])
-    background = TreeHandler(inp_dict["background"]["path"], inp_dict["background"]["tree"])
-    deploy =  TreeHandler(inp_dict["deploy"]["path"], inp_dict["deploy"]["tree"])
-
-    return signal, background, deploy
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..7995e55
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 CandidatesClassifier
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/MLconfig_XGboostParameters.py b/MLconfig_XGboostParameters.py
deleted file mode 100644
index fcf6798..0000000
--- a/MLconfig_XGboostParameters.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import numpy as np
-import pandas as pd
-import xgboost as xgb
-import matplotlib.pyplot as plt
-
-from bayes_opt import BayesianOptimization
-import gc
-
-
-class XGBoostParams:
-
-    def __init__(self, params, dtrain):
-        self.params = params
-        self.dtrain = dtrain
-
-    def bo_tune_xgb(self, max_depth, gamma, alpha, n_estimators ,learning_rate):
-
-        params1 = {'max_depth': int(max_depth),
-                  'gamma': gamma,
-                  'alpha':alpha,
-                  'n_estimators': n_estimators,
-                  'learning_rate':learning_rate,
-                  'subsample': 0.8,
-                  'eta': 0.3,
-                  'eval_metric': 'auc', 'nthread' : 7}
-
-        cv_result = xgb.cv(params1, self.dtrain, num_boost_round=10, nfold=5)
-        return  cv_result['test-auc-mean'].iloc[-1]
-
-    def get_best_params(self):
-        """
-        Performs Bayesian Optimization and looks for the best parameters
-
-        Parameters:
-               None
-        """
-        #Invoking the Bayesian Optimizer with the specified parameters to tune
-        xgb_bo = BayesianOptimization(self.bo_tune_xgb, {'max_depth': (4, 10),
-                                                     'gamma': (0, 1),
-                                                    'alpha': (2,20),
-                                                     'learning_rate':(0,1),
-                                                     'n_estimators':(100,500)
-                                                    })
-        #performing Bayesian optimization for 5 iterations with 8 steps of random exploration
-        # with an #acquisition function of expected improvement
-        xgb_bo.maximize(n_iter=1, init_points=1)
-
-        max_param = xgb_bo.max['params']
-        param1= {'alpha': max_param['alpha'], 'gamma': max_param['gamma'], 'learning_rate': max_param['learning_rate'],
-         'max_depth': int(round(max_param['max_depth'],0)), 'n_estimators': int(round(max_param['n_estimators'],0)),
-          'objective': 'reg:logistic'}
-        gc.collect()
-
-
-        #To train the algorithm using the parameters selected by bayesian optimization
-        #Fit/train on training data
-        bst = xgb.train(param1, self.dtrain)
-        return bst
diff --git a/MLconfig_variables.py b/cand_class/MLconfig_variables.py
similarity index 79%
rename from MLconfig_variables.py
rename to cand_class/MLconfig_variables.py
index bc013b8..605c183 100644
--- a/MLconfig_variables.py
+++ b/cand_class/MLconfig_variables.py
@@ -6,6 +6,15 @@
 from scipy.stats import binned_statistic as b_s
 import matplotlib as mpl
 
+from hipe4ml import plot_utils
+
+def correlation_matrix(bgr, sign, vars_to_draw, leg_labels, output_path):
+    res_s_b = plot_utils.plot_corr([bgr, sign], vars_to_draw, leg_labels)
+    res_s_b[0].savefig(output_path+'/'+'corr_matrix_bgr.png')
+    res_s_b[1].savefig(output_path+'/'+'corr_matrix_sign.png')
+
+
+
 def calculate_correlation(df, vars_to_corr, target_var) :
     """
     Calculates correlations with target variable variable and standart errors
@@ -56,18 +65,18 @@ def plot1Dcorrelation(vars_to_draw,var_to_corr, corr_signal, corr_signal_errors,
          background covariance standart error of the mean
     output_path:
           path that contains output plot
-
     """
 
-    fig, ax = plt.subplots(figsize=(20,10))
-    plt.errorbar(vars_to_draw, corr_signal, yerr=corr_signal_errors, fmt='')
-    plt.errorbar(vars_to_draw, corr_bg, yerr=corr_bg_errors, fmt='')
+    fig, ax = plt.subplots(figsize=(10,6))
+    plt.errorbar(vars_to_draw, corr_signal, yerr=corr_signal_errors, fmt='--o')
+    plt.errorbar(vars_to_draw, corr_bg, yerr=corr_bg_errors, fmt='--o')
     ax.grid(zorder=0)
-    ax.set_xticklabels(vars_to_draw, fontsize=25, rotation =70)
-    ax.set_yticklabels([-0.5,-0.4,  -0.2,0, -0.2, 0.4], fontsize=25)
-    plt.legend(('signal','background'), fontsize = 25)
-    plt.title('Correlation of all variables with '+ var_to_corr+' along with SEM', fontsize = 25)
-    plt.ylabel('Correlation coefficient', fontsize = 25)
+    ax.set_xticklabels(vars_to_draw, fontsize=15, rotation =70)
+    # ax.set_yticklabels([-0.5,-0.4,  -0.2,0, -0.2, 0.4], fontsize=25)
+    ax.yaxis.set_tick_params(labelsize=15)
+    plt.legend(('signal','background'), fontsize = 15)
+    plt.title('Correlation of all variables with '+ var_to_corr+' along with SEM', fontsize = 18)
+    plt.ylabel('Correlation coefficient', fontsize = 15)
     fig.tight_layout()
     fig.savefig(output_path+'/all_vars_corr-'+ var_to_corr+'.png')
 
@@ -78,33 +87,24 @@ def profile_mass(df,variable_xaxis, sign, peak, edge_left, edge_right, pdf_key):
     This function takes the entries of the variables and distributes them in 25 bins.
     The function then plots the bin centers of the first variable on the x-axis and
     the mean values of the bins of the second variable on the y-axis, along with its bin stds.
-
     Parameters
     ------------------------------------------------
     df: pandas.DataFrame
         input DataFrame
-
     variable_xaxis: str
         variable to be plotted on x axis (invariant mass)
-
     x_unit: str
         x axis variable units
-
     variable_yaxis: str
         variable to be plotted on y axis
-
     sgn: int(0 or 1)
          signal definition(0 background, 1 signal)
-
     pdf_key: matplotlib.backends.backend_pdf.PdfPages
         output pdf file
-
     peak: int
         invariant mass peak position
-
     edge_left: int
         left edge of x axis variable
-
     edge_right: int
         left edge of y axis variable
     """
@@ -119,7 +119,7 @@ def profile_mass(df,variable_xaxis, sign, peak, edge_left, edge_right, pdf_key):
     for var in df.columns:
         if var != variable_xaxis:
 
-            fig, axs = plt.subplots(figsize=(20, 15))
+            fig, axs = plt.subplots(figsize=(10, 6))
 
             bin_means, bin_edges, binnumber = b_s(df[variable_xaxis],df[var], statistic='mean', bins=25)
             bin_std, bin_edges, binnumber = b_s(df[variable_xaxis],df[var], statistic='std', bins=25)
@@ -134,19 +134,21 @@ def profile_mass(df,variable_xaxis, sign, peak, edge_left, edge_right, pdf_key):
             bin_std = np.delete(bin_std , nan_ind)
 
 
-            plt.errorbar(x=bin_centers, y=bin_means, yerr=(bin_std/np.sqrt(bin_count)), linestyle='none', marker='.',mfc='red', ms=10)
-
+            plt.errorbar(x=bin_centers, y=bin_means, yerr=(bin_std/np.sqrt(bin_count)), linestyle='none', linewidth = 2, marker='.',mfc='red', ms=15)
 
+            plt.locator_params(axis='y', nbins=5)
+            plt.locator_params(axis='x', nbins=5)
 
-            plt.title('Mean of ' +var+ ' plotted versus bin centers of '+variable_xaxis+ \
-                      '('+keyword+')', fontsize=25)
-            plt.xlabel('Mass', fontsize=25)
-            plt.ylabel("Mean of each bin with the SEM ($\dfrac{bin\ std}{\sqrt{bin\ count}}$) of bin", fontsize=25)
+            plt.title('Mean of ' +var+ '  vs bin centers of '+variable_xaxis+ \
+                      '('+keyword+')', fontsize=19)
+            plt.xlabel('Mass', fontsize=17)
+            plt.ylabel(" SEM ($\dfrac{bin\ std}{\sqrt{bin\ count}}$) of bin", fontsize=17)
 
 
-            plt.vlines(x=peak,ymin=bin_means.min(),ymax=bin_means.max(), color='r', linestyle='-')
-
+            plt.vlines(x=peak,ymin=bin_means.min(),ymax=bin_means.max(), color='r', linestyle='-', linewidth = 3)
 
+            axs.xaxis.set_tick_params(labelsize=16)
+            axs.yaxis.set_tick_params(labelsize=16)
             fig.tight_layout()
             plt.savefig(pdf_key,format='pdf')
 
@@ -160,13 +162,10 @@ def plot2D_all(df, sample, sgn, pdf_key):
     ------------------------------------------------
     df: pandas.DataFrame
         input dataframe
-
     sample: str
          title of the sample
-
     sgn: int(0 or 1)
          signal definition(0 background, 1 signal)
-
     pdf_key: matplotlib.backends.backend_pdf.PdfPages
         output pdf file
     """
@@ -206,48 +205,49 @@ def plot2D_mass(df, sample, mass_var, mass_range, sgn, peak, pdf_key):
     ------------------------------------------------
     df: pandas.DataFrame
         input dataframe
-
     sample: str
          title of the sample
-
-
     mass_var: str
         name of the invariant mass variable
-
     mass_range: list
         mass range to be plotted
-
     sgn: int(0 or 1)
          signal definition(0 background, 1 signal)
-
     peak: int
         invariant mass value
-
     pdf_key: matplotlib.backends.backend_pdf.PdfPages
         output pdf file
     """
 
     for var in df.columns:
         if var != mass_var:
-            fig, axs = plt.subplots(figsize=(15, 10))
+            fig, axs = plt.subplots(figsize=(6, 4))
             cax = plt.hist2d(df[mass_var],df[var],range=[mass_range, [df[var].min(), df[var].max()]], bins=100,
                         norm=mpl.colors.LogNorm(), cmap=plt.cm.viridis)
 
 
             if sgn==1:
-                plt.title('Signal candidates ' + sample, fontsize = 25)
+                plt.title('Signal candidates ' + sample, fontsize = 15)
 
             if sgn==0:
-                plt.title('Background candidates ' + sample, fontsize = 25)
+                plt.title('Background candidates ' + sample, fontsize = 15)
 
 
-            plt.xlabel(mass_var, fontsize=25)
-            plt.ylabel(var, fontsize=25)
+            plt.xlabel(mass_var, fontsize=16)
+            plt.ylabel(var, fontsize=16)
 
-            plt.vlines(x=peak,ymin=df[var].min(),ymax=df[var].max(), color='r', linestyle='-')
+            plt.vlines(x=peak,ymin=df[var].min(),ymax=df[var].max(), color='r', linestyle='-', linewidth = 4)
 
             mpl.pyplot.colorbar()
 
+
+            axs.xaxis.set_tick_params(labelsize=11)
+            axs.yaxis.set_tick_params(labelsize=11)
+
+            plt.locator_params(axis='y', nbins=5)
+            plt.locator_params(axis='x', nbins=5)
+
+
             plt.legend(shadow=True,title =str(len(df))+ " samples")
 
             fig.tight_layout()
diff --git a/cand_class/apply_model.py b/cand_class/apply_model.py
new file mode 100644
index 0000000..146deb8
--- /dev/null
+++ b/cand_class/apply_model.py
@@ -0,0 +1,428 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import xgboost as xgb
+from hipe4ml.plot_utils import plot_roc_train_test
+from cand_class.helper import *
+from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score
+
+from dataclasses import dataclass
+
+from matplotlib.font_manager import FontProperties
+
+from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
+                               AutoMinorLocator)
+
+import gc
+import matplotlib as mpl
+from cand_class.helper import *
+from hipe4ml import plot_utils
+
+mpl.rc('figure', max_open_warning = 0)
+
+
+@dataclass
+class ApplyXGB:
+    """
+    A class used to apply XGBoost predictions on the data
+
+    ...
+
+    Attributes
+    ----------
+    x_train : pd.core.frame.DataFrame
+        train dataframe
+    x_test : pd.core.frame.DataFrame
+        test dataframe
+    y_pred_train : np.ndarray
+        array with XGBoost predictions for train dataset
+    y_pred_test : np.ndarray
+        array with XGBoost predictions for test dataset
+    output_path : str
+        output path for plots
+
+    Methods
+    -------
+    get_predictions()
+        Returns train and test dataframes with predictions
+    features_importance(bst)
+
+    """
+
+    x_train : pd.core.frame.DataFrame
+    x_test : pd.core.frame.DataFrame
+
+    y_pred_train : np.ndarray
+    y_pred_test : np.ndarray
+
+    y_train : np.ndarray
+    y_test : np.ndarray
+
+    output_path : str
+
+    __train_res = pd.DataFrame()
+    __test_res = pd.DataFrame()
+
+    __best_train_thr : int = 0
+    __best_test_thr : int = 0
+
+
+    def get_predictions(self):
+        """
+        Makes XGB predictions
+
+        Returns
+        --------
+
+        Train and test dataframes with predictions
+        """
+        self.__train_res = self.x_train.copy()
+        self.__train_res['xgb_preds'] = self.y_pred_train
+
+        self.__test_res = self.x_test.copy()
+        self.__test_res['xgb_preds'] = self.y_pred_test
+
+        return self.__train_res, self.__test_res
+
+
+    def apply_prob_cut(self, ams, train_thr, test_thr):
+        """
+        Applies BDT cut on XGB probabilities and returns 'xgb_preds1' ==1 if
+        sample's prediction > BDT threshold, otherwise 'xgb_preds1' ==0
+        If ams==1 BDT cut is computed with respect to AMS metrics optimization
+        If ams==0 and train_thr!=0 and test_thr!=0, one should specify
+        thresholds for test and train datasets manually
+        """
+
+        if ams==1:
+            self.__best_train_thr, self.__best_test_thr, roc_curve_data = AMS(self.y_train, self.y_pred_train,
+             self.y_test, self.y_pred_test, self.output_path)
+
+        if ams==0 and train_thr!=0 and test_thr!=0:
+            self.__best_train_thr = train_thr
+            self.__best_test_thr = test_thr
+
+        train_pred = ((self.y_pred_train > self.__best_train_thr)*1)
+        test_pred = ((self.y_pred_test > self.__best_test_thr)*1)
+
+
+        self.__train_res['xgb_preds1'] = train_pred
+
+        self.__test_res['xgb_preds1'] = test_pred
+
+        return self.__train_res, self.__test_res
+
+
+    def print_roc(self):
+        plot_utils.plot_roc_train_test(self.y_test, self.__test_res['xgb_preds1'],
+        self.y_train, self.__train_res['xgb_preds1'], None, ['background', 'signal'])
+        plt.savefig(str(self.output_path)+'/roc_curve.png')
+
+
+
+
+    def features_importance(self, bst):
+         """
+         Plots confusion matrix. A Confusion Matrix C is such that Cij is equal to
+         the number of observations known to be in group i and predicted to be in
+         group j. Thus in binary classification, the count of true positives is C00,
+         false negatives C01,false positives is C10, and true neagtives is C11.
+
+         Confusion matrix is applied to previously unseen by model data, so we can
+         estimate model's performance
+
+         Parameters
+         ----------
+         bst: xgboost.sklearn.XGBClassifier
+               model's XGB classifier
+
+         Returns
+         -------
+
+             Saves plot with XGB features imporance
+
+         """
+         ax = xgb.plot_importance(bst)
+         plt.rcParams['figure.figsize'] = [6, 3]
+         plt.rcParams['font.size'] = 15
+         ax.xaxis.set_tick_params(labelsize=13)
+         ax.yaxis.set_tick_params(labelsize=13)
+         ax.figure.tight_layout()
+         ax.figure.savefig(str(self.output_path)+"/xgb_train_variables_rank.png")
+
+
+    def CM_plot_train_test(self, issignal):
+         """
+         Plots confusion matrix. A Confusion Matrix C is such that Cij is equal to
+         the number of observations known to be in group i and predicted to be in
+         group j. Thus in binary classification, the count of true positives is C00,
+         false negatives C01,false positives is C10, and true neagtives is C11.
+
+         Confusion matrix is applied to previously unseen by model data, so we can
+         estimate model's performance
+
+         Parameters
+         ----------
+         issignal: str
+                   signal label
+
+         Returns
+         -------
+
+             Saves plot with confusion matrix
+
+         """
+
+         cnf_matrix_train = confusion_matrix(self.__train_res[issignal], self.__train_res['xgb_preds1'], labels=[1,0])
+         np.set_printoptions(precision=2)
+         fig_train, axs_train = plt.subplots(figsize=(8, 6))
+         axs_train.yaxis.set_label_coords(-0.04,.5)
+         axs_train.xaxis.set_label_coords(0.5,-.005)
+
+         axs_train.xaxis.set_tick_params(labelsize=15)
+         axs_train.yaxis.set_tick_params(labelsize=15)
+
+         plot_confusion_matrix(cnf_matrix_train, classes=['signal','background'],
+          title=' Train Dataset Confusion Matrix for cut > '+"%.4f"%self.__best_train_thr)
+         plt.savefig(str(self.output_path)+'/confusion_matrix_extreme_gradient_boosting_train.png')
+
+
+         cnf_matrix_test = confusion_matrix(self.__test_res[issignal], self.__test_res['xgb_preds1'], labels=[1,0])
+         np.set_printoptions(precision=2)
+         fig_test, axs_test = plt.subplots(figsize=(8, 6))
+         axs_test.yaxis.set_label_coords(-0.04,.5)
+         axs_test.xaxis.set_label_coords(0.5,-.005)
+
+         axs_test.xaxis.set_tick_params(labelsize=15)
+         axs_test.yaxis.set_tick_params(labelsize=15)
+
+         plot_confusion_matrix(cnf_matrix_test, classes=['signal','background'],
+           title=' Test Dataset Confusion Matrix for cut > '+"%.4f"%self.__best_test_thr)
+         plt.savefig(str(self.output_path)+'/confusion_matrix_extreme_gradient_boosting_test.png')
+
+
+    def pT_vs_rapidity(self, df, sign_label, pred_label, x_range, y_range, data_name, pt_rap):
+        """
+        Plots distribution in pT-rapidity phase space
+
+        Parameters
+        ----------
+        df: pd.DataFrame
+            dataframe with XGBoost predictions
+        sign_label: str
+             dataframe column that srecifies if the sample is signal or not
+        pred_label: str
+             dataframe column with XGBoost prediction
+        x_range: list
+                rapidity range
+        y_range: list
+                 pT range
+        data_name: str
+                name of the dataset (for example, train or test)
+        pt_rap: list
+                 list with pt and rapidity labels(for example ['pt', 'rapid'])
+
+        Returns
+        -------
+
+            Saves istribution in pT-rapidity phase space
+
+        """
+        fig, axs = plt.subplots(1,3, figsize=(15, 4), gridspec_kw={'width_ratios': [1, 1, 1]})
+
+        df_orig = df[df[sign_label]==1]
+
+        df_cut = df[df[pred_label]==1]
+
+        diff_vars = pt_rap
+
+        difference = pd.concat([df_orig[diff_vars], df_cut[diff_vars]]).drop_duplicates(keep=False)
+
+        s_label = 'Signal '
+
+        axs[0].set_aspect(aspect = 'auto')
+        axs[1].set_aspect(aspect = 'auto')
+        axs[2].set_aspect(aspect = 'auto')
+
+        rej = round((1 -  (df_cut.shape[0] / df_orig.shape[0])) * 100, 5)
+        saved = round((df_cut.shape[0] / df_orig.shape[0]) * 100, 5)
+        diff = df_orig.shape[0] - df_cut.shape[0]
+        axs[0].legend(shadow=True, title =str(len(df_orig))+' samples', fontsize =14)
+        axs[1].legend(shadow=True, title =str(len(df_cut))+' samples', fontsize =14)
+        axs[2].legend(shadow=True, title ='ML cut saves \n'+ str(saved) +'% of '+ s_label, fontsize =14)
+
+
+
+        counts0, xedges0, yedges0, im0 = axs[0].hist2d(df_orig[pt_rap[1]], df_orig[pt_rap[0]] , range = [x_range, y_range], bins=100,
+                    norm=mpl.colors.LogNorm(), cmap=plt.cm.rainbow)
+
+        axs[0].set_title(s_label + ' candidates before ML cut '+data_name, fontsize = 16)
+        axs[0].set_xlabel('rapidity', fontsize=15)
+        axs[0].set_ylabel('pT, GeV', fontsize=15)
+
+
+        mpl.pyplot.colorbar(im0, ax = axs[0])
+
+
+
+        axs[0].xaxis.set_major_locator(MultipleLocator(1))
+        axs[0].xaxis.set_major_formatter(FormatStrFormatter('%d'))
+
+        axs[0].xaxis.set_tick_params(which='both', width=2)
+
+
+        fig.tight_layout()
+
+
+        counts1, xedges1, yedges1, im1 = axs[1].hist2d(df_cut[pt_rap[1]], df_cut[pt_rap[0]] , range = [x_range, y_range], bins=100,
+                    norm=mpl.colors.LogNorm(), cmap=plt.cm.rainbow)
+
+        axs[1].set_title(s_label + ' candidates after ML cut '+data_name, fontsize = 16)
+        axs[1].set_xlabel('rapidity', fontsize=15)
+        axs[1].set_ylabel('pT, GeV', fontsize=15)
+
+        mpl.pyplot.colorbar(im1, ax = axs[1])
+
+        axs[1].xaxis.set_major_locator(MultipleLocator(1))
+        axs[1].xaxis.set_major_formatter(FormatStrFormatter('%d'))
+
+        axs[1].xaxis.set_tick_params(which='both', width=2)
+
+        fig.tight_layout()
+
+
+        counts2, xedges2, yedges2, im2 = axs[2].hist2d(difference[pt_rap[1]], difference[pt_rap[0]] , range = [x_range, y_range], bins=100,
+                    norm=mpl.colors.LogNorm(), cmap=plt.cm.rainbow)
+
+        axs[2].set_title(s_label + ' difference ', fontsize = 16)
+        axs[2].set_xlabel('rapidity', fontsize=15)
+        axs[2].set_ylabel('pT, GeV', fontsize=15)
+
+        mpl.pyplot.colorbar(im1, ax = axs[2])
+
+
+        axs[2].xaxis.set_major_locator(MultipleLocator(1))
+        axs[2].xaxis.set_major_formatter(FormatStrFormatter('%d'))
+
+        axs[2].xaxis.set_tick_params(which='both', width=2)
+
+        fig.tight_layout()
+
+        fig.savefig(self.output_path+'/pT_rapidity_'+s_label+'_ML_cut_'+data_name+'.png')
+
+
+
+    def hist_variables(self, mass_var, df, sign_label, pred_label,  sample, pdf_key):
+        """
+        Applied quality cuts and created distributions for all the features in pdf
+        file
+        Parameters
+        ----------
+        df_s: dataframe
+              signal
+        df_b: dataframe
+              background
+        feature: str
+                name of the feature to be plotted
+        pdf_key: PdfPages object
+                name of pdf document with distributions
+        """
+
+        dfs_orig = df[df[sign_label]==1]
+        dfb_orig = df[df[sign_label]==0]
+
+        dfs_cut = df[(df[sign_label]==1) & (df[pred_label]==1)]
+        dfb_cut = df[(df[sign_label]==0) & (df[pred_label]==1)]
+
+        diff_vars = dfs_orig.columns.drop([sign_label, pred_label])
+
+        difference_s = pd.concat([dfs_orig[diff_vars], dfs_cut[diff_vars]]).drop_duplicates(keep=False)
+
+
+
+        for feature in diff_vars:
+            fig, ax = plt.subplots(3, figsize=(15, 10))
+
+
+            fontP = FontProperties()
+            fontP.set_size('xx-large')
+
+            ax[0].hist(dfs_orig[feature], label = 'signal', bins = 500, alpha = 0.4, color = 'blue')
+            ax[0].hist(dfb_orig[feature], label = 'background', bins = 500, alpha = 0.4, color = 'red')
+            ax[0].legend(shadow=True,title = 'S/B='+ str(round(len(dfs_orig)/len(dfb_orig), 3)) +
+
+                       '\n S samples:  '+str(dfs_orig.shape[0]) + '\n B samples: '+ str(dfb_orig.shape[0]) +
+                       '\nquality cuts ',
+                       title_fontsize=15, fontsize =15, bbox_to_anchor=(1.05, 1),
+                        loc='upper left', prop=fontP,)
+
+            ax[0].set_xlim(dfb_orig[feature].min(), dfb_orig[feature].max())
+
+            ax[0].xaxis.set_tick_params(labelsize=15)
+            ax[0].yaxis.set_tick_params(labelsize=15)
+
+            ax[0].set_title(str(feature) + ' MC '+ sample + ' before ML cut', fontsize = 25)
+            ax[0].set_xlabel(feature, fontsize = 25)
+
+            if feature!=mass_var:
+                ax[0].set_yscale('log')
+
+            fig.tight_layout()
+
+
+            if len(dfb_cut) !=0:
+                s_b_cut = round(len(dfs_cut)/len(dfb_cut), 3)
+                title1 = 'S/B='+ str(s_b_cut)
+            else:
+                title1 = 'S = '+str(len(dfs_cut)) + ' all bgr was cut'
+
+
+
+            ax[1].hist(dfs_cut[feature], label = 'signal', bins = 500, alpha = 0.4, color = 'blue')
+            ax[1].hist(dfb_cut[feature], label = 'background', bins = 500, alpha = 0.4, color = 'red')
+            ax[1].legend(shadow=True,title =  title1 +
+                       '\n S samples:  '+str(dfs_cut.shape[0]) + '\n B samples: '+ str(dfb_cut.shape[0]) +
+                       '\nquality cuts + ML cut',
+                        title_fontsize=15, fontsize =15, bbox_to_anchor=(1.05, 1),
+                        loc='upper left', prop=fontP,)
+
+
+            ax[1].set_xlim(dfb_orig[feature].min(), dfb_orig[feature].max())
+
+            ax[1].xaxis.set_tick_params(labelsize=15)
+            ax[1].yaxis.set_tick_params(labelsize=15)
+
+            ax[1].set_title(feature + ' MC '+ sample+ ' after ML cut', fontsize = 25)
+            ax[1].set_xlabel(feature, fontsize = 25)
+
+            if feature!='mass':
+                ax[1].set_yscale('log')
+
+            fig.tight_layout()
+
+
+
+
+            ax[2].hist(difference_s[feature], label = 'signal', bins = 500, alpha = 0.4, color = 'blue')
+            ax[2].legend(shadow=True,title ='S samples: '+str(len(difference_s)) +'\nsignal difference',
+                        title_fontsize=15, fontsize =15, bbox_to_anchor=(1.05, 1),
+                        loc='upper left', prop=fontP,)
+
+
+            ax[2].set_xlim(dfb_orig[feature].min(), dfb_orig[feature].max())
+
+            ax[2].xaxis.set_tick_params(labelsize=15)
+            ax[2].yaxis.set_tick_params(labelsize=15)
+
+            ax[2].set_title(feature + ' MC '+ sample +' signal difference', fontsize = 25)
+            ax[2].set_xlabel(feature, fontsize = 25)
+
+            if feature!=mass_var:
+                ax[2].set_yscale('log')
+
+            fig.tight_layout()
+
+            plt.savefig(pdf_key,format='pdf')
+
+
+        pdf_key.close()
diff --git a/cand_class/config_reader.py b/cand_class/config_reader.py
new file mode 100644
index 0000000..24b47eb
--- /dev/null
+++ b/cand_class/config_reader.py
@@ -0,0 +1,53 @@
+from hipe4ml.tree_handler import TreeHandler
+import tomli
+import sys
+
+
+def convertDF(input_file, mass_var):
+    """
+    Opens input file in toml format, retrives signal, background and deploy data
+    like TreeHandler objects
+
+    Parameters
+    ------------------------------------------------
+    df: str
+        input toml file
+    """
+    with open(str(input_file), encoding="utf-8") as inp_file:
+        inp_info = tomli.load(inp_file)
+
+    signal = TreeHandler(inp_info["signal"]["path"], inp_info["signal"]["tree"])
+    background = TreeHandler(inp_info["background"]["path"], inp_info["background"]["tree"])
+
+
+
+
+
+    bgr_left_edge = inp_info["peak_range"]["bgr_left_edge"]
+    bgr_right_edge = inp_info["peak_range"]["bgr_right_edge"]
+
+    peak_left_edge = inp_info["peak_range"]["sgn_left_edge"]
+    peak_right_edge = inp_info["peak_range"]["sgn_right_edge"]
+
+
+    selection = str(bgr_left_edge)+'< '+mass_var+' <'+str(peak_left_edge)+' or '+str(peak_right_edge)+\
+    '< '+mass_var+' <'+str(bgr_right_edge)
+
+    signalH = signal.get_subset(size = inp_info["number_of_events"]["number_of_signal_events"])
+    bkgH = background.get_subset(selection, size=inp_info["number_of_events"]["number_of_background_events"])
+
+    return signalH, bkgH
+
+
+def read_log_vars(inp_file):
+    with open(str(inp_file), encoding="utf-8") as inp_file:
+        inp_dict = tomli.load(inp_file)
+
+    return inp_dict['non_log_scale']['variables'], inp_dict['log_scale']['variables']
+
+
+
+def read_train_vars(inp_file):
+    with open(str(inp_file), encoding="utf-8") as inp_file:
+        inp_dict = tomli.load(inp_file)
+    return  inp_dict['train_vars']
diff --git a/helper.py b/cand_class/helper.py
similarity index 76%
rename from helper.py
rename to cand_class/helper.py
index a3dfc99..9b4aa7c 100644
--- a/helper.py
+++ b/cand_class/helper.py
@@ -2,16 +2,14 @@
 import pandas as pd
 import matplotlib.pyplot as plt
 
-from read_configs import *
-
 import xgboost as xgb
 from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score
 from numpy import sqrt, log, argmax
 import itertools
+import treelite
 
 
-
-def transform_df_to_log(df, vars, inp_file):
+def transform_df_to_log(df, vars, non_log_x, log_x):
     """
     Transforms DataFrame to DataFrame with features in log scale
     Parameters
@@ -24,17 +22,12 @@ def transform_df_to_log(df, vars, inp_file):
         config TOML file with list of features that should and shouldn't be
         transformed to log scale
     """
-    df_new = pd.DataFrame()
-
-    non_log_x, log_x = read_log_vars(inp_file)
-
-
+    df_new = df.copy()
 
     for var in vars:
         if var in log_x:
-            df_new['log('+ var+')'] = np.log(df[var])
-        if var in non_log_x:
-            df_new[var] = df[var]
+            df_new[var]  = df_new[var].apply(np.log)
+            df_new = df_new.rename(columns={var: 'log('+ var+')'})
     return df_new
 
 
@@ -87,24 +80,14 @@ def AMS(y_true, y_predict, y_true1, y_predict1, output_path):
     S0_best_threshold1 = (thresholds[xi1])
 
 
-    fig, ax = plt.subplots(figsize=(12, 8), dpi = 100)
-    plt.plot(fpr, tpr, linewidth=3 ,linestyle=':',color='darkorange',label='ROC curve train (area = %0.6f)' % roc_auc)
-    plt.plot(fpr1, tpr1, color='green',label='ROC curve test (area = %0.6f)' % roc_auc1)
-    plt.plot([0, 1], [0, 1], color='navy', linestyle='--', label='Random guess')
-    #plt.scatter(fpr[xi], tpr[xi], marker='o', color='black', label= 'Best Threshold train set = '+"%.4f" % S0_best_threshold +'\n AMS = '+ "%.2f" % S0[xi])
-    plt.scatter(fpr1[xi1], tpr1[xi1], marker='o', s=80, color='blue', label= 'Best Threshold test set = '+"%.4f" % S0_best_threshold1 +'\n AMS = '+ "%.2f" % S01[xi1])
-    plt.xlabel('False Positive Rate', fontsize = 18)
-    plt.ylabel('True Positive Rate', fontsize = 18)
-    plt.legend(loc="lower right", fontsize = 18)
-    plt.title('Receiver operating characteristic', fontsize = 18)
-    ax.tick_params(axis='both', which='major', labelsize=18)
-    plt.xlim([-0.01, 1.0])
-    plt.ylim([0, 1.02])
-    #axs.axis([-0.01, 1, 0.9, 1])
-    fig.tight_layout()
-    fig.savefig(str(output_path)+'/hists.png')
-    plt.show()
-    return S0_best_threshold, S0_best_threshold1
+    roc_curve_data = dict()
+    roc_curve_data["fpr_train"] = fpr
+    roc_curve_data["tpr_train"] = tpr
+
+    roc_curve_data["fpr_test"] = fpr1
+    roc_curve_data["tpr_test"] = tpr1
+
+    return S0_best_threshold, S0_best_threshold1, roc_curve_data
 
 
 def plot_confusion_matrix(cm, classes,
@@ -119,8 +102,11 @@ def plot_confusion_matrix(cm, classes,
 
     print(cm)
 
+    # plt.subplots(figsize=(12, 9))
+    # plt.figure(figsize = (6, 5))
     plt.imshow(cm, interpolation='nearest', cmap=cmap)
-    plt.title(title)
+
+    plt.title(title, fontsize = 20)
     plt.colorbar()
     tick_marks = np.arange(len(classes))
     plt.xticks(tick_marks, classes, rotation=45)
@@ -131,7 +117,7 @@ def plot_confusion_matrix(cm, classes,
     for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
         plt.text(j, i, format(cm[i, j], fmt),
                  horizontalalignment="center",
-                 color="white" if cm[i, j] > thresh else "black")
+                 color="white" if cm[i, j] > thresh else "black", size = 15)
 
     plt.tight_layout()
     plt.ylabel('True label',fontsize = 15)
@@ -186,7 +172,27 @@ def difference_df(df_orig, df_cut, cut):
 
 
 def diff_SB_cut(df, target_label):
-    dfs_cut = df[(df['xgb_preds1']==1) & (df[target_label]==1)]
+    dfs_cut = df[(df['xgb_preds1']==1)]
     dfb_cut = df[(df['xgb_preds1']==1) & (df[target_label]==0)]
 
     return dfs_cut, dfb_cut
+
+
+
+def save_model_lib(bst_model, output_path):
+    bst = bst_model.get_booster()
+
+    #create an object out of your model, bst in our case
+    model = treelite.Model.from_xgboost(bst)
+    #use GCC compiler
+    toolchain = 'gcc'
+    #parallel_comp can be changed upto as many processors as one have
+    model.export_lib(toolchain=toolchain, libpath=output_path+'/xgb_model.so',
+                     params={'parallel_comp': 4}, verbose=True)
+
+
+    # Operating system of the target machine
+    platform = 'unix'
+    model.export_srcpkg(platform=platform, toolchain=toolchain,
+                pkgpath=output_path+'/XGBmodel.zip', libname='xgb_model.so',
+                verbose=True)
diff --git a/cand_class/hipe_conf_params.py b/cand_class/hipe_conf_params.py
new file mode 100644
index 0000000..3884f03
--- /dev/null
+++ b/cand_class/hipe_conf_params.py
@@ -0,0 +1,64 @@
+from hipe4ml.model_handler import ModelHandler
+from dataclasses import dataclass
+import xgboost as xgb
+
+import matplotlib.pyplot as plt
+from hipe4ml import plot_utils
+
+import xgboost as xgb
+
+
+@dataclass
+class XGBmodel():
+    features_for_train: list
+    hyper_pars_ranges: dict
+    train_test_data: list
+    output_path : str
+    __model_hdl: ModelHandler = (None, None, None)
+    metrics: str = 'roc_auc'
+    nfold: int = 3
+    init_points: int = 1
+    n_iter: int = 2
+    n_jobs: int = -1
+
+
+
+
+    def modelBO(self):
+        model_clf = xgb.XGBClassifier()
+        self.__model_hdl = ModelHandler(model_clf, self.features_for_train)
+        self.__model_hdl.optimize_params_bayes(self.train_test_data, self.hyper_pars_ranges,
+         self.metrics, self.nfold, self.init_points, self.n_iter, self.n_jobs)
+
+
+
+    def train_test_pred(self):
+        self.__model_hdl.train_test_model(self.train_test_data)
+
+        y_pred_train = self.__model_hdl.predict(self.train_test_data[0], False)
+        y_pred_test = self.__model_hdl.predict(self.train_test_data[2], False)
+
+
+        return y_pred_train, y_pred_test
+
+
+    def save_predictions(self, filename):
+        print(self.__model_hdl.get_original_model())
+        self.__model_hdl.dump_original_model(self.output_path+'/'+filename, xgb_format=False)
+
+
+    def load_model(self, filename):
+        self.__model_hdl.load_model_handler(filename)
+
+
+    def get_mode_booster(self):
+        return self.__model_hdl.model
+
+
+    def plot_dists(self):
+
+        leg_labels = ['background', 'signal']
+        ml_out_fig = plot_utils.plot_output_train_test(self.__model_hdl, self.train_test_data, 100,
+                                               False, leg_labels, True, density=True)
+
+        plt.savefig(str(self.output_path)+'/thresholds.png')
diff --git a/cand_class/hists_root.py b/cand_class/hists_root.py
new file mode 100644
index 0000000..9542182
--- /dev/null
+++ b/cand_class/hists_root.py
@@ -0,0 +1,291 @@
+import ROOT
+
+from array import array
+
+from cand_class.helper import *
+
+from dataclasses import dataclass
+
+@dataclass
+class HistBuilder:
+
+    output_path : str
+    root_output_name = 'hists.root'
+
+    def __post_init__(self):
+
+        __hist_out = ROOT.TFile(self.output_path+'/'+self.root_output_name, "UPDATE")
+        __hist_out.cd()
+
+        ROOT.gDirectory.mkdir('Signal')
+        ROOT.gDirectory.mkdir('Background')
+
+        __hist_out.cd()
+        __hist_out.cd('Signal')
+
+
+        ROOT.gDirectory.mkdir('train')
+        ROOT.gDirectory.mkdir('test')
+
+        __hist_out.cd()
+        ROOT.gDirectory.cd('Signal/train')
+        ROOT.gDirectory.mkdir('pt_rap')
+        ROOT.gDirectory.mkdir('hists')
+
+        __hist_out.cd()
+        ROOT.gDirectory.cd('Signal/test')
+        ROOT.gDirectory.mkdir('pt_rap')
+        ROOT.gDirectory.mkdir('hists')
+
+
+        __hist_out.cd()
+        __hist_out.cd('Background')
+
+        ROOT.gDirectory.mkdir('train')
+        ROOT.gDirectory.mkdir('test')
+
+
+        __hist_out.cd()
+        ROOT.gDirectory.cd('Background/train')
+        ROOT.gDirectory.mkdir('pt_rap')
+        ROOT.gDirectory.mkdir('hists')
+
+        __hist_out.cd()
+        ROOT.gDirectory.cd('Background/test')
+        ROOT.gDirectory.mkdir('pt_rap')
+        ROOT.gDirectory.mkdir('hists')
+
+        __hist_out.Close()
+
+
+    def roc_curve_root(self):
+        __hist_out = ROOT.TFile(self.output_path+'/'+self.root_output_name, "UPDATE");
+
+        __hist_out.cd()
+
+        fpr = roc_curve_data['fpr_train']
+        tpr = roc_curve_data['tpr_train']
+
+
+        fpr1 = roc_curve_data['fpr_test']
+        tpr1 = roc_curve_data['tpr_test']
+
+
+        fpr_d_tr = array('d', fpr.tolist())
+        tpr_d_tr = array('d', tpr.tolist())
+
+        fpr_d_ts = array('d', fpr1.tolist())
+        tpr_d_ts = array('d', tpr1.tolist())
+
+        train_roc = ROOT.TGraph(len(fpr_d_tr), fpr_d_tr, tpr_d_tr)
+        test_roc = ROOT.TGraph(len(fpr_d_ts), fpr_d_ts, tpr_d_ts)
+
+        train_roc.SetLineColor(ROOT.kRed + 2)
+        test_roc.SetLineColor(ROOT.kBlue + 2)
+
+
+        train_roc.SetLineWidth(3)
+        test_roc.SetLineWidth(3)
+
+
+        train_roc.SetLineStyle(9)
+        test_roc.SetLineStyle(9)
+
+
+        train_roc.SetTitle("Receiver operating characteristic train")
+        test_roc.SetTitle("Receiver operating characteristic test")
+
+        train_roc.GetXaxis().SetTitle('FPR');
+        train_roc.GetYaxis().SetTitle('TPR');
+
+        test_roc.GetXaxis().SetTitle('FPR');
+        test_roc.GetYaxis().SetTitle('TPR');
+
+        train_roc.Write("Train_roc")
+        test_roc.Write("Test_roc")
+
+        __hist_out.Close()
+
+    def pt_rap_root(self, df_orig, df_cut, difference, sign, x_range, y_range, data_name):
+
+        if sign ==0:
+            s_label = 'Background'
+            m = 5
+
+        if sign==1:
+            s_label = 'Signal'
+            m = 1
+
+        rej = round((1 -  (df_cut.shape[0] / df_orig.shape[0])) * 100, 5)
+        saved = round((df_cut.shape[0] / df_orig.shape[0]) * 100, 5)
+        diff = df_orig.shape[0] - df_cut.shape[0]
+
+        __hist_out = ROOT.TFile(self.output_path+'/'+self.root_output_name, "UPDATE");
+        __hist_out.cd()
+        ROOT.gDirectory.cd(s_label+'/'+data_name+'/'+'pt_rap')
+
+
+        rapidity_orig = array('d', df_orig['rapidity'].values.tolist())
+        pT_orig = array('d', df_orig['pT'].values.tolist())
+        pT_rap_before_cut = ROOT.TH2D( 'pT_rap_before_ML'+data_name, 'pT_rap_before_ML_'+data_name, 100, min(x_range),
+         max(x_range), 100, min(x_range), max(x_range))
+
+
+        for i in range(len(rapidity_orig)):
+            pT_rap_before_cut.Fill(rapidity_orig[i], pT_orig[i])
+
+        ROOT.gStyle.SetOptStat(0)
+        ROOT.gStyle.SetPalette(ROOT.kBird)
+
+        pT_rap_before_cut.GetXaxis().SetTitle('rapidity');
+        pT_rap_before_cut.GetYaxis().SetTitle('pT, GeV');
+        pT_rap_before_cut.Draw('COLZ')
+
+
+
+
+
+        rapidity_cut = array('d', df_cut['rapidity'].values.tolist())
+        pT_cut = array('d', df_cut['pT'].values.tolist())
+
+        pT_rap_cut = ROOT.TH2D( 'pT_rap_after_ML_'+data_name, 'pT_rap_after_ML_'+data_name, 100, min(x_range),
+         max(x_range), 100, min(x_range), max(x_range))
+
+
+        for i in range(len(rapidity_cut)):
+            pT_rap_cut.Fill(rapidity_cut[i], pT_cut[i])
+
+        ROOT.gStyle.SetOptStat(0)
+        ROOT.gStyle.SetPalette(ROOT.kBird)
+
+        pT_rap_cut.GetXaxis().SetTitle('rapidity');
+        pT_rap_cut.GetYaxis().SetTitle('pT, GeV');
+        pT_rap_cut.Draw('COLZ')
+
+
+
+
+
+        rapidity_diff = array('d', difference['rapidity'].values.tolist())
+        pT_diff = array('d', difference['pT'].values.tolist())
+
+        pT_rap_diff = ROOT.TH2D('pT_rap_diff_'+data_name, 'pT_rap_diff_'+data_name, 100, min(x_range),
+         max(x_range), 100, min(x_range), max(x_range))
+
+
+        for i in range(len(rapidity_diff)):
+            pT_rap_diff.Fill(rapidity_diff[i], pT_diff[i])
+
+        ROOT.gStyle.SetOptStat(0)
+        ROOT.gStyle.SetPalette(ROOT.kBird)
+
+        pT_rap_diff.GetXaxis().SetTitle('rapidity');
+        pT_rap_diff.GetYaxis().SetTitle('pT, GeV');
+        pT_rap_diff.Draw('COLZ')
+
+
+        pT_rap_before_cut.Write()
+        pT_rap_cut.Write()
+        pT_rap_diff.Write()
+
+        __hist_out.Close()
+
+
+    def hist_variables_root(self, mass_var, dfs_orig, dfb_orig, dfs_cut, dfb_cut,difference_s, sample):
+        """
+        Applied quality cuts and created distributions for all the features in pdf
+        file
+        Parameters
+        ----------
+        df_s: dataframe
+              signal
+        df_b: dataframe
+              background
+        feature: str
+                name of the feature to be plotted
+        pdf_key: PdfPages object
+                name of pdf document with distributions
+        """
+
+        __hist_out = ROOT.TFile(self.output_path+'/'+self.root_output_name, "UPDATE");
+        __hist_out.cd()
+
+
+        for feature in dfs_orig.columns:
+
+            dfs_orig_feat = array('d', dfs_orig[feature].values.tolist())
+            dfb_orig_feat = array('d', dfb_orig[feature].values.tolist())
+
+
+            dfs_cut_feat = array('d', dfs_cut[feature].values.tolist())
+            dfb_cut_feat = array('d', dfb_cut[feature].values.tolist())
+
+
+            dfs_diff_feat = array('d', difference_s[feature].values.tolist())
+
+
+            dfs_orig_root = ROOT.TH1D('signal before ML '+feature, 'signal before ML '+feature, 500,
+            min(dfs_orig[feature].values.tolist()), max(dfs_orig[feature].values.tolist()))
+
+            for i in range(len(dfs_orig_feat)):
+                dfs_orig_root.Fill(dfs_orig_feat[i])
+
+            dfs_orig_root.GetXaxis().SetTitle(feature);
+
+            dfs_orig_root.Draw()
+
+            dfb_orig_root = ROOT.TH1D('background before ML '+feature, 'background before ML '+feature, 500,
+            min(dfb_orig[feature].values.tolist()), max(dfb_orig[feature].values.tolist()))
+
+            for i in range(len(dfb_orig_feat)):
+                dfb_orig_root.Fill(dfb_orig_feat[i])
+
+            dfb_orig_root.GetXaxis().SetTitle(feature);
+            dfb_orig_root.Draw()
+
+
+            dfs_cut_root = ROOT.TH1D('signal after ML '+feature, 'signal after ML '+feature, 500,
+            min(dfs_cut[feature].values.tolist()), max(dfs_cut[feature].values.tolist()))
+
+            for i in range(len(dfs_cut_feat)):
+                dfs_cut_root.Fill(dfs_cut_feat[i])
+
+            dfs_cut_root.GetXaxis().SetTitle(feature);
+            dfs_cut_root.Draw()
+
+
+            dfb_cut_root = ROOT.TH1D('background after ML '+feature, 'background after ML '+feature, 500,
+            min(dfb_cut[feature].values.tolist()), max(dfb_cut[feature].values.tolist()))
+
+            for i in range(len(dfb_cut_feat)):
+                dfb_cut_root.Fill(dfb_cut_feat[i])
+
+            dfb_cut_root.GetXaxis().SetTitle(feature);
+            dfb_cut_root.Draw()
+
+
+            dfs_diff_root = ROOT.TH1D('signal difference '+feature, 'signal difference '+feature, 500,
+            min(difference_s[feature].values.tolist()), max(difference_s[feature].values.tolist()))
+
+            for i in range(len(dfs_diff_feat)):
+                dfs_diff_root.Fill(dfs_diff_feat[i])
+
+            dfs_diff_root.GetXaxis().SetTitle(feature);
+            dfs_diff_root.Draw()
+
+
+            __hist_out.cd()
+
+            __hist_out.cd('Signal'+'/'+sample+'/'+'hists')
+            dfs_orig_root.Write()
+            dfs_cut_root.Write()
+            dfs_diff_root.Write()
+
+            __hist_out.cd()
+
+            __hist_out.cd('Background'+'/'+sample+'/'+'hists')
+
+            dfb_orig_root.Write()
+            dfb_cut_root.Write()
+
+        __hist_out.Close()
diff --git a/read_configs.py b/read_configs.py
deleted file mode 100644
index 96de294..0000000
--- a/read_configs.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import numpy as np
-import pandas as pd
-import tomli
-
-
-def read_log_vars(inp_file):
-    with open(str(inp_file), encoding="utf-8") as inp_file:
-        inp_dict = tomli.load(inp_file)
-
-    return inp_dict['non_log_variables'], inp_dict['log_variables']
-
-
-def read_peak(inp_file):
-    with open(str(inp_file), encoding="utf-8") as inp_file:
-        inp_dict = tomli.load(inp_file)
-    return  inp_dict
-
-
-def read_train_vars(inp_file):
-    with open(str(inp_file), encoding="utf-8") as inp_file:
-        inp_dict = tomli.load(inp_file)
-    return  inp_dict['train_vars']
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..91f39c0
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,9 @@
+uproot
+pandas
+numpy
+scikit-learn
+xgboost
+bayesian-optimization
+hipe4ml
+treelite
+treelite_runtime
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..93a33d4
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,11 @@
+from setuptools import setup, find_packages
+
+
+setup(
+    name="CandidatesClassifier",
+    version="0.1.0",
+    author="olha lavoryk",
+    license="MIT",
+    url="https://github.com/conformist89/CandidatesClassifier.git",
+    packages=find_packages()
+)
diff --git a/train_test_xgboost.py b/train_test_xgboost.py
deleted file mode 100644
index 04d3032..0000000
--- a/train_test_xgboost.py
+++ /dev/null
@@ -1,368 +0,0 @@
-import numpy as np
-import pandas as pd
-import matplotlib.pyplot as plt
-import xgboost as xgb
-from hipe4ml.plot_utils import plot_roc_train_test
-from helper import *
-from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score
-
-
-from matplotlib.font_manager import FontProperties
-
-from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
-                               AutoMinorLocator)
-
-import gc
-import matplotlib as mpl
-
-
-
-mpl.rc('figure', max_open_warning = 0)
-
-
-class TrainTestXGBoost:
-    """
-    Class used for train and test model on parameters found by Bayesian Optimization
-    Parameters
-    -------------------------------------------------
-    bst:
-        model to be used
-
-    dtrain: xgboost Matrix
-        train dataset
-
-    y_train:
-        train target label
-
-    dtest: xgboost Matrix
-        test dataset
-
-    y_test:
-        test target label
-
-    bst_train:
-        dataframe with predictions
-
-
-    """
-
-
-    def __init__(self, bst, dtrain, y_train, dtest, y_test, output_path):
-
-        self.bst = bst
-        self.dtrain = dtrain
-        self.y_train = y_train
-        self.dtest = dtest
-        self.y_test = y_test
-        self.output_path = output_path
-
-        self.__bst_train = None
-        self.__bst_test = None
-
-        self.__train_best_thr = None
-        self.__test_best_thr = None
-
-        self.__train_pred = None
-        self.__test_pred = None
-
-    def apply_predictions(self):
-
-        self.__bst_train= pd.DataFrame(data=self.bst.predict(self.dtrain, output_margin=False),  columns=["xgb_preds"])
-        self.__bst_train['issignal']=self.y_train
-
-
-        self.__bst_test= pd.DataFrame(data=self.bst.predict(self.dtest, output_margin=False),  columns=["xgb_preds"])
-        self.__bst_test['issignal']=self.y_test
-
-        return self.__bst_train, self.__bst_test
-
-
-    def get_threshold(self, train_y, test_y):
-        self.__train_best_thr, self.__test_best_thr = AMS(train_y, self.__bst_train['xgb_preds'], test_y, self.__bst_test['xgb_preds'], self.output_path)
-        return self.__train_best_thr, self.__test_best_thr
-
-
-
-    def apply_threshold(self):
-        cut_train = self.__train_best_thr
-        self.__train_pred = ((self.__bst_train['xgb_preds']>cut_train)*1)
-
-        cut_test = self.__test_best_thr
-        self.__test_pred = ((self.__bst_test['xgb_preds']>cut_test)*1)
-
-        return self.__train_pred, self.__test_pred
-
-
-    def get_result(self, x_train, x_test):
-
-        train_with_preds = x_train.copy()
-        train_with_preds['xgb_preds1'] = self.__train_pred.values
-
-
-        test_with_preds = x_test.copy()
-        test_with_preds['xgb_preds1'] = self.__test_pred.values
-
-        return train_with_preds, test_with_preds
-
-
-
-    def features_importance(self):
-        ax = xgb.plot_importance(self.bst)
-        plt.rcParams['figure.figsize'] = [6, 3]
-        ax.figure.tight_layout()
-        ax.figure.savefig(str(self.output_path)+"/xgb_train_variables_rank.png")
-
-
-
-    def CM_plot_train_test(self):
-         """
-         Plots confusion matrix. A Confusion Matrix C is such that Cij is equal to
-         the number of observations known to be in group i and predicted to be in
-         group j. Thus in binary classification, the count of true positives is C00,
-         false negatives C01,false positives is C10, and true neagtives is C11.
-
-         Confusion matrix is applied to previously unseen by model data, so we can
-         estimate model's performance
-
-         Parameters
-         ----------
-         test_best: numpy.float32
-                   best threshold
-
-         x_train: dataframe
-                 we want to get confusion matrix on training datasets
-         """
-         #lets take the best threshold and look at the confusion matrix
-
-         cnf_matrix_train = confusion_matrix(self.__bst_train['issignal'], self.__train_pred, labels=[1,0])
-         np.set_printoptions(precision=2)
-         fig_train, axs_train = plt.subplots(figsize=(10, 8))
-         axs_train.yaxis.set_label_coords(-0.04,.5)
-         axs_train.xaxis.set_label_coords(0.5,-.005)
-         plot_confusion_matrix(cnf_matrix_train, classes=['signal','background'],
-          title=' Train Dataset Confusion Matrix for XGB for cut > '+str(self.__train_best_thr))
-         plt.savefig(str(self.output_path)+'/confusion_matrix_extreme_gradient_boosting_train.png')
-
-
-         cnf_matrix_test = confusion_matrix(self.__bst_test['issignal'], self.__test_pred, labels=[1,0])
-         np.set_printoptions(precision=2)
-         fig_test, axs_test = plt.subplots(figsize=(10, 8))
-         axs_test.yaxis.set_label_coords(-0.04,.5)
-         axs_test.xaxis.set_label_coords(0.5,-.005)
-         plot_confusion_matrix(cnf_matrix_test, classes=['signal','background'],
-           title=' Test Dataset Confusion Matrix for XGB for cut > '+str(self.__test_best_thr))
-         plt.savefig(str(self.output_path)+'/confusion_matrix_extreme_gradient_boosting_test.png')
-
-
-    def preds_prob(self, preds, true, dataset):
-        if dataset =='train':
-            label1 = 'XGB Predictions on the training data set'
-        else:
-            label1 = 'XGB Predictions on the test data set'
-        fig, ax = plt.subplots(figsize=(12, 8))
-        bins1=100
-        plt.hist(self.__bst_test[preds], bins=bins1,facecolor='green',alpha = 0.3, label=label1)
-
-        TP = self.__bst_test[(self.__bst_test[true]==1)]
-        TN = self.__bst_test[(self.__bst_test[true]==0)]
-        #TP[preds].plot.hist(ax=ax, bins=bins1,facecolor='blue', histtype='stepfilled',alpha = 0.3, label='True Positives/signal in predictions')
-        hist, bins = np.histogram(TP[preds], bins=bins1)
-        err = np.sqrt(hist)
-        center = (bins[:-1] + bins[1:]) / 2
-
-
-        hist1, bins1 = np.histogram(TN[preds], bins=bins1)
-        err1 = np.sqrt(hist1)
-        plt.errorbar(center, hist1, yerr=err1, fmt='o',
-                     c='Red', label='Background in predictions')
-
-        plt.errorbar(center, hist, yerr=err, fmt='o',
-                     c='blue', label='Signal in predictions')
-
-        ax.set_yscale('log')
-        plt.xlabel('Probability',fontsize=18)
-        plt.ylabel('Counts', fontsize=18)
-        plt.legend(fontsize=18)
-        ax.set_xticks(np.arange(0,1.1,0.1))
-        ax.tick_params(axis='both', which='major', labelsize=18)
-        ax.tick_params(axis='both', which='minor', labelsize=16)
-        plt.show()
-        fig.tight_layout()
-        fig.savefig(str(self.output_path)+'/test_best_pred.png')
-
-
-    def pT_vs_rapidity(self, df_orig, df_cut, difference, sign, x_range, y_range, data_name):
-        fig, axs = plt.subplots(1,3, figsize=(15, 4), gridspec_kw={'width_ratios': [1, 1, 1]})
-
-
-        if sign ==0:
-            s_label = 'Background'
-            m = 5
-
-        if sign==1:
-            s_label = 'Signal'
-            m = 1
-
-        axs[0].set_aspect(aspect = 'auto')
-        axs[1].set_aspect(aspect = 'auto')
-        axs[2].set_aspect(aspect = 'auto')
-
-        rej = round((1 -  (df_cut.shape[0] / df_orig.shape[0])) * 100, 5)
-        diff = df_orig.shape[0] - df_cut.shape[0]
-        axs[0].legend(shadow=True, title =str(len(df_orig))+' samples', fontsize =14)
-        axs[1].legend(shadow=True, title =str(len(df_cut))+' samples', fontsize =14)
-        axs[2].legend(shadow=True, title ='ML cut rejects \n'+ str(rej) +'% of '+ s_label +
-        '\n ' + str(diff)+ ' samples were rejected ',
-         fontsize =14)
-
-        counts0, xedges0, yedges0, im0 = axs[0].hist2d(df_orig['rapidity'], df_orig['pT'] , range = [x_range, y_range], bins=100,
-                    norm=mpl.colors.LogNorm(), cmap=plt.cm.rainbow)
-
-        axs[0].set_title(s_label + ' candidates before ML cut '+data_name, fontsize = 16)
-        axs[0].set_xlabel('rapidity', fontsize=15)
-        axs[0].set_ylabel('pT, GeV', fontsize=15)
-
-
-        mpl.pyplot.colorbar(im0, ax = axs[0])
-
-
-
-        axs[0].xaxis.set_major_locator(MultipleLocator(1))
-        axs[0].xaxis.set_major_formatter(FormatStrFormatter('%d'))
-
-        axs[0].xaxis.set_tick_params(which='both', width=2)
-
-
-        fig.tight_layout()
-
-
-        counts1, xedges1, yedges1, im1 = axs[1].hist2d(df_cut['rapidity'], df_cut['pT'] , range = [x_range, y_range], bins=100,
-                    norm=mpl.colors.LogNorm(), cmap=plt.cm.rainbow)
-
-        axs[1].set_title(s_label + ' candidates after ML cut '+data_name, fontsize = 16)
-        axs[1].set_xlabel('rapidity', fontsize=15)
-        axs[1].set_ylabel('pT, GeV', fontsize=15)
-
-        mpl.pyplot.colorbar(im1, ax = axs[1])
-
-        axs[1].xaxis.set_major_locator(MultipleLocator(1))
-        axs[1].xaxis.set_major_formatter(FormatStrFormatter('%d'))
-
-        axs[1].xaxis.set_tick_params(which='both', width=2)
-
-        fig.tight_layout()
-
-
-        counts2, xedges2, yedges2, im2 = axs[2].hist2d(difference['rapidity'], difference['pT'] , range = [x_range, y_range], bins=100,
-                    norm=mpl.colors.LogNorm(), cmap=plt.cm.rainbow)
-
-        axs[2].set_title(s_label + ' difference ', fontsize = 16)
-        axs[2].set_xlabel('rapidity', fontsize=15)
-        axs[2].set_ylabel('pT, GeV', fontsize=15)
-
-        mpl.pyplot.colorbar(im1, ax = axs[2])
-
-
-        axs[2].xaxis.set_major_locator(MultipleLocator(1))
-        axs[2].xaxis.set_major_formatter(FormatStrFormatter('%d'))
-
-        axs[2].xaxis.set_tick_params(which='both', width=2)
-
-        fig.tight_layout()
-
-        fig.savefig(self.output_path+'/pT_rapidity_'+s_label+'_ML_cut_'+data_name+'.png')
-
-
-
-    def hist_variables(self, mass_var, dfs_orig, dfb_orig, dfs_cut, dfb_cut, difference_s, sample, pdf_key):
-        """
-        Applied quality cuts and created distributions for all the features in pdf
-        file
-        Parameters
-        ----------
-        df_s: dataframe
-              signal
-        df_b: dataframe
-              background
-        feature: str
-                name of the feature to be plotted
-        pdf_key: PdfPages object
-                name of pdf document with distributions
-        """
-
-        for feature in dfs_orig.columns:
-            fig, ax = plt.subplots(3, figsize=(20, 10))
-
-
-            fontP = FontProperties()
-            fontP.set_size('xx-large')
-
-            ax[0].hist(dfs_orig[feature], label = 'signal', bins = 500, alpha = 0.4, color = 'blue')
-            ax[0].hist(dfb_orig[feature], label = 'background', bins = 500, alpha = 0.4, color = 'red')
-            ax[0].legend(shadow=True,title = 'S/B='+ str(round(len(dfs_orig)/len(dfb_orig), 3)) +
-
-                       '\n S samples:  '+str(dfs_orig.shape[0]) + '\n B samples: '+ str(dfb_orig.shape[0]) +
-                       '\nquality cuts ',
-                       title_fontsize=15, fontsize =15, bbox_to_anchor=(1.05, 1),
-                        loc='upper left', prop=fontP,)
-
-            ax[0].set_xlim(dfb_orig[feature].min(), dfb_orig[feature].max())
-
-            ax[0].xaxis.set_tick_params(labelsize=15)
-            ax[0].yaxis.set_tick_params(labelsize=15)
-
-            ax[0].set_title(str(feature) + ' MC '+ sample, fontsize = 25)
-            ax[0].set_xlabel(feature, fontsize = 25)
-
-            if feature!=mass_var:
-                ax[0].set_yscale('log')
-
-            fig.tight_layout()
-
-
-            ax[1].hist(dfs_cut[feature], label = 'signal', bins = 500, alpha = 0.4, color = 'blue')
-            ax[1].hist(dfb_cut[feature], label = 'background', bins = 500, alpha = 0.4, color = 'red')
-            ax[1].legend(shadow=True,title = 'S/B='+ str(round(len(dfs_cut)/len(dfb_cut), 3)) +
-                       '\n S samples:  '+str(dfs_cut.shape[0]) + '\n B samples: '+ str(dfb_cut.shape[0]) +
-                       '\nquality cuts + ML cut',
-                        title_fontsize=15, fontsize =15, bbox_to_anchor=(1.05, 1),
-                        loc='upper left', prop=fontP,)
-
-
-            ax[1].set_xlim(dfb_orig[feature].min(), dfb_orig[feature].max())
-
-            ax[1].xaxis.set_tick_params(labelsize=15)
-            ax[1].yaxis.set_tick_params(labelsize=15)
-
-            ax[1].set_title(feature + ' MC '+ sample, fontsize = 25)
-            ax[1].set_xlabel(feature, fontsize = 25)
-
-            if feature!='mass':
-                ax[1].set_yscale('log')
-
-            fig.tight_layout()
-
-
-
-
-            ax[2].hist(difference_s[feature], label = 'signal', bins = 500, alpha = 0.4, color = 'blue')
-            ax[2].legend(shadow=True,title ='S samples: '+str(len(difference_s)) +'\nsignal difference',
-                        title_fontsize=15, fontsize =15, bbox_to_anchor=(1.05, 1),
-                        loc='upper left', prop=fontP,)
-
-
-            ax[2].set_xlim(dfb_orig[feature].min(), dfb_orig[feature].max())
-
-            ax[2].xaxis.set_tick_params(labelsize=15)
-            ax[2].yaxis.set_tick_params(labelsize=15)
-
-            ax[2].set_title(feature + ' MC '+ sample, fontsize = 25)
-            ax[2].set_xlabel(feature, fontsize = 25)
-
-            if feature!=mass_var:
-                ax[2].set_yscale('log')
-
-            fig.tight_layout()
-
-            plt.savefig(pdf_key,format='pdf')
-        pdf_key.close()