From 801835cce171252bb5a6eb24a3f25afa3727bdec Mon Sep 17 00:00:00 2001 From: Python3pkg Date: Wed, 17 May 2017 22:59:55 -0700 Subject: [PATCH 1/3] Convert to python3 --- backspinpy/Cef_tools.py | 24 +- backspinpy/Cef_tools.py.bak | 161 +++++++ backspinpy/__init__.py | 2 +- backspinpy/__init__.py.bak | 3 + backspinpy/backSPIN.py | 2 +- backspinpy/backSPIN.py.bak | 932 ++++++++++++++++++++++++++++++++++++ 6 files changed, 1110 insertions(+), 14 deletions(-) create mode 100644 backspinpy/Cef_tools.py.bak create mode 100644 backspinpy/__init__.py.bak create mode 100755 backspinpy/backSPIN.py.bak diff --git a/backspinpy/Cef_tools.py b/backspinpy/Cef_tools.py index 9d56f8a..c9fbfe4 100644 --- a/backspinpy/Cef_tools.py +++ b/backspinpy/Cef_tools.py @@ -40,7 +40,7 @@ # attribute_values_list2 = cef.col_attr_values[2] # ... -from builtins import str as unicode +from builtins import str as str from io import open from builtins import range @@ -73,7 +73,7 @@ def update(self): self.rows = len(self.matrix) self.cols = len(self.matrix[0]) self.tab_fields = max(7, self.cols + self.row_attr + 1) - self.linestr = u'%s' + u'\t%s' * (self.tab_fields-1) + u'\n' + self.linestr = '%s' + '\t%s' * (self.tab_fields-1) + '\n' def add_header(self, name, value): self.header_names.append(name) @@ -130,7 +130,7 @@ def readCEF(self, filepath, matrix_dtype = 'auto'): try: self.matrix.append( [matrix_dtype(el) for el in linelist[self.row_attr+1:] ]) except ValueError: - print(repr(el), ' is invalid') + print((repr(el), ' is invalid')) @@ -138,24 +138,24 @@ def writeCEF(self, filepath, matrix_str_fmt = '%i'): self.update() with open(filepath, 'w') as fout: #Write cef file first line - fout.write( self.linestr % ( ('CEF', unicode(self.headers), unicode(self.row_attr),\ - unicode(self.col_attr) , unicode(self.rows), unicode(self.cols), unicode(self.flags) ) +\ + fout.write( self.linestr % ( ('CEF', str(self.headers), str(self.row_attr),\ + str(self.col_attr) , str(self.rows), str(self.cols), 
str(self.flags) ) +\ ('',) * (self.tab_fields - 7) ) ) #Write header for i in range(self.headers): - fout.write(self.linestr % ( (unicode( self.header_names[i]), unicode( self.header_values[i]) ) + ('',) * (self.tab_fields - 2) )) + fout.write(self.linestr % ( (str( self.header_names[i]), str( self.header_values[i]) ) + ('',) * (self.tab_fields - 2) )) #Write col attributes for i in range( self.col_attr ): - fout.write( self.linestr % ( ('',) * (self.row_attr) + (unicode( self.col_attr_names[i] ),) + tuple( unicode(el) for el in self.col_attr_values[i] ) )) + fout.write( self.linestr % ( ('',) * (self.row_attr) + (str( self.col_attr_names[i] ),) + tuple( str(el) for el in self.col_attr_values[i] ) )) #Write headers of row attributes - fout.write( self.linestr % ( tuple(unicode(el) for el in self.row_attr_names) + ('',)*(self.tab_fields-self.row_attr) ) ) + fout.write( self.linestr % ( tuple(str(el) for el in self.row_attr_names) + ('',)*(self.tab_fields-self.row_attr) ) ) #Write rows for i in range(self.rows): for j in range(self.row_attr): - fout.write(unicode(self.row_attr_values[j][i]) + u'\t') - fout.write(u'\t') - fout.write(u'\t'.join([unicode(matrix_str_fmt % el) for el in self.matrix[i]] ) ) - fout.write(u'\n') + fout.write(str(self.row_attr_values[j][i]) + '\t') + fout.write('\t') + fout.write('\t'.join([str(matrix_str_fmt % el) for el in self.matrix[i]] ) ) + fout.write('\n') diff --git a/backspinpy/Cef_tools.py.bak b/backspinpy/Cef_tools.py.bak new file mode 100644 index 0000000..9d56f8a --- /dev/null +++ b/backspinpy/Cef_tools.py.bak @@ -0,0 +1,161 @@ +# Copyright (c) 2015 Gioele La Manno and Sten Linnarsson +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +# This code contains a simple parser reader for Cef files +# typical usage to write a cef_file: +# +# cef = CEF_obj() +# cef.add_header('Mynote', 'This is my message') +# cef.set_matrix(your_array) +# cef.add_col_attr(attribute_name, attribute_values_list) +# cef.add_row_attr(attribute_name, attribute_values_list) +# cef.writeCEF('path/to/your/file.cef') +# +# to read: +# cef = CEF_obj() +# cef.readCEF('path/to/your/file.cef') +# your_array = cef.matrix +# attribute_values_list2 = cef.col_attr_values[2] +# ... 
+ +from builtins import str as unicode +from io import open +from builtins import range + +class CEF_obj(object): + def __init__(self): + self.headers = 0 + self.row_attr = 0 + self.col_attr = 0 + self.rows = 0 + self.cols = 0 + self.flags = 0 + self.tab_fields = 7 + + self.header_names = [] + self.header_values = [] + + self.row_attr_names = [] + self.row_attr_values = [] + + self.col_attr_names = [] + self.col_attr_values = [] + + self.matrix = [] + + + def update(self): + self.headers = len( self.header_names) + self.row_attr = len(self.row_attr_names) + self.col_attr = len(self.col_attr_names) + self.rows = len(self.matrix) + self.cols = len(self.matrix[0]) + self.tab_fields = max(7, self.cols + self.row_attr + 1) + self.linestr = u'%s' + u'\t%s' * (self.tab_fields-1) + u'\n' + + def add_header(self, name, value): + self.header_names.append(name) + self.header_values.append(value) + + def add_row_attr(self, name, value): + self.row_attr_names.append(name) + self.row_attr_values.append(list(value)) + + def add_col_attr(self, name, value): + self.col_attr_names.append(name) + self.col_attr_values.append(list(value)) + + def set_matrix(self, matrix): + for row in matrix: + self.matrix.append(list(row)) + + def readCEF(self, filepath, matrix_dtype = 'auto'): + #Delete all the stored information + self.__init__() + #Start parsing + with open(filepath, 'rU') as fin: + # Read cef file first line + self.header, self.row_attr, self.col_attr, self.rows,\ + self.cols, self.flags = fin.readline().rstrip('\n').split('\t')[1:7] + self.header = int(self.header) + self.row_attr = int( self.row_attr ) + self.col_attr = int(self.col_attr) + self.rows = int(self.rows) + self.cols = int(self.cols) + self.flags = int(self.flags) + self.row_attr_values = [[] for _ in range(self.row_attr)] + # Read header + for i in range(self.header): + name, value = fin.readline().rstrip('\n').split('\t')[:2] + self.header_names.append(name) + self.header_values.append(value) + # Read col attr + 
for i in range(self.col_attr): + line_col_attr = fin.readline().rstrip('\n').split('\t')[self.row_attr:] + self.col_attr_names.append( line_col_attr[0] ) + self.col_attr_values.append( line_col_attr[1:] ) + #Read row attr and matrix + self.row_attr_names += fin.readline().rstrip('\n').split('\t')[:self.row_attr] + for _ in range(self.rows): + linelist = fin.readline().rstrip('\n').split('\t') + for n, entry in enumerate( linelist[:self.row_attr] ): + self.row_attr_values[n].append( entry ) + if matrix_dtype == 'auto': + if sum(('.' in i) or ('e' in i) for i in linelist[self.row_attr+1:]) != 0: + matrix_dtype = float + else: + matrix_dtype = int + try: + self.matrix.append( [matrix_dtype(el) for el in linelist[self.row_attr+1:] ]) + except ValueError: + print(repr(el), ' is invalid') + + + + def writeCEF(self, filepath, matrix_str_fmt = '%i'): + self.update() + with open(filepath, 'w') as fout: + #Write cef file first line + fout.write( self.linestr % ( ('CEF', unicode(self.headers), unicode(self.row_attr),\ + unicode(self.col_attr) , unicode(self.rows), unicode(self.cols), unicode(self.flags) ) +\ + ('',) * (self.tab_fields - 7) ) ) + #Write header + for i in range(self.headers): + fout.write(self.linestr % ( (unicode( self.header_names[i]), unicode( self.header_values[i]) ) + ('',) * (self.tab_fields - 2) )) + #Write col attributes + for i in range( self.col_attr ): + fout.write( self.linestr % ( ('',) * (self.row_attr) + (unicode( self.col_attr_names[i] ),) + tuple( unicode(el) for el in self.col_attr_values[i] ) )) + #Write headers of row attributes + fout.write( self.linestr % ( tuple(unicode(el) for el in self.row_attr_names) + ('',)*(self.tab_fields-self.row_attr) ) ) + #Write rows + for i in range(self.rows): + for j in range(self.row_attr): + fout.write(unicode(self.row_attr_values[j][i]) + u'\t') + fout.write(u'\t') + fout.write(u'\t'.join([unicode(matrix_str_fmt % el) for el in self.matrix[i]] ) ) + fout.write(u'\n') + + + diff --git 
a/backspinpy/__init__.py b/backspinpy/__init__.py index 2b92ec8..5b7711f 100644 --- a/backspinpy/__init__.py +++ b/backspinpy/__init__.py @@ -1,3 +1,3 @@ -from __future__ import absolute_import + from .backSPIN import SPIN, backSPIN, fit_CV, feature_selection from .Cef_tools import CEF_obj diff --git a/backspinpy/__init__.py.bak b/backspinpy/__init__.py.bak new file mode 100644 index 0000000..2b92ec8 --- /dev/null +++ b/backspinpy/__init__.py.bak @@ -0,0 +1,3 @@ +from __future__ import absolute_import +from .backSPIN import SPIN, backSPIN, fit_CV, feature_selection +from .Cef_tools import CEF_obj diff --git a/backspinpy/backSPIN.py b/backspinpy/backSPIN.py index 02bc3bd..baf9208 100755 --- a/backspinpy/backSPIN.py +++ b/backspinpy/backSPIN.py @@ -34,7 +34,7 @@ # pyinstaller -F backSPIN.py -n backspin-mac-64-bit # -from __future__ import division, print_function, absolute_import + from numpy import * import getopt import sys diff --git a/backspinpy/backSPIN.py.bak b/backspinpy/backSPIN.py.bak new file mode 100755 index 0000000..02bc3bd --- /dev/null +++ b/backspinpy/backSPIN.py.bak @@ -0,0 +1,932 @@ +#!/usr/bin/env python + +# Copyright (c) 2015, Amit Zeisel, Gioele La Manno and Sten Linnarsson +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This .py file can be used as a library or a command-line version of BackSPIN, +# This version of BackSPIN was implemented by Gioele La Manno. +# The BackSPIN biclustering algorithm was developed by Amit Zeisel and is described +# in Zeisel et al. Cell types in the mouse cortex and hippocampus revealed by +# single-cell RNA-seq Science 2015 (PMID: 25700174, doi: 10.1126/science.aaa1934). +# +# Building using pyinstaller: +# pyinstaller -F backSPIN.py -n backspin-mac-64-bit +# + +from __future__ import division, print_function, absolute_import +from numpy import * +import getopt +import sys +import os +from .Cef_tools import CEF_obj + +class Results: + pass + +def calc_loccenter(x, lin_log_flag): + M,N = x.shape + if N==1 and M>1: + x = x.T + M,N = x.shape + loc_center = zeros(M) + min_x = x.min(1) + x = x - min_x[:,newaxis] + for i in range(M): + ind = where(x[i,:]>0)[0] + if len(ind) != 0: + if lin_log_flag == 1: + w = x[i,ind]/sum(x[i,ind], 0) + else: + w = (2**x[i,ind])/sum(2**x[i,ind], 0) + loc_center[i] = sum(w*ind, 0) + else: + loc_center[i] = 0 + + return loc_center + +def _calc_weights_matrix(mat_size, wid): + '''Calculate Weight Matrix + Parameters + ---------- + mat_size: int + dimension of the distance matrix + wid: int + parameter that controls the width of the neighbourood + Returns + ------- + weights_mat: 2-D array + the weights matrix to multiply with the distance matrix + + ''' + #calculate square distance from the 
diagonal + sqd = (arange(1,mat_size+1)[newaxis,:] - arange(1,mat_size+1)[:,newaxis])**2 + #make the distance relative to the mat_size + norm_sqd = sqd/wid + #evaluate a normal pdf + weights_mat = exp(-norm_sqd/mat_size) + #avoid useless precision that would slow down the matrix multiplication + weights_mat -= 1e-6 + weights_mat[weights_mat<0] = 0 + #normalize row and column sum + weights_mat /= sum(weights_mat,0)[newaxis,:] + weights_mat /= sum(weights_mat,1)[:, newaxis] + #fix asimmetries + weights_mat = (weights_mat + weights_mat.T) / 2. + return weights_mat + + +def _sort_neighbourhood( dist_matrix, wid ): + '''Perform a single iteration of SPIN + Parameters + ---------- + dist_matrix: 2-D array + distance matrix + wid: int + parameter that controls the width of the neighbourood + Returns + ------- + sorted_ind: 1-D array + indexes that order the matrix + + ''' + assert wid > 0, 'Parameter wid < 0 is not allowed' + mat_size = dist_matrix.shape[0] + #assert mat_size>2, 'Matrix is too small to be sorted' + weights_mat = _calc_weights_matrix(mat_size, wid) + #Calculate the dot product (can be very slow for big mat_size) + mismatch_score = dot(dist_matrix, weights_mat) + energy, target_permutation = mismatch_score.min(1), mismatch_score.argmin(1) + max_energy = max(energy) + #Avoid points that have the same target_permutation value + sort_score = target_permutation - 0.1 * sign( (mat_size/2 - target_permutation) ) * energy/max_energy + #sort_score = target_permutation - 0.1 * sign( 1-2*(int(1000*energy/max_energy) % 2) ) * energy/max_energy # Alternative + # Sorting the matrix + sorted_ind = sort_score.argsort(0)[::-1] + return sorted_ind + + +def sort_mat_by_neighborhood(dist_matrix, wid, times): + '''Perform several iterations of SPIN using a fixed wid parameter + Parameters + ---------- + dist_matrix: 2-D array + distance matrix + wid: int + parameter that controls the width of the neighbourood + times: int + number of repetitions + verbose: bool + print the 
progress + Returns + ------- + indexes: 1-D array + indexes that order the matrix + + ''' + # original indexes + indexes = arange(dist_matrix.shape[0]) + for i in range(times): + #sort the sitance matrix according the previous iteration + tmpmat = dist_matrix[indexes,:] + tmpmat = tmpmat[:,indexes] + sorted_ind = _sort_neighbourhood(tmpmat, wid); + #resort the original indexes + indexes = indexes[sorted_ind] + return indexes + + +def _generate_widlist(data, axis=1, step=0.6): + '''Generate a list of wid parameters to execute sort_mat_by_neighborhood + Parameters + ---------- + data: 2-D array + the data matrix + axis: int + the axis to take in consideration + step: float + the increment between two successive wid parameters + Returns + ------- + wid_list: list of int + list of wid parameters to run SPIN + + ''' + max_wid = data.shape[axis]*0.6 + new_wid = 1 + wid_list = [] + while new_wid < (1+step)*max_wid: + wid_list.append( new_wid ) + new_wid = int(ceil( new_wid + new_wid*(step) +1)) + return wid_list[::-1] + + + +def SPIN(dt, widlist=[10,1], iters=30, axis='both', verbose=False): + """Run the original SPIN algorithm + Parameters + ---------- + dt: 2-D array + the data matrix + widlist: float or list of int + If float is passed, it is used as step parameted of _generate_widlist, + and widlist is generated to run SPIN. + If list is passed it is used directly to run SPIN. 
+ iters: int + number of repetitions for every wid in widlist + axis: int + the axis to take in consideration (must be 0, 1 or 'both') + step: float + the increment between two successive wid parameters + Returns + ------- + indexes: 1-D array (if axis in [0,1]) or tuple of 1-D array (if axis = 'both') + indexes that sort the data matrix + Notes + ----- + Typical usage + sorted_dt0 = SPIN(dt, iters=30, axis=0) + sorted_dt1 = SPIN(dt, iters=30, axis=1) + dt = dt[sorted_dt0,:] + dt = dt[:,sorted_dt1] + """ + IXc = arange(dt.shape[1]) + IXr = arange(dt.shape[0]) + assert axis in ['both', 0,1], 'axis must be 0, 1 or \'both\' ' + #Sort both axis + if axis == 'both': + CCc = 1 - corrcoef(dt.T) + CCr = 1 - corrcoef(dt) + if type(widlist) != list: + widlist_r = _generate_widlist(dt, axis=0, step=widlist) + widlist_c = _generate_widlist(dt, axis=1, step=widlist) + if verbose: + print('\nSorting genes.') + print('Neighbourood=', end=""), + for wid in widlist_r: + if verbose: + print ('%i, ' % wid), + sys.stdout.flush() + INDr = sort_mat_by_neighborhood(CCr, wid, iters) + CCr = CCr[INDr,:][:,INDr] + IXr = IXr[INDr] + if verbose: + print ('\nSorting cells.') + print ('Neighbourood=',end="") + for wid in widlist_c: + if verbose: + print ('%i, ' % wid), + sys.stdout.flush() + INDc = sort_mat_by_neighborhood(CCc, wid, iters) + CCc = CCc[:,INDc][INDc,:] + IXc= IXc[INDc] + return IXr, IXc + #Sort rows + elif axis == 0: + CCr = 1 - corrcoef(dt) + if type(widlist) != list: + widlist = _generate_widlist(dt, axis=0, step=widlist) + if verbose: + print ('\nSorting genes.\nNeighbourood=',end="") + for wid in widlist: + if verbose: + print ('%i, ' % wid,end="") + sys.stdout.flush() + INDr = sort_mat_by_neighborhood(CCr, wid, iters) + CCr = CCr[INDr,:][:,INDr] + IXr = IXr[INDr] + return IXr + #Sort columns + elif axis == 1: + CCc = 1 - corrcoef(dt.T) + if type(widlist) != list: + widlist = _generate_widlist(dt, axis=1, step=widlist) + if verbose: + print ('\nSorting 
cells.\nNeighbourood=',end="") + for wid in widlist: + if verbose: + print ('%i, ' % wid,end="") + sys.stdout.flush() + INDc = sort_mat_by_neighborhood(CCc, wid, iters) + CCc = CCc[:,INDc][INDc,:] + IXc = IXc[INDc] + return IXc + + +def backSPIN(data, numLevels=2, first_run_iters=10, first_run_step=0.05, runs_iters=8 ,runs_step=0.25,\ + split_limit_g=2, split_limit_c=2, stop_const = 1.15, low_thrs=0.2, verbose=False): + '''Run the backSPIN algorithm + Parameters + ---------- + data: 2-D array + the data matrix, rows should be genes and columns single cells/samples + numLevels: int + the number of splits that will be tried + first_run_iters: float + the iterations of the preparatory SPIN + first_run_step: float + the step parameter passed to _generate_widlist for the preparatory SPIN + runs_iters: int + the iterations parameter passed to the _divide_to_2and_resort. + influences all the SPIN iterations except the first + runs_step: float + the step parameter passed to the _divide_to_2and_resort. 
+ influences all the SPIN iterations except the first + wid: float + the wid of every iteration of the splitting and resorting + split_limit_g: int + If the number of specific genes in a subgroup is smaller than this number + splitting of that subgrup is not allowed + split_limit_c: int + If the number cells in a subgroup is smaller than this number splitting of + that subgrup is not allowed + stop_const: float + minimum score that a breaking point has to reach to be suitable for splitting + low_thrs: float + genes with average lower than this threshold are assigned to either of the + splitting group reling on genes that are higly correlated with them + + Returns + ------- + results: Result object + The results object contain the following attributes + genes_order: 1-D array + indexes (a permutation) sorting the genes + cells_order: 1-D array + indexes (a permutation) sorting the cells + genes_gr_level: 2-D array + for each depth level contains the cluster indexes for each gene + cells_gr_level: + for each depth level contains the cluster indexes for each cell + cells_gr_level_sc: + score of the splitting + genes_bor_level: + the border index between gene clusters + cells_bor_level: + the border index between cell clusters + + Notes + ----- + Typical usage + + ''' + assert numLevels>0, '0 is not an available depth for backSPIN, use SPIN instead' + #initialize some varaibles + genes_bor_level = [[] for i in range(numLevels)] + cells_bor_level = [[] for i in range(numLevels)] + N,M = data.shape + genes_order = arange(N) + cells_order = arange(M) + genes_gr_level = zeros((N,numLevels+1)) + cells_gr_level = zeros((M,numLevels+1)) + cells_gr_level_sc = zeros((M,numLevels+1)) + + # Do a Preparatory SPIN on cells + if verbose: + print ('\nPreparatory SPIN') + ix1 = SPIN(data, widlist=_generate_widlist(data, axis=1, step=first_run_step), iters=first_run_iters, axis=1, verbose=verbose) + cells_order = cells_order[ix1] + + #For every level of depth DO: + for i in 
range(numLevels): + k=0 # initialize group id counter + # For every group generated at the parent level DO: + for j in range( len( set(cells_gr_level[:,i]) ) ): + # Extract the a data matrix of the genes at that level + g_settmp = nonzero(genes_gr_level[:,i]==j)[0] #indexes of genes in the level j + c_settmp = nonzero(cells_gr_level[:,i]==j)[0] #indexes of cells in the level j + datatmp = data[ ix_(genes_order[g_settmp], cells_order[c_settmp]) ] + # If we are not below the splitting limit for both genes and cells DO: + if (len(g_settmp)>split_limit_g) & (len(c_settmp)>split_limit_c): + # Split and SPINsort the two halves + if i == numLevels-1: + divided = _divide_to_2and_resort(datatmp, wid=runs_step, iters_spin=runs_iters,\ + stop_const=stop_const, low_thrs=low_thrs, sort_genes=True, verbose=verbose) + else: + divided = _divide_to_2and_resort(datatmp, wid=runs_step, iters_spin=runs_iters,\ + stop_const=stop_const, low_thrs=low_thrs, sort_genes=False,verbose=verbose) + # _divide_to_2and_resort retruns an empty array in gr2 if the splitting condition was not satisfied + if divided: + sorted_data_resort1, genes_resort1, cells_resort1,\ + gr1, gr2, genesgr1, genesgr2, score1, score2 = divided + # Resort from the previous level + genes_order[g_settmp] = genes_order[g_settmp[genes_resort1]] + cells_order[c_settmp] = cells_order[c_settmp[cells_resort1]] + # Assign a numerical identifier to the groups + genes_gr_level[g_settmp[genesgr1],i+1] = k + genes_gr_level[g_settmp[genesgr2],i+1] = k+1 + cells_gr_level[c_settmp[gr1],i+1] = k + cells_gr_level[c_settmp[gr2],i+1] = k+1 + # Not really clear what sc is + cells_gr_level_sc[c_settmp[gr1],i+1] = score1 + cells_gr_level_sc[c_settmp[gr2],i+1] = score2 + # Augment the counter of 2 becouse two groups were generated from one + k = k+2 + else: + # The split is not convenient, keep everithing the same + genes_gr_level[g_settmp,i+1] = k + # if it is the deepest level: perform gene sorting + if i == numLevels-1: + if 
(datatmp.shape[0] > 2 )and (datatmp.shape[1] > 2): + genes_resort1 = SPIN(datatmp, widlist=runs_step, iters=runs_iters, axis=0, verbose=verbose) + genes_order[g_settmp] = genes_order[g_settmp[genes_resort1]] + cells_gr_level[c_settmp,i+1] = k + cells_gr_level_sc[c_settmp,i+1] = cells_gr_level_sc[c_settmp,i] + # Augment of 1 becouse no new group was generated + k = k+1 + else: + # Below the splitting limit: the split is not convenient, keep everithing the same + genes_gr_level[g_settmp,i+1] = k + cells_gr_level[c_settmp,i+1] = k + cells_gr_level_sc[c_settmp,i+1] = cells_gr_level_sc[c_settmp,i] + # Augment of 1 becouse no new group was generated + k = k+1 + + # Find boundaries + genes_bor_level[i] = r_[0, nonzero(diff(genes_gr_level[:,i+1])>0)[0]+1, data.shape[0] ] + cells_bor_level[i] = r_[0, nonzero(diff(cells_gr_level[:,i+1])>0)[0]+1, data.shape[1] ] + + #dataout_sorted = data[ ix_(genes_order,cells_order) ] + + results = Results() + results.genes_order = genes_order + results.cells_order = cells_order + results.genes_gr_level = genes_gr_level + results.cells_gr_level = cells_gr_level + results.cells_gr_level_sc = cells_gr_level_sc + results.genes_bor_level = genes_bor_level + results.cells_bor_level = cells_bor_level + + return results + + + +def _divide_to_2and_resort(sorted_data, wid, iters_spin=8, stop_const = 1.15, low_thrs=0.2 , sort_genes=True, verbose=False): + '''Core function of backSPIN: split the datamatrix in two and resort the two halves + + Parameters + ---------- + sorted_data: 2-D array + the data matrix, rows should be genes and columns single cells/samples + wid: float + wid parameter to give to widlist parameter of th SPIN fucntion + stop_const: float + minimum score that a breaking point has to reach to be suitable for splitting + low_thrs: float + if the difference between the average expression of two groups is lower than threshold the algorythm + uses higly correlated gens to assign the gene to one of the two groups + verbose: bool + 
information about the split is printed + + Returns + ------- + ''' + + # Calculate correlation matrix for cells and genes + Rcells = corrcoef(sorted_data.T) + Rgenes = corrcoef(sorted_data) + # Look for the optimal breaking point + N = Rcells.shape[0] + score = zeros(N) + for i in range(2,N-2): + if i == 2: + tmp1 = sum( Rcells[:i,:i] ) + tmp2 = sum( Rcells[i:,i:] ) + score[i] = (tmp1+tmp2) / float(i**2 + (N-i)**2) + else: + tmp1 += sum(Rcells[i-1,:i]) + sum(Rcells[:i-1,i-1]); + tmp2 -= sum(Rcells[i-1:,i-1]) + sum(Rcells[i-1,i:]); + score[i] = (tmp1+tmp2) / float(i**2 + (N-i)**2) + + breakp1 = argmax(score) + score1 = Rcells[:breakp1,:breakp1] + score1 = triu(score1) + score1 = mean( score1[score1 != 0] ) + score2 = Rcells[breakp1:, breakp1:] + score2 = triu(score2) + score2 = mean( score2[score2 != 0] ) + avg_tot = triu(Rcells) + avg_tot = mean( avg_tot[avg_tot != 0] ) + + # If it is convenient to break + if (max([score1,score2])/avg_tot) > stop_const: + # Divide in two groups + gr1 = arange(N)[:breakp1] + gr2 = arange(N)[breakp1:] + # and assign the genes into the two groups + mean_gr1 = sorted_data[:, gr1].mean(1) + mean_gr2 = sorted_data[:, gr2].mean(1) + concat_loccenter_gr1 = c_[ calc_loccenter(sorted_data[:,gr1], 2), calc_loccenter(sorted_data[:,gr1][...,::-1], 2) ] + concat_loccenter_gr2 = c_[ calc_loccenter(sorted_data[:,gr2], 2), calc_loccenter(sorted_data[:,gr2][...,::-1], 2) ] + center_gr1, flip_flag1 = concat_loccenter_gr1.min(1), concat_loccenter_gr1.argmin(1) + center_gr2, flip_flag2 = concat_loccenter_gr2.max(1), concat_loccenter_gr2.argmax(1) + sorted_data_tmp = array( sorted_data ) + sorted_data_tmp[ix_(flip_flag1==1,gr1)] = sorted_data[ix_(flip_flag1==1,gr1)][...,::-1] + sorted_data_tmp[ix_(flip_flag2==1,gr2)] = sorted_data[ix_(flip_flag2==1,gr2)][...,::-1] + loc_center = calc_loccenter(sorted_data_tmp, 2) + + imax = zeros(loc_center.shape) + imax[loc_center<=breakp1] = 1 + imax[loc_center>breakp1] = 2 + + genesgr1 = where(imax==1)[0] + genesgr2 
= where(imax==2)[0] + if size(genesgr1) == 0: + IN = argmax(mean_gr1) + genesgr1 = array([IN]) + genesgr2 = setdiff1d(genesgr2, IN) + elif size(genesgr2) == 0: + IN = argmax(mean_gr2) + genesgr2 = array([IN]) + genesgr1 = setdiff1d(genesgr1, IN) + + if verbose: + print ('\nSplitting (%i, %i) ' % sorted_data.shape) + print ('in (%i,%i) ' % (genesgr1.shape[0],gr1.shape[0])) + print ('and (%i,%i)' % (genesgr2.shape[0],gr2.shape[0]),end="") + sys.stdout.flush() + + # Data of group1 + datagr1 = sorted_data[ix_(genesgr1,gr1)] + # zero center + datagr1 = datagr1 - datagr1.mean(1)[:,newaxis] + # Resort group1 + if min( datagr1.shape ) > 1: + if sort_genes: + genesorder1,cellorder1 = SPIN(datagr1, widlist=wid, iters=iters_spin, axis='both', verbose=verbose) + else: + cellorder1 = SPIN(datagr1, widlist=wid, iters=iters_spin, axis=1, verbose=verbose) + genesorder1 = arange(datagr1.shape[0]) + elif len(genesgr1) == 1: + genesorder1 = 0 + cellorder1 = argsort( datagr1[0,:] ) + elif len(gr1) == 1: + cellorder1 = 0 + genesorder1 = argsort( datagr1[:,0] ) + + # Data of group2 + datagr2 = sorted_data[ix_(genesgr2,gr2)] + # zero center + datagr2 = datagr2 - datagr2.mean(1)[:,newaxis] + # Resort group2 + if min( datagr2.shape )>1: + if sort_genes: + genesorder2, cellorder2 = SPIN(datagr2, widlist=wid, iters=iters_spin, axis='both',verbose=verbose) + else: + cellorder2 = SPIN(datagr2, widlist=wid, iters=iters_spin, axis=1,verbose=verbose) + genesorder2 = arange(datagr2.shape[0]) + elif len(genesgr2) == 1: + genesorder2 = 0 + cellorder2 = argsort(datagr2[0,:]) + elif len(gr2) == 1: + cellorder2 = 0 + genesorder2 = argsort(datagr2[:,0]) + + # contcatenate cells and genes indexes + genes_resort1 = r_[genesgr1[genesorder1], genesgr2[genesorder2] ] + cells_resort1 = r_[gr1[cellorder1], gr2[cellorder2] ] + genesgr1 = arange(len(genesgr1)) + genesgr2 = arange(len(genesgr1), len(sorted_data[:,0])) + # resort + sorted_data_resort1 = sorted_data[ix_(genes_resort1,cells_resort1)] + + return 
sorted_data_resort1, genes_resort1, cells_resort1, gr1, gr2, genesgr1, genesgr2, score1, score2 + + else: + if verbose: + print('Low splitting score was : %.4f' % (max([score1,score2])/avg_tot)) + return False + + +def fit_CV(mu, cv, fit_method='Exp', svr_gamma=0.06, x0=[0.5,0.5], verbose=False): + '''Fits a noise model (CV vs mean) + Parameters + ---------- + mu: 1-D array + mean of the genes (raw counts) + cv: 1-D array + coefficient of variation for each gene + fit_method: string + allowed: 'SVR', 'Exp', 'binSVR', 'binExp' + default: 'SVR'(requires scikit learn) + SVR: uses Support vector regression to fit the noise model + Exp: Parametric fit to cv = mu^(-a) + b + bin: before fitting the distribution of mean is normalized to be + uniform by downsampling and resampling. + Returns + ------- + score: 1-D array + Score is the relative position with respect of the fitted curve + mu_linspace: 1-D array + x coordiantes to plot (min(log2(mu)) -> max(log2(mu))) + cv_fit: 1-D array + y=f(x) coordinates to plot + pars: tuple or None + + ''' + log2_m = log2(mu) + log2_cv = log2(cv) + + if len(mu)>1000 and 'bin' in fit_method: + #histogram with 30 bins + n,xi = histogram(log2_m,30) + med_n = percentile(n,50) + for i in range(0,len(n)): + # index of genes within the ith bin + ind = where( (log2_m >= xi[i]) & (log2_m < xi[i+1]) )[0] + if len(ind)>med_n: + #Downsample if count is more than median + ind = ind[random.permutation(len(ind))] + ind = ind[:len(ind)-med_n] + mask = ones(len(log2_m), dtype=bool) + mask[ind] = False + log2_m = log2_m[mask] + log2_cv = log2_cv[mask] + elif (around(med_n/len(ind))>1) and (len(ind)>5): + #Duplicate if count is less than median + log2_m = r_[ log2_m, tile(log2_m[ind], around(med_n/len(ind))-1) ] + log2_cv = r_[ log2_cv, tile(log2_cv[ind], around(med_n/len(ind))-1) ] + else: + if 'bin' in fit_method: + print('More than 1000 input feature needed for bin correction.') + pass + + if 'SVR' in fit_method: + try: + from sklearn.svm import SVR + 
if svr_gamma == 'auto': + svr_gamma = 1000./len(mu) + #Fit the Support Vector Regression + clf = SVR(gamma=svr_gamma) + clf.fit(log2_m[:,newaxis], log2_cv) + fitted_fun = clf.predict + score = log2(cv) - fitted_fun(log2(mu)[:,newaxis]) + params = None + #The coordinates of the fitted curve + mu_linspace = linspace(min(log2_m),max(log2_m)) + cv_fit = fitted_fun(mu_linspace[:,newaxis]) + return score, mu_linspace, cv_fit , params + + except ImportError: + if verbose: + print('SVR fit requires scikit-learn python library. Using exponential instead.') + if 'bin' in fit_method: + return fit_CV(mu, cv, fit_method='binExp', x0=x0) + else: + return fit_CV(mu, cv, fit_method='Exp', x0=x0) + elif 'Exp' in fit_method: + from scipy.optimize import minimize + #Define the objective function to fit (least squares) + fun = lambda x, log2_m, log2_cv: sum(abs( log2( (2.**log2_m)**(-x[0])+x[1]) - log2_cv )) + #Fit using Nelder-Mead algorythm + optimization = minimize(fun, x0, args=(log2_m,log2_cv), method='Nelder-Mead') + params = optimization.x + #The fitted function + fitted_fun = lambda log_mu: log2( (2.**log_mu)**(-params[0]) + params[1]) + # Score is the relative position with respect of the fitted curve + score = log2(cv) - fitted_fun(log2(mu)) + #The coordinates of the fitted curve + mu_linspace = linspace(min(log2_m),max(log2_m)) + cv_fit = fitted_fun(mu_linspace) + return score, mu_linspace, cv_fit , params + + + +def feature_selection(data,thrs, verbose=False): + if thrs>= data.shape[0]: + if verbose: + print ("Trying to select %i features but only %i genes available." 
%( thrs, data.shape[0])) + print ("Skipping feature selection") + return arange(data.shape[0]) + ix_genes = arange(data.shape[0]) + threeperK = int(ceil(3*data.shape[1]/1000.)) + zerotwoperK = int(floor(0.3*data.shape[1]/1000.)) + # is at least 1 molecule in 0.3% of thecells, is at least 2 molecules in 0.03% of the cells + condition = (sum(data>=1, 1)>= threeperK) & (sum(data>=2, 1)>=zerotwoperK) + ix_genes = ix_genes[condition] + + mu = data[ix_genes,:].mean(1) + sigma = data[ix_genes,:].std(1, ddof=1) + cv = sigma/mu + + try: + score, mu_linspace, cv_fit , params = fit_CV(mu,cv,fit_method='SVR', verbose=verbose) + except ImportError: + print ("WARNING: Feature selection was skipped becouse scipy is required. Install scipy to run feature selection.") + return arange(data.shape[0]) + + return ix_genes[argsort(score)[::-1]][:thrs] + +def usage_quick(): + + message ='''usage: backSPIN [-hbv] [-i inputfile] [-o outputfolder] [-d int] [-f int] [-t int] [-s float] [-T int] [-S float] [-g int] [-c int] [-k float] [-r float] + manual: backSPIN -h + ''' + print (message) + +def usage(): + + message=''' + backSPIN commandline tool + ------------------------- + + The options are as follows: + + -i [inputfile] + --input=[inputfile] + Path of the cef formatted tab delimited file. + Rows should be genes and columns single cells/samples. + For further information on the cef format visit: + https://github.com/linnarsson-lab/ceftools + + -o [outputfile] + --output=[outputfile] + The name of the file to which the output will be written + + -d [int] + Depth/Number of levels: The number of nested splits that will be tried by the algorithm + -t [int] + Number of the iterations used in the preparatory SPIN. + Defaults to 10 + -f [int] + Feature selection is performed before BackSPIN. Argument controls how many genes are seleceted. + Selection is based on expected noise (a curve fit to the CV-vs-mean plot). 
+ -s [float] + Controls the decrease rate of the width parameter used in the preparatory SPIN. + Smaller values will increase the number of SPIN iterations and result in higher + precision in the first step but longer execution time. + Defaults to 0.1 + -T [int] + Number of the iterations used for every width parameter. + Does not apply on the first run (use -t instead) + Defaults to 8 + -S [float] + Controls the decrease rate of the width parameter. + Smaller values will increase the number of SPIN iterations and result in higher + precision but longer execution time. + Does not apply on the first run (use -s instead) + Defaults to 0.3 + -g [int] + Minimal number of genes that a group must contain for splitting to be allowed. + Defaults to 2 + -c [int] + Minimal number of cells that a group must contain for splitting to be allowed. + Defaults to 2 + -k [float] + Minimum score that a breaking point has to reach to be suitable for splitting. + Defaults to 1.15 + -r [float] + If the difference between the average expression of two groups is lower than threshold the algorythm + uses higly correlated genes to assign the gene to one of the two groups + Defaults to 0.2 + -b [axisvalue] + Run normal SPIN instead of backSPIN. + Normal spin accepts the parameters -T -S + An axis value 0 to only sort genes (rows), 1 to only sort cells (columns) or 'both' for both + must be passed + -v + Verbose. 
Print to the stdoutput extra details of what is happening + + ''' + + print(message) + + + +if __name__ == '__main__': + print("") + #defaults arguments + input_path = None + outfiles_path = None + numLevels=2 # -d + feature_fit = False # -f + feature_genes = 2000 + first_run_iters=10 # -t + first_run_step=0.1 # -s + runs_iters=8 # -T + runs_step=0.3 # -S + split_limit_g=2 # -g + split_limit_c=2 # -c + stop_const = 1.15 # -k + low_thrs=0.2 # -r + normal_spin = False #-b + normal_spin_axis = 'both' + verbose=False # -v + + optlist, args = getopt.gnu_getopt(sys.argv[1:], "hvi:o:f:d:t:s:T:S:g:c:k:r:b:", ["help", "input=","output="]) + + if optlist== [] and args == []: + usage_quick() + sys.exit() + for opt, a in optlist: + if opt in ("-h", "--help"): + usage() + sys.exit() + elif opt in ('-i', '--input'): + input_path = a + elif opt in ("-o", "--output"): + outfiles_path = a + elif opt == '-d': + numLevels = int(a) + elif opt == '-f': + feature_fit = True + if a != '': + feature_genes = int(a) + elif opt == '-t': + first_run_iters = int(a) + elif opt == '-s': + first_run_step = float(a) + elif opt == '-T': + runs_iters = int(a) + elif opt == '-S': + runs_step = float(a) + elif opt == '-g': + split_limit_g = int(a) + elif opt == '-c': + split_limit_c = int(a) + elif opt == '-k': + stop_const = float(a) + elif opt == '-r': + low_thrs = float(a) + elif opt == '-v': + verbose = True + elif opt == '-b': + normal_spin = True + if a != '': + if a == 'both': + normal_spin_axis = a + else: + normal_spin_axis = int(a) + else: + assert False, "%s option is not supported" % opt + + if input_path == None: + print ('No input file was provided.\nYou need to specify an input file\n(e.g. backSPIN -i path/to/your/file/foo.cef)\n') + sys.exit() + if outfiles_path == None: + print ('No output file was provided.\nYou need to specify an output file\n(e.g. 
backSPIN -o path/to/your/file/bar.cef)\n') + sys.exit() + + try: + if verbose: + print ('Loading file.') + input_cef = CEF_obj() + input_cef.readCEF(input_path) + + data = array(input_cef.matrix) + + if feature_fit: + if verbose: + print ("Performing feature selection") + ix_features = feature_selection(data, feature_genes, verbose=verbose) + if verbose: + print ("Selected %i genes" % len(ix_features)) + data = data[ix_features, :] + input_cef.matrix = data.tolist() + input_cef.row_attr_values = atleast_2d( array( input_cef.row_attr_values ))[:,ix_features].tolist() + input_cef.update() + + data = log2(data+1) + data = data - data.mean(1)[:,newaxis] + if data.shape[0] <= 3 and data.shape[1] <= 3: + print ('Input file is not correctly formatted.\n') + sys.exit() + except Exception as err: + import traceback + print ('There was an error') + print (traceback.format_exc()) + print ('Error occurred in parsing the input file.') + print ('Please check that your input file is a correctly formatted cef file.\n') + sys.exit() + + if normal_spin == False: + + print ('backSPIN started\n----------------\n') + print ('Input file:\n%s\n' % input_path) + print ('Output file:\n%s\n' % outfiles_path) + print ('numLevels: %i\nfirst_run_iters: %i\nfirst_run_step: %.3f\nruns_iters: %i\nruns_step: %.3f\nsplit_limit_g: %i\nsplit_limit_c: %i\nstop_const: %.3f\nlow_thrs: %.3f\n' % (numLevels, first_run_iters, first_run_step, runs_iters,\ + runs_step, split_limit_g, split_limit_c, stop_const, low_thrs)) + + + results = backSPIN(data, numLevels, first_run_iters, first_run_step, runs_iters, runs_step,\ + split_limit_g, split_limit_c, stop_const, low_thrs, verbose) + + sys.stdout.flush() + print ('\nWriting output.\n') + + output_cef = CEF_obj() + + for h_name, h_val in zip( input_cef.header_names, input_cef.header_values): + output_cef.add_header(h_name, h_val ) + for c_name, c_val in zip( input_cef.col_attr_names, input_cef.col_attr_values): + output_cef.add_col_attr(c_name, 
array(c_val)[results.cells_order]) + for r_name, r_val in zip( input_cef.row_attr_names, input_cef.row_attr_values): + output_cef.add_row_attr(r_name, array(r_val)[results.genes_order]) + + for level, groups in enumerate( results.genes_gr_level.T ): + output_cef.add_row_attr('Level_%i_group' % level, [int(el) for el in groups]) + for level, groups in enumerate( results.cells_gr_level.T ): + output_cef.add_col_attr('Level_%i_group' % level, [int(el) for el in groups]) + + output_cef.set_matrix(array(input_cef.matrix)[results.genes_order,:][:,results.cells_order]) + if sum(type(i)==float for i in input_cef.matrix[0]) + sum(type(i)==float for i in input_cef.matrix[-1]) == 0: + fmt = '%i' + else: + fmt ='%.6g' + output_cef.writeCEF( outfiles_path, matrix_str_fmt=fmt ) + else: + + print ('normal SPIN started\n----------------\n') + print ('Input file:\n%s\n' % input_path) + print ('Output file:\n%s\n' % outfiles_path) + + results = SPIN(data, widlist=runs_step, iters=runs_iters, axis=normal_spin_axis, verbose=verbose) + + print ('\nWriting output.\n') + + output_cef = CEF_obj() + + for h_name, h_val in zip( input_cef.header_names, input_cef.header_values): + output_cef.add_header(h_name, h_val ) + + if normal_spin_axis == 'both': + for c_name, c_val in zip( input_cef.col_attr_names, input_cef.col_attr_values): + output_cef.add_col_attr(c_name, array(c_val)[results[1]]) + for r_name, r_val in zip( input_cef.row_attr_names, input_cef.row_attr_values): + output_cef.add_row_attr(r_name, array(r_val)[results[0]]) + output_cef.set_matrix(array(input_cef.matrix)[results[0],:][:,results[1]]) + + if normal_spin_axis == 0: + for r_name, r_val in zip( input_cef.row_attr_names, input_cef.row_attr_values): + output_cef.add_row_attr(r_name, array(r_val)[results]) + output_cef.set_matrix(array(input_cef.matrix)[results,:]) + + if normal_spin_axis == 1: + for c_name, c_val in zip( input_cef.col_attr_names, input_cef.col_attr_values): + output_cef.add_col_attr(c_name, 
array(c_val)[results]) + output_cef.set_matrix(array(input_cef.matrix)[:,results]) + + + + output_cef.writeCEF( outfiles_path ) + From 43b112667710ec340ddc62eafb8e8856c8d22601 Mon Sep 17 00:00:00 2001 From: Python3pkg Date: Thu, 18 May 2017 05:14:21 -0700 Subject: [PATCH 2/3] Remove .bak --- backspinpy/Cef_tools.py.bak | 161 ------- backspinpy/__init__.py.bak | 3 - backspinpy/backSPIN.py.bak | 932 ------------------------------------ 3 files changed, 1096 deletions(-) delete mode 100644 backspinpy/Cef_tools.py.bak delete mode 100644 backspinpy/__init__.py.bak delete mode 100755 backspinpy/backSPIN.py.bak diff --git a/backspinpy/Cef_tools.py.bak b/backspinpy/Cef_tools.py.bak deleted file mode 100644 index 9d56f8a..0000000 --- a/backspinpy/Cef_tools.py.bak +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright (c) 2015 Gioele La Manno and Sten Linnarsson -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -# This code contains a simple parser reader for Cef files -# typical usage to write a cef_file: -# -# cef = CEF_obj() -# cef.add_header('Mynote', 'This is my message') -# cef.set_matrix(your_array) -# cef.add_col_attr(attribute_name, attribute_values_list) -# cef.add_row_attr(attribute_name, attribute_values_list) -# cef.writeCEF('path/to/your/file.cef') -# -# to read: -# cef = CEF_obj() -# cef.readCEF('path/to/your/file.cef') -# your_array = cef.matrix -# attribute_values_list2 = cef.col_attr_values[2] -# ... 
- -from builtins import str as unicode -from io import open -from builtins import range - -class CEF_obj(object): - def __init__(self): - self.headers = 0 - self.row_attr = 0 - self.col_attr = 0 - self.rows = 0 - self.cols = 0 - self.flags = 0 - self.tab_fields = 7 - - self.header_names = [] - self.header_values = [] - - self.row_attr_names = [] - self.row_attr_values = [] - - self.col_attr_names = [] - self.col_attr_values = [] - - self.matrix = [] - - - def update(self): - self.headers = len( self.header_names) - self.row_attr = len(self.row_attr_names) - self.col_attr = len(self.col_attr_names) - self.rows = len(self.matrix) - self.cols = len(self.matrix[0]) - self.tab_fields = max(7, self.cols + self.row_attr + 1) - self.linestr = u'%s' + u'\t%s' * (self.tab_fields-1) + u'\n' - - def add_header(self, name, value): - self.header_names.append(name) - self.header_values.append(value) - - def add_row_attr(self, name, value): - self.row_attr_names.append(name) - self.row_attr_values.append(list(value)) - - def add_col_attr(self, name, value): - self.col_attr_names.append(name) - self.col_attr_values.append(list(value)) - - def set_matrix(self, matrix): - for row in matrix: - self.matrix.append(list(row)) - - def readCEF(self, filepath, matrix_dtype = 'auto'): - #Delete all the stored information - self.__init__() - #Start parsing - with open(filepath, 'rU') as fin: - # Read cef file first line - self.header, self.row_attr, self.col_attr, self.rows,\ - self.cols, self.flags = fin.readline().rstrip('\n').split('\t')[1:7] - self.header = int(self.header) - self.row_attr = int( self.row_attr ) - self.col_attr = int(self.col_attr) - self.rows = int(self.rows) - self.cols = int(self.cols) - self.flags = int(self.flags) - self.row_attr_values = [[] for _ in range(self.row_attr)] - # Read header - for i in range(self.header): - name, value = fin.readline().rstrip('\n').split('\t')[:2] - self.header_names.append(name) - self.header_values.append(value) - # Read col attr - 
for i in range(self.col_attr): - line_col_attr = fin.readline().rstrip('\n').split('\t')[self.row_attr:] - self.col_attr_names.append( line_col_attr[0] ) - self.col_attr_values.append( line_col_attr[1:] ) - #Read row attr and matrix - self.row_attr_names += fin.readline().rstrip('\n').split('\t')[:self.row_attr] - for _ in range(self.rows): - linelist = fin.readline().rstrip('\n').split('\t') - for n, entry in enumerate( linelist[:self.row_attr] ): - self.row_attr_values[n].append( entry ) - if matrix_dtype == 'auto': - if sum(('.' in i) or ('e' in i) for i in linelist[self.row_attr+1:]) != 0: - matrix_dtype = float - else: - matrix_dtype = int - try: - self.matrix.append( [matrix_dtype(el) for el in linelist[self.row_attr+1:] ]) - except ValueError: - print(repr(el), ' is invalid') - - - - def writeCEF(self, filepath, matrix_str_fmt = '%i'): - self.update() - with open(filepath, 'w') as fout: - #Write cef file first line - fout.write( self.linestr % ( ('CEF', unicode(self.headers), unicode(self.row_attr),\ - unicode(self.col_attr) , unicode(self.rows), unicode(self.cols), unicode(self.flags) ) +\ - ('',) * (self.tab_fields - 7) ) ) - #Write header - for i in range(self.headers): - fout.write(self.linestr % ( (unicode( self.header_names[i]), unicode( self.header_values[i]) ) + ('',) * (self.tab_fields - 2) )) - #Write col attributes - for i in range( self.col_attr ): - fout.write( self.linestr % ( ('',) * (self.row_attr) + (unicode( self.col_attr_names[i] ),) + tuple( unicode(el) for el in self.col_attr_values[i] ) )) - #Write headers of row attributes - fout.write( self.linestr % ( tuple(unicode(el) for el in self.row_attr_names) + ('',)*(self.tab_fields-self.row_attr) ) ) - #Write rows - for i in range(self.rows): - for j in range(self.row_attr): - fout.write(unicode(self.row_attr_values[j][i]) + u'\t') - fout.write(u'\t') - fout.write(u'\t'.join([unicode(matrix_str_fmt % el) for el in self.matrix[i]] ) ) - fout.write(u'\n') - - - diff --git 
a/backspinpy/__init__.py.bak b/backspinpy/__init__.py.bak deleted file mode 100644 index 2b92ec8..0000000 --- a/backspinpy/__init__.py.bak +++ /dev/null @@ -1,3 +0,0 @@ -from __future__ import absolute_import -from .backSPIN import SPIN, backSPIN, fit_CV, feature_selection -from .Cef_tools import CEF_obj diff --git a/backspinpy/backSPIN.py.bak b/backspinpy/backSPIN.py.bak deleted file mode 100755 index 02bc3bd..0000000 --- a/backspinpy/backSPIN.py.bak +++ /dev/null @@ -1,932 +0,0 @@ -#!/usr/bin/env python - -# Copyright (c) 2015, Amit Zeisel, Gioele La Manno and Sten Linnarsson -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -# This .py file can be used as a library or a command-line version of BackSPIN, -# This version of BackSPIN was implemented by Gioele La Manno. -# The BackSPIN biclustering algorithm was developed by Amit Zeisel and is described -# in Zeisel et al. Cell types in the mouse cortex and hippocampus revealed by -# single-cell RNA-seq Science 2015 (PMID: 25700174, doi: 10.1126/science.aaa1934). -# -# Building using pyinstaller: -# pyinstaller -F backSPIN.py -n backspin-mac-64-bit -# - -from __future__ import division, print_function, absolute_import -from numpy import * -import getopt -import sys -import os -from .Cef_tools import CEF_obj - -class Results: - pass - -def calc_loccenter(x, lin_log_flag): - M,N = x.shape - if N==1 and M>1: - x = x.T - M,N = x.shape - loc_center = zeros(M) - min_x = x.min(1) - x = x - min_x[:,newaxis] - for i in range(M): - ind = where(x[i,:]>0)[0] - if len(ind) != 0: - if lin_log_flag == 1: - w = x[i,ind]/sum(x[i,ind], 0) - else: - w = (2**x[i,ind])/sum(2**x[i,ind], 0) - loc_center[i] = sum(w*ind, 0) - else: - loc_center[i] = 0 - - return loc_center - -def _calc_weights_matrix(mat_size, wid): - '''Calculate Weight Matrix - Parameters - ---------- - mat_size: int - dimension of the distance matrix - wid: int - parameter that controls the width of the neighbourood - Returns - ------- - weights_mat: 2-D array - the weights matrix to multiply with the distance matrix - - ''' - #calculate square distance from the diagonal - sqd = (arange(1,mat_size+1)[newaxis,:] - arange(1,mat_size+1)[:,newaxis])**2 - #make the distance relative to the mat_size - norm_sqd = sqd/wid - #evaluate a normal pdf - weights_mat = exp(-norm_sqd/mat_size) - #avoid useless precision that would slow down the matrix multiplication - weights_mat -= 1e-6 - weights_mat[weights_mat<0] = 0 - #normalize row and column sum - weights_mat /= sum(weights_mat,0)[newaxis,:] - weights_mat /= sum(weights_mat,1)[:, newaxis] - #fix asimmetries - weights_mat = (weights_mat + 
weights_mat.T) / 2. - return weights_mat - - -def _sort_neighbourhood( dist_matrix, wid ): - '''Perform a single iteration of SPIN - Parameters - ---------- - dist_matrix: 2-D array - distance matrix - wid: int - parameter that controls the width of the neighbourood - Returns - ------- - sorted_ind: 1-D array - indexes that order the matrix - - ''' - assert wid > 0, 'Parameter wid < 0 is not allowed' - mat_size = dist_matrix.shape[0] - #assert mat_size>2, 'Matrix is too small to be sorted' - weights_mat = _calc_weights_matrix(mat_size, wid) - #Calculate the dot product (can be very slow for big mat_size) - mismatch_score = dot(dist_matrix, weights_mat) - energy, target_permutation = mismatch_score.min(1), mismatch_score.argmin(1) - max_energy = max(energy) - #Avoid points that have the same target_permutation value - sort_score = target_permutation - 0.1 * sign( (mat_size/2 - target_permutation) ) * energy/max_energy - #sort_score = target_permutation - 0.1 * sign( 1-2*(int(1000*energy/max_energy) % 2) ) * energy/max_energy # Alternative - # Sorting the matrix - sorted_ind = sort_score.argsort(0)[::-1] - return sorted_ind - - -def sort_mat_by_neighborhood(dist_matrix, wid, times): - '''Perform several iterations of SPIN using a fixed wid parameter - Parameters - ---------- - dist_matrix: 2-D array - distance matrix - wid: int - parameter that controls the width of the neighbourood - times: int - number of repetitions - verbose: bool - print the progress - Returns - ------- - indexes: 1-D array - indexes that order the matrix - - ''' - # original indexes - indexes = arange(dist_matrix.shape[0]) - for i in range(times): - #sort the sitance matrix according the previous iteration - tmpmat = dist_matrix[indexes,:] - tmpmat = tmpmat[:,indexes] - sorted_ind = _sort_neighbourhood(tmpmat, wid); - #resort the original indexes - indexes = indexes[sorted_ind] - return indexes - - -def _generate_widlist(data, axis=1, step=0.6): - '''Generate a list of wid parameters to execute 
sort_mat_by_neighborhood - Parameters - ---------- - data: 2-D array - the data matrix - axis: int - the axis to take in consideration - step: float - the increment between two successive wid parameters - Returns - ------- - wid_list: list of int - list of wid parameters to run SPIN - - ''' - max_wid = data.shape[axis]*0.6 - new_wid = 1 - wid_list = [] - while new_wid < (1+step)*max_wid: - wid_list.append( new_wid ) - new_wid = int(ceil( new_wid + new_wid*(step) +1)) - return wid_list[::-1] - - - -def SPIN(dt, widlist=[10,1], iters=30, axis='both', verbose=False): - """Run the original SPIN algorithm - Parameters - ---------- - dt: 2-D array - the data matrix - widlist: float or list of int - If float is passed, it is used as step parameted of _generate_widlist, - and widlist is generated to run SPIN. - If list is passed it is used directly to run SPIN. - iters: int - number of repetitions for every wid in widlist - axis: int - the axis to take in consideration (must be 0, 1 or 'both') - step: float - the increment between two successive wid parameters - Returns - ------- - indexes: 1-D array (if axis in [0,1]) or tuple of 1-D array (if axis = 'both') - indexes that sort the data matrix - Notes - ----- - Typical usage - sorted_dt0 = SPIN(dt, iters=30, axis=0) - sorted_dt1 = SPIN(dt, iters=30, axis=1) - dt = dt[sorted_dt0,:] - dt = dt[:,sorted_dt1] - """ - IXc = arange(dt.shape[1]) - IXr = arange(dt.shape[0]) - assert axis in ['both', 0,1], 'axis must be 0, 1 or \'both\' ' - #Sort both axis - if axis == 'both': - CCc = 1 - corrcoef(dt.T) - CCr = 1 - corrcoef(dt) - if type(widlist) != list: - widlist_r = _generate_widlist(dt, axis=0, step=widlist) - widlist_c = _generate_widlist(dt, axis=1, step=widlist) - if verbose: - print('\nSorting genes.') - print('Neighbourood=', end=""), - for wid in widlist_r: - if verbose: - print ('%i, ' % wid), - sys.stdout.flush() - INDr = sort_mat_by_neighborhood(CCr, wid, iters) - CCr = CCr[INDr,:][:,INDr] - IXr = IXr[INDr] - if 
verbose: - print ('\nSorting cells.') - print ('Neighbourood=',end="") - for wid in widlist_c: - if verbose: - print ('%i, ' % wid), - sys.stdout.flush() - INDc = sort_mat_by_neighborhood(CCc, wid, iters) - CCc = CCc[:,INDc][INDc,:] - IXc= IXc[INDc] - return IXr, IXc - #Sort rows - elif axis == 0: - CCr = 1 - corrcoef(dt) - if type(widlist) != list: - widlist = _generate_widlist(dt, axis=0, step=widlist) - if verbose: - print ('\nSorting genes.\nNeighbourood=',end="") - for wid in widlist: - if verbose: - print ('%i, ' % wid,end="") - sys.stdout.flush() - INDr = sort_mat_by_neighborhood(CCr, wid, iters) - CCr = CCr[INDr,:][:,INDr] - IXr = IXr[INDr] - return IXr - #Sort columns - elif axis == 1: - CCc = 1 - corrcoef(dt.T) - if type(widlist) != list: - widlist = _generate_widlist(dt, axis=1, step=widlist) - if verbose: - print ('\nSorting cells.\nNeighbourood=',end="") - for wid in widlist: - if verbose: - print ('%i, ' % wid,end="") - sys.stdout.flush() - INDc = sort_mat_by_neighborhood(CCc, wid, iters) - CCc = CCc[:,INDc][INDc,:] - IXc = IXc[INDc] - return IXc - - -def backSPIN(data, numLevels=2, first_run_iters=10, first_run_step=0.05, runs_iters=8 ,runs_step=0.25,\ - split_limit_g=2, split_limit_c=2, stop_const = 1.15, low_thrs=0.2, verbose=False): - '''Run the backSPIN algorithm - Parameters - ---------- - data: 2-D array - the data matrix, rows should be genes and columns single cells/samples - numLevels: int - the number of splits that will be tried - first_run_iters: float - the iterations of the preparatory SPIN - first_run_step: float - the step parameter passed to _generate_widlist for the preparatory SPIN - runs_iters: int - the iterations parameter passed to the _divide_to_2and_resort. - influences all the SPIN iterations except the first - runs_step: float - the step parameter passed to the _divide_to_2and_resort. 
- influences all the SPIN iterations except the first - wid: float - the wid of every iteration of the splitting and resorting - split_limit_g: int - If the number of specific genes in a subgroup is smaller than this number - splitting of that subgrup is not allowed - split_limit_c: int - If the number cells in a subgroup is smaller than this number splitting of - that subgrup is not allowed - stop_const: float - minimum score that a breaking point has to reach to be suitable for splitting - low_thrs: float - genes with average lower than this threshold are assigned to either of the - splitting group reling on genes that are higly correlated with them - - Returns - ------- - results: Result object - The results object contain the following attributes - genes_order: 1-D array - indexes (a permutation) sorting the genes - cells_order: 1-D array - indexes (a permutation) sorting the cells - genes_gr_level: 2-D array - for each depth level contains the cluster indexes for each gene - cells_gr_level: - for each depth level contains the cluster indexes for each cell - cells_gr_level_sc: - score of the splitting - genes_bor_level: - the border index between gene clusters - cells_bor_level: - the border index between cell clusters - - Notes - ----- - Typical usage - - ''' - assert numLevels>0, '0 is not an available depth for backSPIN, use SPIN instead' - #initialize some varaibles - genes_bor_level = [[] for i in range(numLevels)] - cells_bor_level = [[] for i in range(numLevels)] - N,M = data.shape - genes_order = arange(N) - cells_order = arange(M) - genes_gr_level = zeros((N,numLevels+1)) - cells_gr_level = zeros((M,numLevels+1)) - cells_gr_level_sc = zeros((M,numLevels+1)) - - # Do a Preparatory SPIN on cells - if verbose: - print ('\nPreparatory SPIN') - ix1 = SPIN(data, widlist=_generate_widlist(data, axis=1, step=first_run_step), iters=first_run_iters, axis=1, verbose=verbose) - cells_order = cells_order[ix1] - - #For every level of depth DO: - for i in 
range(numLevels): - k=0 # initialize group id counter - # For every group generated at the parent level DO: - for j in range( len( set(cells_gr_level[:,i]) ) ): - # Extract the a data matrix of the genes at that level - g_settmp = nonzero(genes_gr_level[:,i]==j)[0] #indexes of genes in the level j - c_settmp = nonzero(cells_gr_level[:,i]==j)[0] #indexes of cells in the level j - datatmp = data[ ix_(genes_order[g_settmp], cells_order[c_settmp]) ] - # If we are not below the splitting limit for both genes and cells DO: - if (len(g_settmp)>split_limit_g) & (len(c_settmp)>split_limit_c): - # Split and SPINsort the two halves - if i == numLevels-1: - divided = _divide_to_2and_resort(datatmp, wid=runs_step, iters_spin=runs_iters,\ - stop_const=stop_const, low_thrs=low_thrs, sort_genes=True, verbose=verbose) - else: - divided = _divide_to_2and_resort(datatmp, wid=runs_step, iters_spin=runs_iters,\ - stop_const=stop_const, low_thrs=low_thrs, sort_genes=False,verbose=verbose) - # _divide_to_2and_resort retruns an empty array in gr2 if the splitting condition was not satisfied - if divided: - sorted_data_resort1, genes_resort1, cells_resort1,\ - gr1, gr2, genesgr1, genesgr2, score1, score2 = divided - # Resort from the previous level - genes_order[g_settmp] = genes_order[g_settmp[genes_resort1]] - cells_order[c_settmp] = cells_order[c_settmp[cells_resort1]] - # Assign a numerical identifier to the groups - genes_gr_level[g_settmp[genesgr1],i+1] = k - genes_gr_level[g_settmp[genesgr2],i+1] = k+1 - cells_gr_level[c_settmp[gr1],i+1] = k - cells_gr_level[c_settmp[gr2],i+1] = k+1 - # Not really clear what sc is - cells_gr_level_sc[c_settmp[gr1],i+1] = score1 - cells_gr_level_sc[c_settmp[gr2],i+1] = score2 - # Augment the counter of 2 becouse two groups were generated from one - k = k+2 - else: - # The split is not convenient, keep everithing the same - genes_gr_level[g_settmp,i+1] = k - # if it is the deepest level: perform gene sorting - if i == numLevels-1: - if 
(datatmp.shape[0] > 2 )and (datatmp.shape[1] > 2): - genes_resort1 = SPIN(datatmp, widlist=runs_step, iters=runs_iters, axis=0, verbose=verbose) - genes_order[g_settmp] = genes_order[g_settmp[genes_resort1]] - cells_gr_level[c_settmp,i+1] = k - cells_gr_level_sc[c_settmp,i+1] = cells_gr_level_sc[c_settmp,i] - # Augment of 1 becouse no new group was generated - k = k+1 - else: - # Below the splitting limit: the split is not convenient, keep everithing the same - genes_gr_level[g_settmp,i+1] = k - cells_gr_level[c_settmp,i+1] = k - cells_gr_level_sc[c_settmp,i+1] = cells_gr_level_sc[c_settmp,i] - # Augment of 1 becouse no new group was generated - k = k+1 - - # Find boundaries - genes_bor_level[i] = r_[0, nonzero(diff(genes_gr_level[:,i+1])>0)[0]+1, data.shape[0] ] - cells_bor_level[i] = r_[0, nonzero(diff(cells_gr_level[:,i+1])>0)[0]+1, data.shape[1] ] - - #dataout_sorted = data[ ix_(genes_order,cells_order) ] - - results = Results() - results.genes_order = genes_order - results.cells_order = cells_order - results.genes_gr_level = genes_gr_level - results.cells_gr_level = cells_gr_level - results.cells_gr_level_sc = cells_gr_level_sc - results.genes_bor_level = genes_bor_level - results.cells_bor_level = cells_bor_level - - return results - - - -def _divide_to_2and_resort(sorted_data, wid, iters_spin=8, stop_const = 1.15, low_thrs=0.2 , sort_genes=True, verbose=False): - '''Core function of backSPIN: split the datamatrix in two and resort the two halves - - Parameters - ---------- - sorted_data: 2-D array - the data matrix, rows should be genes and columns single cells/samples - wid: float - wid parameter to give to widlist parameter of th SPIN fucntion - stop_const: float - minimum score that a breaking point has to reach to be suitable for splitting - low_thrs: float - if the difference between the average expression of two groups is lower than threshold the algorythm - uses higly correlated gens to assign the gene to one of the two groups - verbose: bool - 
information about the split is printed - - Returns - ------- - ''' - - # Calculate correlation matrix for cells and genes - Rcells = corrcoef(sorted_data.T) - Rgenes = corrcoef(sorted_data) - # Look for the optimal breaking point - N = Rcells.shape[0] - score = zeros(N) - for i in range(2,N-2): - if i == 2: - tmp1 = sum( Rcells[:i,:i] ) - tmp2 = sum( Rcells[i:,i:] ) - score[i] = (tmp1+tmp2) / float(i**2 + (N-i)**2) - else: - tmp1 += sum(Rcells[i-1,:i]) + sum(Rcells[:i-1,i-1]); - tmp2 -= sum(Rcells[i-1:,i-1]) + sum(Rcells[i-1,i:]); - score[i] = (tmp1+tmp2) / float(i**2 + (N-i)**2) - - breakp1 = argmax(score) - score1 = Rcells[:breakp1,:breakp1] - score1 = triu(score1) - score1 = mean( score1[score1 != 0] ) - score2 = Rcells[breakp1:, breakp1:] - score2 = triu(score2) - score2 = mean( score2[score2 != 0] ) - avg_tot = triu(Rcells) - avg_tot = mean( avg_tot[avg_tot != 0] ) - - # If it is convenient to break - if (max([score1,score2])/avg_tot) > stop_const: - # Divide in two groups - gr1 = arange(N)[:breakp1] - gr2 = arange(N)[breakp1:] - # and assign the genes into the two groups - mean_gr1 = sorted_data[:, gr1].mean(1) - mean_gr2 = sorted_data[:, gr2].mean(1) - concat_loccenter_gr1 = c_[ calc_loccenter(sorted_data[:,gr1], 2), calc_loccenter(sorted_data[:,gr1][...,::-1], 2) ] - concat_loccenter_gr2 = c_[ calc_loccenter(sorted_data[:,gr2], 2), calc_loccenter(sorted_data[:,gr2][...,::-1], 2) ] - center_gr1, flip_flag1 = concat_loccenter_gr1.min(1), concat_loccenter_gr1.argmin(1) - center_gr2, flip_flag2 = concat_loccenter_gr2.max(1), concat_loccenter_gr2.argmax(1) - sorted_data_tmp = array( sorted_data ) - sorted_data_tmp[ix_(flip_flag1==1,gr1)] = sorted_data[ix_(flip_flag1==1,gr1)][...,::-1] - sorted_data_tmp[ix_(flip_flag2==1,gr2)] = sorted_data[ix_(flip_flag2==1,gr2)][...,::-1] - loc_center = calc_loccenter(sorted_data_tmp, 2) - - imax = zeros(loc_center.shape) - imax[loc_center<=breakp1] = 1 - imax[loc_center>breakp1] = 2 - - genesgr1 = where(imax==1)[0] - genesgr2 
= where(imax==2)[0] - if size(genesgr1) == 0: - IN = argmax(mean_gr1) - genesgr1 = array([IN]) - genesgr2 = setdiff1d(genesgr2, IN) - elif size(genesgr2) == 0: - IN = argmax(mean_gr2) - genesgr2 = array([IN]) - genesgr1 = setdiff1d(genesgr1, IN) - - if verbose: - print ('\nSplitting (%i, %i) ' % sorted_data.shape) - print ('in (%i,%i) ' % (genesgr1.shape[0],gr1.shape[0])) - print ('and (%i,%i)' % (genesgr2.shape[0],gr2.shape[0]),end="") - sys.stdout.flush() - - # Data of group1 - datagr1 = sorted_data[ix_(genesgr1,gr1)] - # zero center - datagr1 = datagr1 - datagr1.mean(1)[:,newaxis] - # Resort group1 - if min( datagr1.shape ) > 1: - if sort_genes: - genesorder1,cellorder1 = SPIN(datagr1, widlist=wid, iters=iters_spin, axis='both', verbose=verbose) - else: - cellorder1 = SPIN(datagr1, widlist=wid, iters=iters_spin, axis=1, verbose=verbose) - genesorder1 = arange(datagr1.shape[0]) - elif len(genesgr1) == 1: - genesorder1 = 0 - cellorder1 = argsort( datagr1[0,:] ) - elif len(gr1) == 1: - cellorder1 = 0 - genesorder1 = argsort( datagr1[:,0] ) - - # Data of group2 - datagr2 = sorted_data[ix_(genesgr2,gr2)] - # zero center - datagr2 = datagr2 - datagr2.mean(1)[:,newaxis] - # Resort group2 - if min( datagr2.shape )>1: - if sort_genes: - genesorder2, cellorder2 = SPIN(datagr2, widlist=wid, iters=iters_spin, axis='both',verbose=verbose) - else: - cellorder2 = SPIN(datagr2, widlist=wid, iters=iters_spin, axis=1,verbose=verbose) - genesorder2 = arange(datagr2.shape[0]) - elif len(genesgr2) == 1: - genesorder2 = 0 - cellorder2 = argsort(datagr2[0,:]) - elif len(gr2) == 1: - cellorder2 = 0 - genesorder2 = argsort(datagr2[:,0]) - - # contcatenate cells and genes indexes - genes_resort1 = r_[genesgr1[genesorder1], genesgr2[genesorder2] ] - cells_resort1 = r_[gr1[cellorder1], gr2[cellorder2] ] - genesgr1 = arange(len(genesgr1)) - genesgr2 = arange(len(genesgr1), len(sorted_data[:,0])) - # resort - sorted_data_resort1 = sorted_data[ix_(genes_resort1,cells_resort1)] - - return 
def fit_CV(mu, cv, fit_method='Exp', svr_gamma=0.06, x0=(0.5, 0.5), verbose=False):
    '''Fits a noise model (CV vs mean)

    Parameters
    ----------
    mu: 1-D array
        mean of the genes (raw counts)
    cv: 1-D array
        coefficient of variation for each gene
    fit_method: string
        allowed: 'SVR', 'Exp', 'binSVR', 'binExp'
        default: 'Exp'
        SVR: uses Support vector regression to fit the noise model (requires scikit-learn)
        Exp: Parametric fit to cv = mu^(-a) + b
        bin: before fitting the distribution of mean is normalized to be
             uniform by downsampling and resampling.
    svr_gamma: float or 'auto'
        gamma parameter of the SVR; 'auto' scales it as 1000/len(mu)
    x0: sequence of two floats
        initial guess (a, b) for the parametric 'Exp' fit
    verbose: bool
        if True, prints a notice when falling back from SVR to Exp

    Returns
    -------
    score: 1-D array
        Relative position of each gene with respect to the fitted curve
    mu_linspace: 1-D array
        x coordinates to plot (min(log2(mu)) -> max(log2(mu)))
    cv_fit: 1-D array
        y=f(x) coordinates to plot
    pars: tuple or None
        fitted (a, b) for 'Exp'; None for 'SVR'
    '''
    log2_m = log2(mu)
    log2_cv = log2(cv)

    if len(mu) > 1000 and 'bin' in fit_method:
        # Flatten the distribution of means: histogram with 30 bins, then
        # down/up-sample each bin towards the median bin count.
        n, xi = histogram(log2_m, 30)
        # NOTE(fix): percentile returns a float; cast to int so it can be
        # used in slicing and in the tile() repetition count below.
        med_n = int(percentile(n, 50))
        for i in range(0, len(n)):
            # indexes of the genes that fall within the i-th bin
            ind = where((log2_m >= xi[i]) & (log2_m < xi[i + 1]))[0]
            if len(ind) > med_n:
                # Downsample if count is more than median
                ind = ind[random.permutation(len(ind))]
                ind = ind[:len(ind) - med_n]
                mask = ones(len(log2_m), dtype=bool)
                mask[ind] = False
                log2_m = log2_m[mask]
                log2_cv = log2_cv[mask]
            # NOTE(fix): test len(ind) > 5 first — empty bins previously hit a
            # ZeroDivisionError in med_n/len(ind) before the guard applied.
            elif len(ind) > 5 and around(med_n / len(ind)) > 1:
                # Duplicate if count is less than median
                reps = int(around(med_n / len(ind))) - 1
                log2_m = r_[log2_m, tile(log2_m[ind], reps)]
                log2_cv = r_[log2_cv, tile(log2_cv[ind], reps)]
    else:
        if 'bin' in fit_method:
            print('More than 1000 input feature needed for bin correction.')

    if 'SVR' in fit_method:
        try:
            from sklearn.svm import SVR
            if svr_gamma == 'auto':
                svr_gamma = 1000. / len(mu)
            # Fit the Support Vector Regression
            clf = SVR(gamma=svr_gamma)
            clf.fit(log2_m[:, newaxis], log2_cv)
            fitted_fun = clf.predict
            score = log2(cv) - fitted_fun(log2(mu)[:, newaxis])
            params = None
            # The coordinates of the fitted curve
            mu_linspace = linspace(min(log2_m), max(log2_m))
            cv_fit = fitted_fun(mu_linspace[:, newaxis])
            return score, mu_linspace, cv_fit, params
        except ImportError:
            if verbose:
                print('SVR fit requires scikit-learn python library. Using exponential instead.')
            # Fall back to the parametric fit, preserving the bin correction flag.
            if 'bin' in fit_method:
                return fit_CV(mu, cv, fit_method='binExp', x0=x0)
            return fit_CV(mu, cv, fit_method='Exp', x0=x0)
    elif 'Exp' in fit_method:
        from scipy.optimize import minimize
        # Objective: least absolute deviations of log2(cv) from log2(mu^-a + b)
        fun = lambda x, log2_m, log2_cv: sum(abs(log2((2. ** log2_m) ** (-x[0]) + x[1]) - log2_cv))
        # Fit using the Nelder-Mead algorithm
        optimization = minimize(fun, x0, args=(log2_m, log2_cv), method='Nelder-Mead')
        params = optimization.x
        # The fitted function
        fitted_fun = lambda log_mu: log2((2. ** log_mu) ** (-params[0]) + params[1])
        # Score is the relative position with respect to the fitted curve
        score = log2(cv) - fitted_fun(log2(mu))
        # The coordinates of the fitted curve
        mu_linspace = linspace(min(log2_m), max(log2_m))
        cv_fit = fitted_fun(mu_linspace)
        return score, mu_linspace, cv_fit, params


def feature_selection(data, thrs, verbose=False):
    '''Select the `thrs` most variable genes relative to the fitted noise model.

    Parameters
    ----------
    data: 2-D array (genes x cells) of raw counts
    thrs: int, number of genes to select
    verbose: bool, print progress details

    Returns
    -------
    1-D array of row indexes of the selected genes (all rows if selection
    is skipped).
    '''
    if thrs >= data.shape[0]:
        if verbose:
            print("Trying to select %i features but only %i genes available." % (thrs, data.shape[0]))
            print("Skipping feature selection")
        return arange(data.shape[0])
    ix_genes = arange(data.shape[0])
    threeperK = int(ceil(3 * data.shape[1] / 1000.))
    zerotwoperK = int(floor(0.3 * data.shape[1] / 1000.))
    # keep genes with at least 1 molecule in 0.3% of the cells AND
    # at least 2 molecules in 0.03% of the cells
    condition = (sum(data >= 1, 1) >= threeperK) & (sum(data >= 2, 1) >= zerotwoperK)
    ix_genes = ix_genes[condition]

    mu = data[ix_genes, :].mean(1)
    sigma = data[ix_genes, :].std(1, ddof=1)
    cv = sigma / mu

    try:
        score, mu_linspace, cv_fit, params = fit_CV(mu, cv, fit_method='SVR', verbose=verbose)
    except ImportError:
        # fit_CV falls back from sklearn to scipy internally; reaching here
        # means scipy itself is missing.
        print("WARNING: Feature selection was skipped because scipy is required. Install scipy to run feature selection.")
        return arange(data.shape[0])

    # highest scores = most variable above the noise model
    return ix_genes[argsort(score)[::-1]][:thrs]
def usage_quick():
    '''Print the one-line synopsis shown when backSPIN is run with no arguments.'''
    print('''usage: backSPIN [-hbv] [-i inputfile] [-o outputfolder] [-d int] [-f int] [-t int] [-s float] [-T int] [-S float] [-g int] [-c int] [-k float] [-r float]
    manual: backSPIN -h
    ''')


def usage():
    '''Print the full command-line manual (shown for -h/--help).'''
    msg = '''
    backSPIN commandline tool
    -------------------------

    The options are as follows:

    -i [inputfile]
    --input=[inputfile]
            Path of the cef formatted tab delimited file.
            Rows should be genes and columns single cells/samples.
            For further information on the cef format visit:
            https://github.com/linnarsson-lab/ceftools

    -o [outputfile]
    --output=[outputfile]
            The name of the file to which the output will be written

    -d [int]
            Depth/Number of levels: The number of nested splits that will be tried by the algorithm
    -t [int]
            Number of the iterations used in the preparatory SPIN.
            Defaults to 10
    -f [int]
            Feature selection is performed before BackSPIN. Argument controls how many genes are seleceted.
            Selection is based on expected noise (a curve fit to the CV-vs-mean plot).
    -s [float]
            Controls the decrease rate of the width parameter used in the preparatory SPIN.
            Smaller values will increase the number of SPIN iterations and result in higher
            precision in the first step but longer execution time.
            Defaults to 0.1
    -T [int]
            Number of the iterations used for every width parameter.
            Does not apply on the first run (use -t instead)
            Defaults to 8
    -S [float]
            Controls the decrease rate of the width parameter.
            Smaller values will increase the number of SPIN iterations and result in higher
            precision but longer execution time.
            Does not apply on the first run (use -s instead)
            Defaults to 0.3
    -g [int]
            Minimal number of genes that a group must contain for splitting to be allowed.
            Defaults to 2
    -c [int]
            Minimal number of cells that a group must contain for splitting to be allowed.
            Defaults to 2
    -k [float]
            Minimum score that a breaking point has to reach to be suitable for splitting.
            Defaults to 1.15
    -r [float]
            If the difference between the average expression of two groups is lower than threshold the algorythm
            uses higly correlated genes to assign the gene to one of the two groups
            Defaults to 0.2
    -b [axisvalue]
            Run normal SPIN instead of backSPIN.
            Normal spin accepts the parameters -T -S
            An axis value 0 to only sort genes (rows), 1 to only sort cells (columns) or 'both' for both
            must be passed
    -v
            Verbose. Print to the stdoutput extra details of what is happening

    '''
    print(msg)
if __name__ == '__main__':
    print("")
    # Default arguments; each is overridden by the matching command-line flag.
    input_path = None
    outfiles_path = None
    numLevels = 2           # -d
    feature_fit = False     # -f
    feature_genes = 2000
    first_run_iters = 10    # -t
    first_run_step = 0.1    # -s
    runs_iters = 8          # -T
    runs_step = 0.3         # -S
    split_limit_g = 2       # -g
    split_limit_c = 2       # -c
    stop_const = 1.15       # -k
    low_thrs = 0.2          # -r
    normal_spin = False     # -b
    normal_spin_axis = 'both'
    verbose = False         # -v

    optlist, args = getopt.gnu_getopt(sys.argv[1:], "hvi:o:f:d:t:s:T:S:g:c:k:r:b:", ["help", "input=", "output="])

    # No arguments at all: show the short usage and exit.
    if not optlist and not args:
        usage_quick()
        sys.exit()
    for opt, a in optlist:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ('-i', '--input'):
            input_path = a
        elif opt in ("-o", "--output"):
            outfiles_path = a
        elif opt == '-d':
            numLevels = int(a)
        elif opt == '-f':
            feature_fit = True
            if a != '':
                feature_genes = int(a)
        elif opt == '-t':
            first_run_iters = int(a)
        elif opt == '-s':
            first_run_step = float(a)
        elif opt == '-T':
            runs_iters = int(a)
        elif opt == '-S':
            runs_step = float(a)
        elif opt == '-g':
            split_limit_g = int(a)
        elif opt == '-c':
            split_limit_c = int(a)
        elif opt == '-k':
            stop_const = float(a)
        elif opt == '-r':
            low_thrs = float(a)
        elif opt == '-v':
            verbose = True
        elif opt == '-b':
            normal_spin = True
            # -b accepts 0 (sort rows), 1 (sort columns) or 'both'
            if a != '':
                normal_spin_axis = a if a == 'both' else int(a)
        else:
            assert False, "%s option is not supported" % opt

    # fixed: identity comparison with None instead of ==
    if input_path is None:
        print('No input file was provided.\nYou need to specify an input file\n(e.g. backSPIN -i path/to/your/file/foo.cef)\n')
        sys.exit()
    if outfiles_path is None:
        print('No output file was provided.\nYou need to specify an output file\n(e.g. backSPIN -o path/to/your/file/bar.cef)\n')
        sys.exit()

    try:
        if verbose:
            print('Loading file.')
        input_cef = CEF_obj()
        input_cef.readCEF(input_path)

        data = array(input_cef.matrix)

        if feature_fit:
            if verbose:
                print("Performing feature selection")
            ix_features = feature_selection(data, feature_genes, verbose=verbose)
            if verbose:
                print("Selected %i genes" % len(ix_features))
            # Keep only the selected genes in both the matrix and row attributes.
            data = data[ix_features, :]
            input_cef.matrix = data.tolist()
            input_cef.row_attr_values = atleast_2d(array(input_cef.row_attr_values))[:, ix_features].tolist()
            input_cef.update()

        # log-transform and center every gene (row)
        data = log2(data + 1)
        data = data - data.mean(1)[:, newaxis]
        if data.shape[0] <= 3 and data.shape[1] <= 3:
            print('Input file is not correctly formatted.\n')
            sys.exit()
    except Exception:
        import traceback
        print('There was an error')
        print(traceback.format_exc())
        print('Error occurred in parsing the input file.')
        print('Please check that your input file is a correctly formatted cef file.\n')
        sys.exit()

    if not normal_spin:
        # ---- hierarchical backSPIN biclustering ----
        print('backSPIN started\n----------------\n')
        print('Input file:\n%s\n' % input_path)
        print('Output file:\n%s\n' % outfiles_path)
        print('numLevels: %i\nfirst_run_iters: %i\nfirst_run_step: %.3f\nruns_iters: %i\nruns_step: %.3f\nsplit_limit_g: %i\nsplit_limit_c: %i\nstop_const: %.3f\nlow_thrs: %.3f\n' % (numLevels, first_run_iters, first_run_step, runs_iters,
              runs_step, split_limit_g, split_limit_c, stop_const, low_thrs))

        results = backSPIN(data, numLevels, first_run_iters, first_run_step, runs_iters, runs_step,
                           split_limit_g, split_limit_c, stop_const, low_thrs, verbose)

        sys.stdout.flush()
        print('\nWriting output.\n')

        output_cef = CEF_obj()

        # Copy headers and reorder the attributes to match the sorted matrix.
        for h_name, h_val in zip(input_cef.header_names, input_cef.header_values):
            output_cef.add_header(h_name, h_val)
        for c_name, c_val in zip(input_cef.col_attr_names, input_cef.col_attr_values):
            output_cef.add_col_attr(c_name, array(c_val)[results.cells_order])
        for r_name, r_val in zip(input_cef.row_attr_names, input_cef.row_attr_values):
            output_cef.add_row_attr(r_name, array(r_val)[results.genes_order])

        # One group-membership attribute per split level, for genes and cells.
        for level, groups in enumerate(results.genes_gr_level.T):
            output_cef.add_row_attr('Level_%i_group' % level, [int(el) for el in groups])
        for level, groups in enumerate(results.cells_gr_level.T):
            output_cef.add_col_attr('Level_%i_group' % level, [int(el) for el in groups])

        output_cef.set_matrix(array(input_cef.matrix)[results.genes_order, :][:, results.cells_order])
        # Use integer formatting only when the first and last rows contain no floats.
        if sum(type(i) == float for i in input_cef.matrix[0]) + sum(type(i) == float for i in input_cef.matrix[-1]) == 0:
            fmt = '%i'
        else:
            fmt = '%.6g'
        output_cef.writeCEF(outfiles_path, matrix_str_fmt=fmt)
    else:
        # ---- plain SPIN sorting (no hierarchical splitting) ----
        print('normal SPIN started\n----------------\n')
        print('Input file:\n%s\n' % input_path)
        print('Output file:\n%s\n' % outfiles_path)

        results = SPIN(data, widlist=runs_step, iters=runs_iters, axis=normal_spin_axis, verbose=verbose)

        print('\nWriting output.\n')

        output_cef = CEF_obj()

        for h_name, h_val in zip(input_cef.header_names, input_cef.header_values):
            output_cef.add_header(h_name, h_val)

        # SPIN returns (genes_order, cells_order) for 'both', a single order otherwise.
        if normal_spin_axis == 'both':
            for c_name, c_val in zip(input_cef.col_attr_names, input_cef.col_attr_values):
                output_cef.add_col_attr(c_name, array(c_val)[results[1]])
            for r_name, r_val in zip(input_cef.row_attr_names, input_cef.row_attr_values):
                output_cef.add_row_attr(r_name, array(r_val)[results[0]])
            output_cef.set_matrix(array(input_cef.matrix)[results[0], :][:, results[1]])

        if normal_spin_axis == 0:
            for r_name, r_val in zip(input_cef.row_attr_names, input_cef.row_attr_values):
                output_cef.add_row_attr(r_name, array(r_val)[results])
            output_cef.set_matrix(array(input_cef.matrix)[results, :])

        if normal_spin_axis == 1:
            for c_name, c_val in zip(input_cef.col_attr_names, input_cef.col_attr_values):
                output_cef.add_col_attr(c_name, array(c_val)[results])
            output_cef.set_matrix(array(input_cef.matrix)[:, results])

        output_cef.writeCEF(outfiles_path)