diff --git a/scMatch.py b/scMatch.py
index a69d066..eefa465 100644
--- a/scMatch.py
+++ b/scMatch.py
@@ -6,6 +6,7 @@
 @author: rhou
 """
 import warnings
+
 warnings.filterwarnings("ignore")
 import argparse
 import pandas as pd
@@ -17,10 +18,11 @@
 import multiprocessing
 from functools import partial
 
+
 def SPMAnno(refDB, keepZeros, testMethod, testCol):
     firstLayerHeader = [testCol.columns[0], testCol.columns[0]]
-    
-    #find common genes between test data and ref data
+
+    # find common genes between test data and ref data
     testRows = set(testCol.index)
     refRows = set(refDB.index)
     if keepZeros:
@@ -29,11 +31,11 @@ def SPMAnno(refDB, keepZeros, testMethod, testCol):
         commonRows = list(refRows.intersection(testRows))
     commonRowsLen = len(commonRows)
     thirdLayerHeader = [commonRowsLen, commonRowsLen]
-    
-    #only keep non-zero genes
-    testCol = testCol.loc[commonRows, ].fillna(0.0)
-    testrefDB = refDB.loc[commonRows, ].fillna(0.0)
-    
+
+    # only keep non-zero genes
+    testCol = testCol.reindex(index=commonRows).fillna(0.0)
+    testrefDB = refDB.reindex(index=commonRows).fillna(0.0)
+
     if testMethod == 'Spearman':
         spr_correlation = testrefDB.apply(lambda col: spearmanr(col, testCol)[0], axis=0)
         spr_correlation = spr_correlation.to_frame().fillna(0).round(10).reset_index()
@@ -41,17 +43,20 @@ def SPMAnno(refDB, keepZeros, testMethod, testCol):
         secondLayerHeader = ['sample name', 'Spearman correlation coefficient']
         spr_correlation = spr_correlation.sort_values(by=['Spearman correlation coefficient'], ascending=False)
         spr_correlation = spr_correlation.reset_index(drop=True)
-        return (firstLayerHeader, thirdLayerHeader, secondLayerHeader, spr_correlation, testCol.columns[0], spr_correlation.iloc[0,0], spr_correlation.iloc[0,1])
+        return (firstLayerHeader, thirdLayerHeader, secondLayerHeader, spr_correlation, testCol.columns[0],
+                spr_correlation.iloc[0, 0], spr_correlation.iloc[0, 1])
     elif testMethod == 'Pearson':
         pes_correlation = testrefDB.apply(lambda col: pearsonr(col.to_frame(), testCol)[0], axis=0)
-        pes_correlation = pes_correlation.fillna(0).round(10).T.ix[:,0].reset_index()
+        pes_correlation = pes_correlation.fillna(0).round(10).T.loc[:, 0].reset_index()
         pes_correlation.columns = ['sample name', 'Pearson correlation coefficient']
         secondLayerHeader = ['sample name', 'Pearson correlation coefficient']
         pes_correlation = pes_correlation.sort_values(by=['Pearson correlation coefficient'], ascending=False)
         pes_correlation = pes_correlation.reset_index(drop=True)
-        return (firstLayerHeader, thirdLayerHeader, secondLayerHeader, pes_correlation, testCol.columns[0], pes_correlation.iloc[0,0], pes_correlation.iloc[0,1])
+        return (firstLayerHeader, thirdLayerHeader, secondLayerHeader, pes_correlation, testCol.columns[0],
+                pes_correlation.iloc[0, 0], pes_correlation.iloc[0, 1])
+
 
-#transfer given species gene symbols to hids
+# transfer given species gene symbols to hids
 def TransferToHids(refDS, species, geneList):
     hidCol = 0  # universal id for homology genes
     taxidCol = 1  # species id
@@ -59,51 +64,53 @@ def TransferToHids(refDS, species, geneList):
 
     homoDF = pd.read_csv(os.path.join(refDS, 'homologene.data'), sep='\t', index_col=None, header=None)
     # reduce the list to genes of the given species
-    speciesDF = homoDF.ix[homoDF[taxidCol] == int(species),].set_index(geneSymbolCol)
+    speciesDF = homoDF.loc[homoDF[taxidCol] == int(species),].set_index(geneSymbolCol)
 
     geneDF = speciesDF.loc[speciesDF.index.isin(geneList)]
     notnaGenes = geneDF[geneDF[hidCol].notnull()]
-    notnaGenes = notnaGenes.ix[~notnaGenes[hidCol].duplicated(keep='first'),]
+    notnaGenes = notnaGenes.loc[~notnaGenes[hidCol].duplicated(keep='first'),]
     notnaGenesSymbols = list(notnaGenes.index)
-    notnaGenesHIDs = list(notnaGenes.ix[:, hidCol])
+    notnaGenesHIDs = list(notnaGenes.loc[:, hidCol])
     notnaGenesHIDs = [int(i) for i in notnaGenesHIDs]
     return notnaGenesSymbols, notnaGenesHIDs
 
+
 def AnnSCData(testType, em, refDS, refType, refTypeName, keepZeros, testMethod, coreNum, savefolder):
-    #if across species need to replace symbols to HIDs
+    # if across species need to replace symbols to HIDs
     if testType == refType:
-        #load reference database
+        # load reference database
         refDB = pd.read_csv(os.path.join(refDS, '%s_symbol.csv' % refType), index_col=0, header=0)
     else:
-        #load reference database
+        # load reference database
         refDB = pd.read_csv(os.path.join(refDS, '%s_HID.csv' % refType), index_col=0, header=0)
-            
-        #for different species, transfer symbols to hids
+
+        # for different species, transfer symbols to hids
         geneList = list(em.index)
         geneList, hidList = TransferToHids(refDS, testType, geneList)
-        em = em.ix[~em.index.duplicated(keep='first'),]
-        em = em.ix[geneList, ]
+        em = em.loc[~em.index.duplicated(keep='first'),]
+        em = em.loc[geneList,]
         hidList = [str(i) for i in hidList]
         em.index = hidList
         hidList = [str(i) for i in refDB.index]
         refDB.index = hidList
-        
-    #remove duplicate indices
-    refDB = refDB.ix[~refDB.index.duplicated(keep='first'),]
+
+    # remove duplicate indices
+    # replace as seen here https://github.com/asrhou/scMatch/issues/10
+    refDB = refDB.loc[~refDB.index.duplicated(keep='first'),]
     print('reference dataset shape: %s genes, %s samples' % refDB.shape)
-    em = em.ix[~em.index.duplicated(keep='first'),]
-    
-    #split expression matrix to single-cell expression profiles
+    em = em.loc[~em.index.duplicated(keep='first'),]
+
+    # split expression matrix to single-cell expression profiles
     eps = np.split(em, len(em.columns), axis=1)
-    
-    #annotate single-cell expression profiles in parallel
+
+    # annotate single-cell expression profiles in parallel
     p = multiprocessing.Pool(coreNum)
     func = partial(SPMAnno, refDB, keepZeros, testMethod)
     resultList = p.map(func, eps)
     p.close()
     p.join()
-    
-    #generate final sample annotation expression matrix
+
+    # generate final sample annotation expression matrix
     firstLayerHeader = [item for i in resultList for item in i[0]]
     secondLayerHeader = [item for i in resultList for item in i[1]]
     thirdLayerHeader = [item for i in resultList for item in i[2]]
@@ -112,90 +119,93 @@ def AnnSCData(testType, em, refDS, refType, refTypeName, keepZeros, testMethod,
     tuples = list(zip(*arrays))
     headers = pd.MultiIndex.from_tuples(tuples, names=['identifier', 'tested genes', 'annotation'])
     merged.columns = headers
-    
-    #prepare folder to save annotation result
+
+    # prepare folder to save annotation result
     if not os.path.exists(savefolder):
         os.mkdir(savefolder)
-        
+
     rstFolder = 'annotation_result'
     if keepZeros:
         rstFolder = rstFolder + '_keep_all_genes'
     else:
         rstFolder = rstFolder + '_keep_expressed_genes'
-    
+
     savefolder = os.path.join(savefolder, rstFolder)
     if not os.path.exists(savefolder):
         os.mkdir(savefolder)
-        
-    #save file
+
+    # save file
     print('##########saving annotation results in the folder: %s' % savefolder)
     if len(merged.columns) > 8190:
         colNum = len(merged.columns)
         parts = int(colNum / 8000)
         for partIdx in range(parts):
-            subMerged = merged.iloc[:,8000*partIdx:8000*(partIdx+1)]
-            subMerged.to_excel(os.path.join(savefolder, refTypeName+"_%s_Part%04d.xlsx" % (testMethod, partIdx+1)))
-        subMerged = merged.iloc[:,8000*parts:]
-        subMerged.to_excel(os.path.join(savefolder, refTypeName+"_%s_Part%04d.xlsx" % (testMethod, parts+1)))
+            subMerged = merged.iloc[:, 8000 * partIdx:8000 * (partIdx + 1)]
+            subMerged.to_excel(os.path.join(savefolder, refTypeName + "_%s_Part%04d.xlsx" % (testMethod, partIdx + 1)))
+        subMerged = merged.iloc[:, 8000 * parts:]
+        subMerged.to_excel(os.path.join(savefolder, refTypeName + "_%s_Part%04d.xlsx" % (testMethod, parts + 1)))
     else:
-        merged.to_excel(os.path.join(savefolder, refTypeName+"_%s.xlsx" % testMethod))
-        
-    #save top ann result
-    topAnn = pd.DataFrame({'cell':[i[4] for i in resultList], 'cell type':[''] * len(resultList), 'top sample':[i[5] for i in resultList], 'top correlation score':[i[6] for i in resultList]})
+        merged.to_excel(os.path.join(savefolder, refTypeName + "_%s.xlsx" % testMethod))
+
+    # save top ann result
+    topAnn = pd.DataFrame({'cell': [i[4] for i in resultList], 'cell type': [''] * len(resultList),
+                           'top sample': [i[5] for i in resultList],
+                           'top correlation score': [i[6] for i in resultList]})
     mapData = pd.read_csv(os.path.join(refDS, '%s_map.csv' % refType), index_col=0, header=0)
     for idx in topAnn.index:
-        topAnn.ix[idx, 'cell type'] = mapData.ix[topAnn.ix[idx, 'top sample'], 'cell type']
-    saveNameP = os.path.join(savefolder, refTypeName+"_%s_top_ann.csv" % (testMethod))
-    topAnn.to_csv(saveNameP, index=False, columns = ['cell', 'cell type', 'top sample', 'top correlation score'])
+        topAnn.loc[idx, 'cell type'] = mapData.loc[topAnn.loc[idx, 'top sample'], 'cell type']
+    saveNameP = os.path.join(savefolder, refTypeName + "_%s_top_ann.csv" % (testMethod))
+    topAnn.to_csv(saveNameP, index=False, columns=['cell', 'cell type', 'top sample', 'top correlation score'])
     print('##########DONE!')
-    
+
+
 def SortAnno(testItem):
     oldcols = testItem.columns
     cell = oldcols.get_level_values(0)[0]
     testItem.columns = ["name", "coefficient"]
     testItem = testItem.sort_values(by=['coefficient'], ascending=False)
-    topAnn = testItem.iloc[0,0]
-    topCoff = testItem.iloc[0,1]
+    topAnn = testItem.iloc[0, 0]
+    topCoff = testItem.iloc[0, 1]
     testItem.columns = oldcols
     testItem = testItem.reset_index(drop=True)
     return (testItem, cell, topAnn, topCoff)
 
-#start to annotate test dataset
+
+# start to annotate test dataset
 def main(testType, testFormat, testDS, testGenes, refDS, refTypeList, keepZeros, testMethodList, coreNum):
-    
-    #load test data
+    # load test data
     print('##########loading test data')
     if testFormat == '10x':
         fileItem = glob.glob(os.path.join(testDS, "matrix.mtx"))[0]
         em = io.mmread(fileItem)
         em = em.tocsr().toarray()
         if os.path.exists(os.path.join(opt.testDS, 'genes.tsv')):
-            row = pd.read_table(fileItem[:-10]+"genes.tsv", header=None, index_col=None)
+            row = pd.read_table(fileItem[:-10] + "genes.tsv", header=None, index_col=None)
         else:
-            row = pd.read_table(fileItem[:-10]+"features.tsv", header=None, index_col=None)
-        col = pd.read_table(fileItem[:-10]+"barcodes.tsv", header=None, index_col=None)
+            row = pd.read_table(fileItem[:-10] + "features.tsv", header=None, index_col=None)
+        col = pd.read_table(fileItem[:-10] + "barcodes.tsv", header=None, index_col=None)
         em = pd.DataFrame(em, index=row.T.values[1], columns=col.T.values[0])
         savefolder = testDS
     else:
         em = pd.read_csv(testDS, index_col=0, header=0)
         savefolder = testDS[:-4]
-        
+
     # remove unrelated genes
     geneList = set(em.index)
     if testGenes != 'none':
-        testGeneList = set(pd.read_csv(testGenes,index_col=None, header=0).columns)
+        testGeneList = set(pd.read_csv(testGenes, index_col=None, header=0).columns)
         geneList = testGeneList.intersection(testGeneList)
-        em = em.ix[~em.index.duplicated(keep='first'),]
-        em = em.ix[geneList,]
+        em = em.loc[~em.index.duplicated(keep='first'),]
+        em = em.loc[geneList,]
     print('test dataset shape: %s genes, %s samples' % em.shape)
-    
-    hidSpecDict = {'9606':'human', '10090':'mouse'}
-    
+
+    hidSpecDict = {'9606': 'human', '10090': 'mouse'}
+
     for refType in refTypeList:
         print('##########annotating test data with %s data' % hidSpecDict[refType])
         for testMethod in testMethodList:
             AnnSCData(testType, em, refDS, refType, hidSpecDict[refType], keepZeros, testMethod, coreNum, savefolder)
-            
+
     if len(refTypeList) == 2:
         print('##########merging annotation data')
         rstFolder = 'annotation_result'
@@ -209,64 +219,74 @@ def main(testType, testFormat, testDS, testGenes, refDS, refTypeList, keepZeros,
             tomergeList = [i for i in tomergeList if "_Avg" not in i]
             topAnnList = []
             for tomerge in tomergeList:
-                #merge results
-                humanAnn = pd.read_excel(tomerge, header=[0,1,2])
-                humanAnn.columns = pd.MultiIndex.from_arrays([list(humanAnn.columns.get_level_values(0)),list(humanAnn.columns.get_level_values(2))], names=('identifier', 'annotation'))
-                mouseAnn = pd.read_excel(tomerge.replace('human_','mouse_'), header=[0,1,2])
-                mouseAnn.columns = pd.MultiIndex.from_arrays([list(mouseAnn.columns.get_level_values(0)),list(mouseAnn.columns.get_level_values(2))], names=('identifier', 'annotation'))
+                # merge results
+                humanAnn = pd.read_excel(tomerge, header=[0, 1, 2])
+                humanAnn.columns = pd.MultiIndex.from_arrays(
+                    [list(humanAnn.columns.get_level_values(0)), list(humanAnn.columns.get_level_values(2))],
+                    names=('identifier', 'annotation'))
+                mouseAnn = pd.read_excel(tomerge.replace('human_', 'mouse_'), header=[0, 1, 2])
+                mouseAnn.columns = pd.MultiIndex.from_arrays(
+                    [list(mouseAnn.columns.get_level_values(0)), list(mouseAnn.columns.get_level_values(2))],
+                    names=('identifier', 'annotation'))
                 mergedAnn = pd.concat([humanAnn, mouseAnn])
-                
-                #sort merged results
-                #split merged results to single-cell merged results
-                mergedAnnList = np.split(mergedAnn, len(mergedAnn.columns)/2, axis=1)
-                
-                #annotate single-cell expression profiles in parallel
+
+                # sort merged results
+                # split merged results to single-cell merged results
+                mergedAnnList = np.split(mergedAnn, len(mergedAnn.columns) / 2, axis=1)
+
+                # annotate single-cell expression profiles in parallel
                 p = multiprocessing.Pool(coreNum)
                 resultList = p.map(SortAnno, mergedAnnList)
                 p.close()
                 p.join()
-                
+
                 mergedAnn = pd.concat([i[0] for i in resultList], axis=1)
-                mergedAnn.to_excel(tomerge.replace('human_','combined_'))
-                
-                #save top ann result
-                topAnn = pd.DataFrame({'cell':[i[1] for i in resultList], 'cell type':[''] * len(resultList), 'top sample':[i[2] for i in resultList], 'top correlation score':[i[3] for i in resultList]})
+                mergedAnn.to_excel(tomerge.replace('human_', 'combined_'))
+
+                # save top ann result
+                topAnn = pd.DataFrame({'cell': [i[1] for i in resultList], 'cell type': [''] * len(resultList),
+                                       'top sample': [i[2] for i in resultList],
+                                       'top correlation score': [i[3] for i in resultList]})
                 topAnnList.append(topAnn)
             mergedAnn = pd.concat(topAnnList)
             mapData1 = pd.read_csv(os.path.join(refDS, '9606_map.csv'), index_col=0, header=0)
             mapData2 = pd.read_csv(os.path.join(refDS, '10090_map.csv'), index_col=0, header=0)
             mapData = pd.concat([mapData1, mapData2])
             for idx in mergedAnn.index:
-                mergedAnn.ix[idx, 'cell type'] = mapData.ix[mergedAnn.ix[idx, 'top sample'], 'cell type']
+                mergedAnn.loc[idx, 'cell type'] = mapData.loc[mergedAnn.loc[idx, 'top sample'], 'cell type']
             saveNameP = os.path.join(savefolder, "combined_%s_top_ann.csv" % (testMethod))
-            mergedAnn.to_csv(saveNameP, index=False, columns = ['cell', 'cell type', 'top sample', 'top correlation score'])
-            
+            mergedAnn.to_csv(saveNameP, index=False,
+                             columns=['cell', 'cell type', 'top sample', 'top correlation score'])
+
         print('##########DONE!')
 
+
 if __name__ == "__main__":
-    
-    #process arguments
+
+    # process arguments
     parser = argparse.ArgumentParser()
-    parser.add_argument('--refType', default='human', help='human (default) | mouse | both ') 
-    parser.add_argument('--testType', default='human', help='human (default) | mouse | rat | chimpanzee |zebrafish') 
+    parser.add_argument('--refType', default='human', help='human (default) | mouse | both ')
+    parser.add_argument('--testType', default='human', help='human (default) | mouse | rat | chimpanzee |zebrafish')
     parser.add_argument('--refDS', required=True, help='path to the folder of reference dataset(s)')
     parser.add_argument('--dFormat', default='10x', help='10x (default) | csv')
-    parser.add_argument('--testDS', required=True, help='path to the folder of test dataset if dtype is 10x, otherwise, the path to the file')
+    parser.add_argument('--testDS', required=True,
+                        help='path to the folder of test dataset if dtype is 10x, otherwise, the path to the file')
     parser.add_argument('--testMethod', default='s', help='s[pearman] (default) | p[earson] | both')
-    parser.add_argument('--testGenes', default='none', help='optional, path to the csv file whose first row is the genes used to calculate the correlation coefficient')
+    parser.add_argument('--testGenes', default='none',
+                        help='optional, path to the csv file whose first row is the genes used to calculate the correlation coefficient')
     parser.add_argument('--keepZeros', default='yes', help='y[es] (default) | n[o]')
     parser.add_argument('--coreNum', type=int, default=1, help='number of the cores to use, default=1')
-    
+
     opt = parser.parse_args()
-    
-    #check testType
-    avaSpecDict = {'human':'9606', 'mouse':'10090', 'rat':'10116', 'chimpanzee':'9598', 'zebrafish':'7955'}
+
+    # check testType
+    avaSpecDict = {'human': '9606', 'mouse': '10090', 'rat': '10116', 'chimpanzee': '9598', 'zebrafish': '7955'}
     if opt.testType.lower() not in avaSpecDict.keys():
         sys.exit("The testType can only be 'human', 'mouse', 'rat', 'chimpanzee' or 'zebrafish'.")
     else:
         testType = avaSpecDict[opt.testType]
-        
-    #check testMethod
+
+    # check testMethod
     if opt.testMethod.lower() == "p":
         testMethodList = ['Pearson']
     elif opt.testMethod.lower() == "s":
@@ -275,16 +295,16 @@ def main(testType, testFormat, testDS, testGenes, refDS, refTypeList, keepZeros,
         testMethodList = ['Pearson', 'Spearman']
     else:
         sys.exit("The testMethod can only be 's', 'p' or 'both'.")
-    
-    #check keepZeros
+
+    # check keepZeros
     if opt.keepZeros.lower()[0] == "n":
         keepZeros = False
     elif opt.keepZeros.lower()[0] == "y":
         keepZeros = True
     else:
         sys.exit("The keepZeros can only be 'y[es]' or 'n[o]'.")
-    
-    #check refType
+
+    # check refType
     if opt.refType.lower() == "both":
         refTypeList = ['9606', '10090']
     elif opt.refType.lower() == "human":
@@ -293,8 +313,8 @@ def main(testType, testFormat, testDS, testGenes, refDS, refTypeList, keepZeros,
         refTypeList = ['10090']
     else:
         sys.exit("The refType can only be 'human', 'mouse' or 'both'.")
-        
-    #check refDS
+
+    # check refDS
     if not os.path.exists(opt.refDS):
         sys.exit("The folder of reference dataset does not exist.")
     for refType in refTypeList:
@@ -304,8 +324,8 @@ def main(testType, testFormat, testDS, testGenes, refDS, refTypeList, keepZeros,
         else:
             if not os.path.exists(os.path.join(opt.refDS, '%s_HID.csv' % refType)):
                 sys.exit("The reference dataset '%s_HID.csv' dose not exist." % refType)
-        
-    #check dFormat and testDS
+
+    # check dFormat and testDS
     if opt.dFormat.lower() == 'csv':
         if not os.path.exists(opt.testDS):
             sys.exit("The test dataset file does not exist.")
@@ -321,18 +341,18 @@ def main(testType, testFormat, testDS, testGenes, refDS, refTypeList, keepZeros,
                 sys.exit("Cannot find 'genes.tsv' or 'features.tsv' file in the folder of test dataset.")
         if not os.path.exists(os.path.join(opt.testDS, 'barcodes.tsv')):
             sys.exit("Cannot find 'barcodes.tsv' file in the folder of test dataset.")
-            
-    #check gene list 
+
+    # check gene list
     if opt.testGenes != 'none' and opt.testGenes.endswith('.csv'):
         if not os.path.exists(opt.testGenes):
             sys.exit("Cannot find CSV file to obtain the test gene list.")
-    
-    #check coreNum
+
+    # check coreNum
     maxCoreNum = multiprocessing.cpu_count()
     if opt.coreNum > maxCoreNum:
         sys.exit("There are only %s cores available, less than %s cores." % (maxCoreNum, opt.coreNum))
-        
-    #pass argument check, show input data
+
+    # pass argument check, show input data
     print('===================================================')
     print('Input data:')
     print('Reference species: %s' % opt.refType)
@@ -352,7 +372,8 @@ def main(testType, testFormat, testDS, testGenes, refDS, refTypeList, keepZeros,
         print('Test gene list: %s' % opt.testGenes)
     print('The number of cores to use: %s' % opt.coreNum)
     print('===================================================')
-    
-    #start to annotate test dataset
-    main(testType, opt.dFormat, opt.testDS, opt.testGenes, opt.refDS, refTypeList, keepZeros, testMethodList, opt.coreNum)
-    #python scMatch.py --refDS FANTOM5 --dFormat csv --testDS GSE81861_Cell_Line_COUNT.csv --coreNum 4
+
+    # start to annotate test dataset
+    main(testType, opt.dFormat, opt.testDS, opt.testGenes, opt.refDS, refTypeList, keepZeros, testMethodList,
+         opt.coreNum)
+    # python scMatch.py --refDS FANTOM5 --dFormat csv --testDS GSE81861_Cell_Line_COUNT.csv --coreNum 4