diff --git a/PR_XTF_readme.md b/PR_XTF_readme.md
new file mode 100644
index 0000000..812a48b
--- /dev/null
+++ b/PR_XTF_readme.md
@@ -0,0 +1,36 @@
+## Pull Request Title: Enhancements and New Features for Genetic Data Processing
+Only `finemapper.py` is modified.
+### Summary of Changes
+
+This PR introduces several updates and new features that improve the handling and processing of genetic data. Illustrative sketches of the new workflows are given at the end of this document. Key changes include:
+
+1. **Computing LD from BGEN files and saving it in .npz format, which loads faster than .bcor:**
+
+   - Implemented functionality for computing LD matrices directly from BGEN files and storing them in .npz format, following PolyFun conventions. Use `--geno genofile --ldstore2 $(which ldstore) --cache-dir ./ --cache-format npz` to save in npz format.
+2. **NPZ File Reading Capability:**
+
+   - Added the ability to read npz files via `--ld your_npz_prefix`.
+3. **PGEN File Support:**
+   - Note: this may be slower than LDstore2.
+   - Introduced support for pgen filesets as input via `--geno pgen_file_prefix`.
+   - Integrated `finemap_tools` for invoking PLINK 2 (the version must be PLINK v2.00a6LM 64-bit Intel, 2 Mar 2024, or later) to compute LD, using the command template `plink2 --r2-unphased square`.
+   - **Note:** The `--geno` option matches files by prefix; bed files take priority over pgen files, to avoid conflicts when both file types are present.
+4. **Improvements in LD Matrix Handling:**
+
+   - Removed the assertion that LD matrices must not contain any NaN values when they are read. Instead, the `sync_ld_sumstats` function now excludes SNPs whose LD values contain NaNs.
+   - This addresses cases where a SNP's LD calculation yields NaN, e.g. because of unfiltered genotype data or other extreme scenarios. Based on the number of dropped SNPs reported in the log, users are advised to either apply stricter QC or rely on the automatic exclusion of SNPs with NaN LD values.
+5. **Enhancements in Summary Statistics (sumstats) Loading:**
+
+   - Sumstats files that are bgzip-compressed and have an associated .tbi index are now read via the `tabix` command-line tool. This is particularly efficient for genome-wide sumstats, since the data for a single chromosome can be retrieved directly, which significantly reduces loading times.
+   - When `finemap_tools` is unavailable, the original logic of reading the entire file is used.
+   - Sumstats files organized according to PolyFun requirements (columns SNP, A1, A2, BP, CHR, with SNP in the first column) can be indexed with `tabix -s 2 -b 3 -e 3 -c S sumstats_with_bgz_compressed.bgz`.
+6. **Integration of the `finemap_tools` Package:**
+
+   - `finemap_tools` is used to filter out non-biallelic and strand-ambiguous alleles while reading sumstats.
+7. **Code Formatting Updates:**
+
+   - Applied code formatting with Black.
+
+### Future Developments
+
+Further development and updates will continue in my own repository and will not be submitted as pull requests to this project.
\ No newline at end of file
diff --git a/finemapper.py b/finemapper.py
index 8f4dc70..a12aa30 100644
--- a/finemapper.py
+++ b/finemapper.py
@@ -23,6 +23,20 @@
 from urllib.parse import urlparse
 from packaging.version import Version
+from pathlib import Path
+
+def is_finemap_tools_installed():
+    try:
+        import finemap_tools
+        from finemap_tools.reader import is_tabix_installed
+        if not is_tabix_installed():
+            logging.warning('finemap_tools is installed but tabix is not installed. 
Please install tabix to use the finemap_tools package to read tabix-indexed files') + return False + + return True + except: + return False + def splash_screen(): print('*********************************************************************') @@ -31,8 +45,6 @@ def splash_screen(): print('* (C) 2019-2022 Omer Weissbrod') print('*********************************************************************') print() - - def uri_validator(x): @@ -44,52 +56,54 @@ def uri_validator(x): return all([result.scheme, result.netloc, result.path]) except: return False - - - + + def load_ld_npz(ld_prefix): logging.info('Loading LD file %s'%(ld_prefix)) t0 = time.time() - - #load SNPs info + + # load SNPs info snps_filename_parquet = ld_prefix+'.parquet' snps_filename_gz = ld_prefix+'.gz' if os.path.exists(snps_filename_parquet): df_ld_snps = pd.read_parquet(snps_filename_parquet) elif os.path.exists(snps_filename_gz): - df_ld_snps = pd.read_table(snps_filename_gz, sep='\s+') - df_ld_snps.rename(columns={'allele1':'A1', 'allele2':'A2', 'position':'BP', 'chromosome':'CHR', 'rsid':'SNP'}, inplace=True, errors='ignore') + df_ld_snps = pd.read_table(snps_filename_gz, sep="\s+") + + # df_ld_snps.rename(columns={'allele1':'A1', 'allele2':'A2', 'position':'BP', 'chromosome':'CHR', 'rsid':'SNP'}, inplace=True, errors='ignore') else: raise ValueError('couldn\'t find SNPs file %s or %s'%(snps_filename_parquet, snps_filename_gz)) - - #load LD matrix + + # load LD matrix R_filename = ld_prefix+'.npz' if not os.path.exists(R_filename): raise IOError('%s not found'%(R_filename)) ld_arr = sparse.load_npz(R_filename).toarray() ld_arr = ld_arr+ld_arr.T assert np.allclose(np.diag(ld_arr), 1.0) - assert np.all(~np.isnan(ld_arr)) - - #sanity checks + # assert np.all(~np.isnan(ld_arr)) + + # sanity checks assert ld_arr.shape[0] == ld_arr.shape[1] if ld_arr.shape[0] != df_ld_snps.shape[0]: raise ValueError('LD matrix has a different number of SNPs than the SNPs file') - + logging.info('Done in %0.2f seconds'%(time.time() - t0)) return ld_arr, df_ld_snps - - - - + + def get_bcor_meta(bcor_obj): df_ld_snps = bcor_obj.getMeta() df_ld_snps.rename(columns={'rsid':'SNP', 'position':'BP', 'chromosome':'CHR', 'allele1':'A1', 'allele2':'A2'}, inplace=True, errors='raise') ###df_ld_snps['CHR'] = df_ld_snps['CHR'].astype(np.int64) df_ld_snps['BP'] = df_ld_snps['BP'].astype(np.int64) return df_ld_snps - + + +def ldstore_txt_to_npz(ldstore_txt_file, z_file, output_file): + pass + def load_ld_bcor(ld_prefix): bcor_file = ld_prefix+'.bcor' @@ -100,13 +114,13 @@ def load_ld_bcor(ld_prefix): bcor_obj = bcor(bcor_file) df_ld_snps = get_bcor_meta(bcor_obj) ld_arr = bcor_obj.readCorr([]) - assert np.all(~np.isnan(ld_arr)) + # assert np.all(~np.isnan(ld_arr)) logging.info('Done in %0.2f seconds'%(time.time() - t0)) return ld_arr, df_ld_snps - - + + def read_ld_from_file(ld_file): - #if ld_file is a prefix, make it into a full file name + # if ld_file is a prefix, make it into a full file name if not ld_file.endswith('.bcor') and not ld_file.endswith('.npz'): if os.path.exists(ld_file+'.npz'): ld_file = ld_file + '.npz' @@ -115,18 +129,19 @@ def read_ld_from_file(ld_file): else: raise IOError('No suitable LD file found') - #read the LD file + # read the LD file if ld_file.endswith('.bcor'): - ld_arr, df_ld_snps = load_ld_bcor(ld_file[:-5]) + ld_arr, df_ld_snps = load_ld_bcor(ld_file[:-5]) # TODO: modify elif ld_file.endswith('.npz'): - ld_arr, df_ld_snps = load_ld_npz(ld_file[:-4]) + ld_arr, df_ld_snps = load_ld_npz(ld_file[:-4]) # TODO:modify else: raise 
ValueError('unknown LD format')
-    assert np.all(~np.isnan(ld_arr))
+    # NaNs are no longer fatal here: sync_ld_sumstats drops the affected SNPs
+    # later, so we only warn about how many NaN values the matrix contains
+    num_nan = int(np.isnan(ld_arr).sum())
+    if num_nan > 0:
+        logging.warning(f"there are {num_nan} NaN values in the LD matrix")
+
     return ld_arr, df_ld_snps
-    
-    
-    
+
+
 def download_ld_file(url_prefix):
     temp_dir = tempfile.mkdtemp()
     filename_prefix = os.path.join(temp_dir, 'ld')
@@ -141,9 +156,9 @@
             raise ValueError('URL %s wasn\'t found'%(url))
         else:
             raise
-            
+
     return filename_prefix
-    
+
 def run_executable(cmd, description, good_returncode=0, measure_time=True, check_errors=True, show_output=False, show_command=False):
     proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -158,7 +173,7 @@
             line_str = line.strip().decode("utf-8")
             stdout.append(line_str)
             print(line_str)
-        print()
+        print()  # add a newline after the last line of the output
         stdout = '\n'.join(stdout)
         _, stderr = proc.communicate()
     else:
@@ -169,8 +184,8 @@
     if stderr is not None:
         stderr = stderr.decode('ascii')
         if len(stderr)==0: stderr=None
-        
-    #if (stderr is not None or proc.returncode != good_returncode):
+
+    # if (stderr is not None or proc.returncode != good_returncode):
     if proc.returncode != good_returncode:
         if stderr is not None:
             logging.error('stderr:\n%s'%(stderr))
@@ -179,56 +194,60 @@
         raise RuntimeError('%s error'%(description))
     if measure_time:
         logging.info('done in %0.2f seconds'%(time.time() - t0))
-    
+
     if check_errors and stdout is not None:
         for l in stdout.split('\n'):
             if 'error' in l.lower():
                 logging.error(l)
                 raise RuntimeError('%s reported an error'%(description))
     if check_errors and stderr is not None:
-        for l in stderr.split('\n'):
-            if 'error' in l.lower():
-                logging.error(l)
-                raise RuntimeError('%s reported an error'%(description))
-    
+        for l in stderr.split("\n"):
+            if "error" in l.lower():
+                logging.error(l)
+                raise RuntimeError("%s reported an error" % (description))
+
     return stdout, stderr
-    
-    
+
+
 def save_ld_to_npz(ld_arr, df_ld_snps, npz_file):
-    
+
     assert npz_file.endswith('.npz')
     logging.info('Saving LD file %s'%(npz_file))
     t0 = time.time()
-    #save meta file
+    # save meta file
     meta_file = npz_file[:-4] + '.gz'
     df_ld_snps.to_csv(meta_file, sep='\t', index=False)
-    
-    #save .npz file
+
+    # save .npz file
     R = np.tril(ld_arr).astype(np.float64)
     np.fill_diagonal(R, np.diag(R)/2.0)
     R = sparse.coo_matrix(R)
     sparse.save_npz(npz_file, R, compressed=True)
     logging.info('Done in %0.2f seconds'%(time.time() - t0))
-    
-    
-class Fine_Mapping(object):
-    def __init__(self, genotypes_file, sumstats_file, n, chr_num, ldstore_exe,
-                 sample_file=None, incl_samples=None, cache_dir=None, n_threads=None, memory=None,
-                 allow_swapped_indel_alleles=False):
-
-        #check that data is valid
-        if genotypes_file is not None:
-            if genotypes_file.endswith('.bgen'):
-                if sample_file is None:
-                    raise IOError('sample-file must be provided with a bgen file')
-
-
-        #read sumstats and filter to target chromosome only
-        logging.info('Loading sumstats file...')
-        t0 = time.time()
+def load_sumstats(sumstats_file, chr_num, allow_swapped_indel_alleles=False):
+    # read sumstats and filter to target chromosome only
+    logging.info("Loading sumstats file...")
+    t0 = time.time()
+
+    if (
+        sumstats_file.endswith(".gz")
+        and Path(sumstats_file + ".tbi").exists()
+        and is_finemap_tools_installed()
+    ):
+
+        from finemap_tools.reader import tabix_reader
+
+        df_sumstats = tabix_reader(sumstats_file, region=f"{chr_num}")
+        if all([isinstance(i, str) for i in df_sumstats.iloc[0].values]):
+            logging.info(
+                f"the tabix output appears to include a header row read as data; the first row is {df_sumstats.iloc[:1].values}"
+            )
+
+    else:
        try:
            df_sumstats = pd.read_parquet(sumstats_file)
        except (ArrowIOError, ArrowInvalid):
@@ -237,13 +256,99 @@
            df_sumstats = pd.read_table(sumstats_file, sep='\s+')
        if not np.any(df_sumstats['CHR'] == chr_num):
            raise IOError('sumstats file does not include any SNPs in chromosome %s'%(chr_num))
        if np.any(df_sumstats['CHR'] != chr_num):
            df_sumstats = df_sumstats.query('CHR==%s'%(chr_num)).copy()
-        df_sumstats = set_snpid_index(df_sumstats, allow_swapped_indel_alleles=allow_swapped_indel_alleles)
-        if 'P' not in df_sumstats.columns:
-            df_sumstats['P'] = stats.chi2(1).sf(df_sumstats['Z']**2)
-        logging.info('Loaded sumstats for %d SNPs in %0.2f seconds'%(df_sumstats.shape[0], time.time()-t0))
-        
-        #save class members
+    df_sumstats = set_snpid_index(
+        df_sumstats, allow_swapped_indel_alleles=allow_swapped_indel_alleles
+    )
+
+    if "P" not in df_sumstats.columns:
+        df_sumstats["P"] = stats.chi2(1).sf(df_sumstats["Z"] ** 2)
+    logging.info(
+        "Loaded sumstats for %d SNPs in %0.2f seconds"
+        % (df_sumstats.shape[0], time.time() - t0)
+    )
+    ## filtering pipeline provided by finemap_tools (skipped if it is not installed)
+
+    if is_finemap_tools_installed():
+        from finemap_tools.utils import add_ID
+        from finemap_tools.snpfilter import filter_pipline
+
+        df_sumstats["added_id"] = add_ID(df_sumstats, ["CHR", "BP", "A1", "A2"])
+
+        logging.info(
+            f"filtering SNPs with finemap_tools, starting from {df_sumstats.shape[0]} SNPs"
+        )
+        df_sumstats = filter_pipline(sumstats=df_sumstats, id_col="added_id")
+
+        logging.info(f"after filtering, {df_sumstats.shape[0]} SNPs remain")
+        df_sumstats = df_sumstats.drop(columns=["added_id"])
+    return df_sumstats
+
+
+class Fine_Mapping(object):
+    def __init__(
+        self,
+        genotypes_file,
+        sumstats_file,
+        n,
+        chr_num,
+        ldstore_exe,
+        sample_file=None,
+        incl_samples=None,
+        cache_dir=None,
+        cache_format=None,
+        n_threads=None,
+        memory=None,
+        allow_swapped_indel_alleles=False,
+    ):
+
+        # check that data is valid
+        if genotypes_file is not None:
+            if genotypes_file.endswith(".bgen"):
+                if sample_file is None:
+                    raise IOError("sample-file must be provided with a bgen file")
+
+        df_sumstats = load_sumstats(
+            sumstats_file,
+            chr_num,
+            allow_swapped_indel_alleles=allow_swapped_indel_alleles,
+        )
+
+        # save class members
        self.genotypes_file = genotypes_file
        self.n = n
        self.sample_file = sample_file
@@ -251,23 +356,30 @@
        self.incl_samples = incl_samples
        self.ldstore_exe = ldstore_exe
        self.cache_dir = cache_dir
+        self.cache_format = cache_format
        self.n_threads = n_threads
        self.chr = chr_num
        self.memory = memory
        self.allow_swapped_indel_alleles = allow_swapped_indel_alleles
-        
-        
+
    def sync_ld_sumstats(self, ld_arr, df_ld_snps, allow_missing=False):
        df_ld_snps = set_snpid_index(df_ld_snps, allow_swapped_indel_alleles=self.allow_swapped_indel_alleles)
-        
+
        if ld_arr is None:
            df_ld = pd.DataFrame(np.zeros(len(df_ld_snps.index), dtype=np.int64), index=df_ld_snps.index, columns=['dummy'])
        else:
            assert ld_arr.shape[0] == df_ld_snps.shape[0]
            assert ld_arr.shape[0] == ld_arr.shape[1]
            df_ld = pd.DataFrame(ld_arr, index=df_ld_snps.index, columns=df_ld_snps.index)
-        
-        #make sure that all SNPs in the sumstats file are in the LD file
+        # drop SNPs whose LD values contain NaNs: greedily remove the SNP with
+        # the most missing values until none remain (a single monomorphic SNP
+        # produces a full NaN row/column, so dropping every SNP that merely
+        # touches a NaN would discard the whole matrix)
+        have_na_snp = []
+        while df_ld.isna().values.any():
+            worst = df_ld.isna().sum(axis=1).idxmax()
+            df_ld = df_ld.drop(index=worst, columns=worst)
+            have_na_snp.append(worst)
+        logging.info(
+            f"removed {len(have_na_snp)} SNPs with NaNs in their LD values; if this number is large, consider applying stricter QC"
+        )
+
+        # make sure that all SNPs in the sumstats file are in the LD file
        snps_in_ld_file = self.df_sumstats_locus.index.isin(df_ld.index)
        if not np.all(snps_in_ld_file):
            # Could the missing SNPs be due to mismatched indel alleles?
@@ -286,11 +398,11 @@
                    ' these omitted SNPs may be causal, in which case you may get false positive results...'
                    ' If there should be no missing SNPs (e.g. you are using insample LD), see the flag --allow-swapped-indel-alleles')
                raise ValueError(error_msg)
-        
-        #make sure that df_sumstats_locus is not empty
+
+        # make sure that df_sumstats_locus is not empty
        assert self.df_sumstats_locus.shape[0] > 0, 'no SNPs found in df_sumstats_locus. Please double-check that the SNPs in the LD file and in the sumstats file have the exact same positions'
-        
-        #filter LD to only SNPs found in the sumstats file
+
+        # filter LD to only SNPs found in the sumstats file
        assert not np.any(self.df_sumstats_locus.index.duplicated())
        if df_ld.shape[0] != self.df_sumstats_locus.shape[0] or np.any(df_ld.index != self.df_sumstats_locus.index):
            if ld_arr is None:
@@ -299,43 +411,41 @@
                df_ld = df_ld.loc[self.df_sumstats_locus.index, self.df_sumstats_locus.index]
            df_ld_snps = df_ld_snps.loc[df_ld.index]

-        #do a final verification that we're synced
+        # do a final verification that we're synced
        assert np.all(df_ld.index == self.df_sumstats_locus.index)
        assert np.all(df_ld_snps.index == self.df_sumstats_locus.index)
-        
-        #add leading zero to sumstats CHR column if needed
+
+        # add leading zero to sumstats CHR column if needed
        if np.any(df_ld_snps['CHR'].astype(str).str.startswith('0')):
            self.df_sumstats_locus = self.df_sumstats_locus.copy()
            self.df_sumstats_locus['CHR'] = self.df_sumstats_locus['CHR'].astype(str)
            is_1digit = self.df_sumstats_locus['CHR'].str.len()==1
            self.df_sumstats_locus.loc[is_1digit, 'CHR'] = '0' + self.df_sumstats_locus.loc[is_1digit, 'CHR']
-        
-        #update self.df_ld
+
+        # update self.df_ld
        self.df_ld = df_ld
        self.df_ld_snps = df_ld_snps
        assert self.df_ld.notnull().all().all()
-        
-        
    def find_cached_ld_file(self, locus_start, locus_end, need_bcor=False):
-        
-        #if there's no cache dir, return None
+
+        # if there's no cache dir, return None
        if self.cache_dir is None:
            return None
-        
+
        if self.incl_samples is None:
            fname_pattern = '%s.%d'%(os.path.basename(self.genotypes_file), self.chr)
        else:
            fname_pattern = '%s.%s.%d'%(os.path.basename(self.genotypes_file), os.path.basename(self.incl_samples), self.chr)
-        
-        #search for suitable LD files
+
+        # search for suitable LD files
        bcor_files = glob.glob(os.path.join(self.cache_dir, fname_pattern+'*.bcor'))
        npz_files = glob.glob(os.path.join(self.cache_dir, fname_pattern+'*.npz'))
        if need_bcor:
            ld_files = bcor_files + npz_files
        else:
            ld_files = npz_files + bcor_files
-        
+
        for ld_file in ld_files:
            if os.stat(ld_file).st_size==0:
                os.remove(ld_file)
@@ -345,12 +455,16 @@
            bp2 = int(ld_basename.split('.')[-2])
            assert bp1 < bp2
            if (bp1 > locus_start) or (bp2 < locus_end): continue
-            
-            #get the list of SNPs in the LD file
+
+            # get the list of SNPs in the LD file
            if ld_file.endswith('.npz'):
                meta_file = ld_file[:-4] + '.gz'
                if not os.path.exists(meta_file): continue
-                df_ld_snps = pd.read_table(meta_file)
+                df_ld_snps = pd.read_table(
+                    meta_file,
+                    sep="\s+",
+                    usecols=["A1", "A2", "BP", "CHR", "SNP"],
+                )
"CHR", "SNP"], + ) elif ld_file.endswith('.bcor'): bcor_obj = bcor(ld_file) df_ld_snps = bcor_obj.getMeta() @@ -359,19 +473,19 @@ def find_cached_ld_file(self, locus_start, locus_end, need_bcor=False): ###df_ld_snps['CHR'] = df_ld_snps['CHR'].astype(np.int64) df_ld_snps['BP'] = df_ld_snps['BP'].astype(np.int64) else: - raise IOError('unknown file extension') + raise IOError("unknown file extension") + df_ld_snps = set_snpid_index(df_ld_snps, allow_swapped_indel_alleles=self.allow_swapped_indel_alleles) - - #make sure that the LD file includes data for all the SNPs in the locus + + # make sure that the LD file includes data for all the SNPs in the locus if not np.all(self.df_sumstats_locus.index.isin(df_ld_snps.index)): logging.warning('The available cached LD file was ignored because it does not contain data for all the SNPs in the locus') continue - - #if we got here than we found a suitable d file + + # if we got here than we found a suitable d file logging.info('Found a cached LD file containing all SNPs with sumstats in chromosome %d BP %d-%d: %s'%(self.chr, locus_start, locus_end, ld_file)) return ld_file - - + def get_ld_output_file_prefix(self, locus_start, locus_end, output_dir=None): if self.cache_dir is None: if output_dir is None: @@ -382,16 +496,14 @@ def get_ld_output_file_prefix(self, locus_start, locus_end, output_dir=None): output_prefix = os.path.join(self.cache_dir, '%s.%d.%d.%d'%(os.path.basename(self.genotypes_file), self.chr, locus_start, locus_end)) else: output_prefix = os.path.join(self.cache_dir, '%s.%s.%d.%d.%d'%(os.path.basename(self.genotypes_file), os.path.basename(self.incl_samples), self.chr, locus_start, locus_end)) - + return output_prefix - - + def compute_ld_bgen(self, locus_start, locus_end, verbose=False): - - #create df_z + + # create df_z df_z = self.df_sumstats_locus[['SNP', 'CHR', 'BP', 'A1', 'A2']].copy() - - #add leading zeros to chromosome numbers if needed + try: import bgen except (ImportError, ModuleNotFoundError): @@ -401,26 +513,28 @@ def compute_ld_bgen(self, locus_start, locus_end, verbose=False): bgen_chromosomes = bfile.chroms() if bgen_chromosomes[0].startswith('0'): df_z['CHR'] = '0' + df_z['CHR'].astype(str) - - - #sync the order of the alleles between the sumstats and the bgen file + + # sync the order of the alleles between the sumstats and the bgen file list_bgen = [] - #rsids = bfile.rsids() - #small change reduces the time for bgen processing - #the previous implementation would iterate through all the SNPs in the bgen file - #this implementation loops over just the snps in the locus + # rsids = bfile.rsids() + # small change reduces the time for bgen processing + # the previous implementation would iterate through all the SNPs in the bgen file + # this implementation loops over just the snps in the locus rsids = bfile.fetch(self.chr, locus_start, locus_end) for snp_i, rsid in enumerate(rsids): -# if rsid not in df_z['SNP'].values: continue -# snp_alleles = bfile[snp_i].alleles -# snp_chrom = bfile[snp_i].chrom -# snp_pos = bfile[snp_i].pos - if rsid.rsid not in df_z['SNP'].values: continue + # if rsid not in df_z['SNP'].values: continue + # snp_alleles = bfile[snp_i].alleles + # snp_chrom = bfile[snp_i].chrom + # snp_pos = bfile[snp_i].pos + if rsid.rsid not in df_z["SNP"].values: + continue # NOTE: so this indeed take the intersection of the two sets snp_alleles = rsid.alleles snp_chrom = rsid.chrom snp_pos = rsid.pos + rsid = rsid.rsid # NOTE: this is the change assert len(snp_alleles) == 2, 'cannot handle SNPs with more than 
two alleles' - df_snp = df_z.query('SNP == "%s"'%(rsid)) + df_snp = df_z.query('SNP == "%s"' % (rsid)) + assert df_snp.shape[0]==1 a1, a2 = df_snp['A1'].iloc[0], df_snp['A2'].iloc[0] snp_a1, snp_a2 = snp_alleles[0], snp_alleles[1] @@ -436,55 +550,110 @@ def compute_ld_bgen(self, locus_start, locus_end, verbose=False): df_z = set_snpid_index(df_z, allow_swapped_indel_alleles=self.allow_swapped_indel_alleles) df_z = df_z[[]].merge(df_bgen, left_index=True, right_index=True) df_z = df_z[['SNP', 'CHR', 'BP', 'A1', 'A2']] - - - #rename columns + + # rename columns df_z.rename(columns={'SNP':'rsid', 'CHR':'chromosome', 'BP':'position', 'A1':'allele1', 'A2':'allele2'}, inplace=True) - - #Create LDstore input files + + # Create LDstore input files temp_dir = tempfile.mkdtemp() - incl_file = os.path.join(temp_dir, 'incl.incl') + incl_file = os.path.join( + temp_dir, "incl.incl" + ) # NOTE: remove this line if not needed master_file = os.path.join(temp_dir, 'master.master') z_file = os.path.join(temp_dir, 'chr%s.%s_%s.z'%(self.chr, locus_start, locus_end)) dose_file = os.path.join(temp_dir, 'dosages.bdose') df_z.to_csv(z_file, sep=' ', index=False) - - #find number of samples - if self.incl_samples is None: + + # find number of samples + if ( + self.incl_samples is None + ): # NOTE: looks prety bad as this is only the reason of wheather there is a header in it num_samples = pd.read_table(self.sample_file).shape[0]-1 else: num_samples = pd.read_table(self.incl_samples, header=None).shape[0] - - #get output file name - bcor_file = self.get_ld_output_file_prefix(locus_start, locus_end, temp_dir) + '.bcor' - - #Create LDstore master file + + # get output file name + bcor_file = os.path.join( + temp_dir, "chr%s.%s_%s.bcor" % (self.chr, locus_start, locus_end) + ) + ## parse for cache_format + if self.cache_format == "npz": # + ld_file = os.path.join( + temp_dir, "chr%s.%s_%s.ld" % (self.chr, locus_start, locus_end) + ) + + elif self.cache_format == "bcor": + bcor_file = ( + self.get_ld_output_file_prefix(locus_start, locus_end, temp_dir) + + ".bcor" + ) + else: + raise ValueError(f"unknown cache format {self.cache_format}") + + # Create LDstore master file df_master = pd.DataFrame(columns=['z','bgen','bgi','bcor','dose','sample','n_samples']) df_master['z'] = [z_file] df_master['bgen'] = [self.genotypes_file] df_master['bgi'] = [self.genotypes_file+'.bgi'] df_master['bcor'] = [bcor_file] + df_master["ld"] = [ld_file] df_master['bdose'] = [dose_file] df_master['sample'] = [self.sample_file] df_master['n_samples'] = num_samples if self.incl_samples is not None: df_master['incl'] = self.incl_samples df_master.to_csv(master_file, sep=';', header=True, index=False) - - #run LDstore - ldstore_cmd = [self.ldstore_exe, '--in-files', master_file, '--write-bcor', '--write-bdose', '--bdose-version', '1.0'] + + # run LDstore + ldstore_cmd = [ + self.ldstore_exe, + "--in-files", + master_file, + # "--write-bcor", + # "--write-text", + "--write-bdose", + "--bdose-version", + "1.0", + ] # TODO: maybe for checking big files or for bdose 1.1 + + if self.cache_format == "npz": + ldstore_cmd += ["--write-text"] + elif self.cache_format == "bcor": + ldstore_cmd += ["--write-bcor"] + if self.memory is not None: ldstore_cmd += ['--memory', str(self.memory)] if self.n_threads is not None: ldstore_cmd += ['--n-threads', str(self.n_threads)] run_executable(ldstore_cmd, 'LDStore', measure_time=True, show_output=verbose, show_command=verbose) - - if not os.path.exists(bcor_file): - raise IOError('Could not find output BCOR file') 
- - return bcor_file - - + + if self.cache_format == "bcor": + if not os.path.exists(bcor_file): + raise IOError("Could not find output BCOR file") + return bcor_file + elif ( + self.cache_format == "npz" + ): # load txt file by np and return ld_arr and df_z + if not os.path.exists(ld_file): + raise IOError("Could not find output LD file") + + ld_arr = np.loadtxt(ld_file) + assert ld_arr.shape[0] == ld_arr.shape[1] == df_z.shape[0] + df_ld_snps = df_z.rename( + columns={ + "allele1": "A1", + "allele2": "A2", + "position": "BP", + "chromosome": "CHR", + "rsid": "SNP", + }, + inplace=False, + errors="ignore", + ) + # save_ld_to_npz(ld_arr, df_z, npz_file) # NOTE: after this function will auto save to npz file, so only need to return ld_arr, df_z + # return + return ld_arr, df_ld_snps + def read_plink_genotypes(self, bed): X = bed.compute().astype(np.float64) if np.any(np.isnan(X)): @@ -496,13 +665,66 @@ def read_plink_genotypes(self, bed): is_polymorphic = X.std(axis=0)>0 X[:, is_polymorphic] /= X[:, is_polymorphic].std(axis=0) return X - - + + def compute_ld_plink_pgen(self, locus_start, locus_end, verbose): + from finemap_tools.plink import plink2_cal_LD + from finemap_tools.reader.plink import read_pvar + logging.info( + f"Computing LD from pgen fileset {self.genotypes_file} chromosome {self.chr} region {locus_start}-{locus_end}" + ) + t0 = time.time() + + + ld_snp_df = read_pvar(Path(self.genotypes_file).with_suffix(".pvar")).rename( + columns={ + "#CHROM": "CHR", + "ID": "SNP", + "POS": "BP", + "REF": "A2", # REF as A2 is to make sure the A1 is the minor allele and consistent with the sumstats later + "ALT": "A1", + } + ) + ld_snp_df = set_snpid_index( + ld_snp_df, allow_swapped_indel_alleles=self.allow_swapped_indel_alleles + ) + logging.info(f"read {ld_snp_df.shape[0]} SNPs from pvar file") + + # used sumstats to include the snp in analysis + df_z = self.df_sumstats_locus[["SNP", "CHR", "BP", "A1", "A2"]].copy() + df_z = set_snpid_index( + df_z, allow_swapped_indel_alleles=self.allow_swapped_indel_alleles + ) + df_z = df_z[[]].merge(ld_snp_df, left_index=True, right_index=True) + ld_snp_df = df_z[["SNP", "CHR", "BP", "A1", "A2"]] + del df_z + + logging.info( + f"only keep {ld_snp_df.shape[0]} SNPs in the sumstats file to cal LD to avoid the long time of cal LD" + ) + + # get_ld + with tempfile.TemporaryDirectory() as tmpdirname: + print("created temporary directory", tmpdirname) + ld_df = plink2_cal_LD( + pgen=self.genotypes_file, + snplist=ld_snp_df["SNP"].tolist(), + outSuffix=tmpdirname + "/test", + thread=self.n_threads, + memory=self.memory * 1024, + ) + + assert ld_df.shape[0] == ld_snp_df.shape[0] + assert ( + ld_df.index.tolist() == ld_snp_df["SNP"].tolist() == ld_df.columns.tolist() + ) + + return ld_df.values, ld_snp_df + def compute_ld_plink(self, locus_start, locus_end, verbose): logging.info('Computing LD from plink fileset %s chromosome %s region %s-%s'%(self.genotypes_file, self.chr, locus_start, locus_end)) t0 = time.time() - - #read the plink file + + # read the plink file df_bim, df_fam, bed = read_plink(self.genotypes_file) df_bim.rename(columns={'snp':'SNP', 'pos':'BP', 'chrom':'CHR', 'a0':'A2', 'a1':'A1'}, inplace=True) df_bim['A1'] = df_bim['A1'].astype('str') @@ -511,15 +733,15 @@ def compute_ld_plink(self, locus_start, locus_end, verbose): del df_bim['i'] del df_bim['cm'] bed = bed.T - - #zoom in on target locus + + # zoom in on target locus is_snp_in_region = (df_bim['BP'].between(locus_start, locus_end)) & (df_bim['CHR']==self.chr) df_bim = 
        df_ld_snps = df_bim
        bed = bed[:, is_snp_in_region.values]
        assert bed.shape[1]>0, 'No SNPs found in the target region'
-        
-        #compute chunk size, using the formula MEM = bed.shape[0] * chunk_size * 4 / 2**30
+
+        # compute chunk size, using the formula MEM = bed.shape[0] * chunk_size * 4 / 2**30
        if self.memory is None:
            mem_limit = 1
        else:
@@ -532,8 +754,8 @@
        assert chunk_size * (num_chunks-2) < bed.shape[1]-1
        if chunk_size * (num_chunks-1) >= bed.shape[1]:
            num_chunks-=1
-        
-        #compute LD in chunks
+
+        # compute LD in chunks
        logging.info('Found %d SNPs in target region. Computing LD in %d chunks...'%(bed.shape[1], num_chunks))
        ld_arr = np.empty((bed.shape[1], bed.shape[1]), dtype=np.float64)
        for chunk_i in tqdm(range(num_chunks)):
@@ -552,74 +774,81 @@
        if np.any(np.isclose(ld_diag, 0.0)):
            ld_diag[np.isclose(ld_diag, 0.0)] = 1.0
        np.fill_diagonal(ld_arr, ld_diag)
-        
+
        logging.info('Done in %0.2f seconds'%(time.time() - t0))
        return ld_arr, df_ld_snps
-        
-        
-        
    def set_locus(self, locus_start, locus_end):
-        
-        #update self.df_sumstats_locus
+
+        # update self.df_sumstats_locus
        self.df_sumstats_locus = self.df_sumstats.query('%d <= BP <= %d'%(locus_start, locus_end))
        num_snps = self.df_sumstats_locus.shape[0]
        if num_snps < 2:
            raise ValueError('%d SNP(s) found in sumstats file in the BP range %d-%d'%(num_snps, locus_start, locus_end))
-        
-        
-        
    def get_ld_data(self, locus_start, locus_end, need_bcor=False, verbose=False):
-        
+
        ld_arr, df_ld_snps, ld_file = None, None, None
-        
-        #check if we already have a suitable LD file in the cache dir
+
+        # check if we already have a suitable LD file in the cache dir
        ld_file = self.find_cached_ld_file(locus_start, locus_end, need_bcor=need_bcor)
-        
-        #compute LD if we couldn't find a suitable LD file
+
+        # compute LD if we couldn't find a suitable LD file
        if ld_file is None:
-            if self.genotypes_file.endswith('.bgen'):
+            if self.genotypes_file.endswith(".bgen"):  # yields either a bcor path or in-memory arrays, depending on cache_format
                if not os.path.exists(self.genotypes_file):
                    raise IOError('%s doesn\'t exist'%(self.genotypes_file))
-                ld_file = self.compute_ld_bgen(locus_start, locus_end, verbose=verbose)
+                if self.cache_format == "bcor":
+                    ld_file = self.compute_ld_bgen(
+                        locus_start, locus_end, verbose=verbose
+                    )
+                elif self.cache_format == "npz":
+                    ld_arr, df_ld_snps = self.compute_ld_bgen(
+                        locus_start, locus_end, verbose=verbose
+                    )
+                # ld_file = self.compute_ld_bgen(locus_start, locus_end, verbose=verbose)
            elif os.path.exists(self.genotypes_file+'.bed'):
                ld_arr, df_ld_snps = self.compute_ld_plink(locus_start, locus_end, verbose=verbose)
+            elif os.path.exists(self.genotypes_file + ".pgen"):
+                ld_arr, df_ld_snps = self.compute_ld_plink_pgen(
+                    locus_start, locus_end, verbose=verbose
+                )
            else:
                raise ValueError('no suitable file found for: %s'%(self.genotypes_file))
-        
-        #arrange the LD data
+
+        # arrange the LD data
        assert ld_file is None or (ld_arr is None and df_ld_snps is None)
-        
-        #if there is no LD file, return the LD data directly
-        if ld_file is None:
-            #cache output if possible
+
+        # if there is no LD file, return the LD data directly
+
+        if ld_file is None:  # NOTE: ld_file is None whenever the LD was computed in memory (the npz, bed and pgen paths above)
+            # cache output if possible
            if self.cache_dir is not None:
                npz_file = self.get_ld_output_file_prefix(locus_start, locus_end) + '.npz'
                save_ld_to_npz(ld_arr, df_ld_snps, npz_file)
            return ld_arr, df_ld_snps
-        #if we have an LD file, return it if it's a bcor and we want a bcor, or return the LD data directly otherwise
-        else:
+        # if we have an LD file, return it if it's a bcor and we want a bcor, or return the LD data directly otherwise
+        else:  # NOTE: this branch is convoluted and could be simplified
            if ld_file.endswith('.bcor'):
+
                if need_bcor:
                    return ld_file
                else:
                    ld_arr, df_ld_snps = read_ld_from_file(ld_file)
-                    
-                    #cache output if possible
+
+                    # cache output if possible
                    if self.cache_dir is not None and ld_file.endswith('.bcor'):
                        npz_file = self.get_ld_output_file_prefix(locus_start, locus_end) + '.npz'
                        save_ld_to_npz(ld_arr, df_ld_snps, npz_file)
                    return ld_arr, df_ld_snps
-            
+
            else:
                ld_arr, df_ld_snps = read_ld_from_file(ld_file)
                return ld_arr, df_ld_snps
-            
-    
    def finemap(self):
        raise NotImplementedError()
-    
+
    def estimate_h2_hess(self, prop_keep=0.005, R_cutoff=0.99, pvalue_bound=None):
        '''
        prop_keep: Proportion of SNPs to use in the estimation (only the ones with the smallest p-values)
@@ -637,8 +866,8 @@
        with the unbiased estimator (denominator of $n - m$) used in the original HESS publication
        (Shi et al., 2014; https://doi.org/10.1016/j.ajhg.2016.05.013).
        '''
-        
-        #keep only potential causal SNPs
+
+        # keep only potential causal SNPs
        pvalue_cutoff = self.df_sumstats_locus['P'].quantile(prop_keep)
        if pvalue_cutoff==0:
            pvalue_cutoff = np.min(self.df_sumstats_locus['P'].loc[lambda p:p>0])
@@ -650,21 +879,20 @@
        else:
            return 0

-        #take a maximally independent subset
+        # take a maximally independent subset
        np.fill_diagonal(R_pot_csnp,0)
        import networkx as nx
        G = nx.from_numpy_array(np.abs(R_pot_csnp)>R_cutoff)
        np.fill_diagonal(R_pot_csnp,1)
        inds = np.sort(nx.maximal_independent_set(G))
-        
-        #estimate h2 using HESS
+
+        # estimate h2 using HESS
        R_subset = R_pot_csnp[np.ix_(inds, inds)]
        alpha_subset = self.df_sumstats_locus.loc[is_potential_csnp, 'Z'].iloc[inds].values / np.sqrt(self.n)
        h2_hess = alpha_subset.dot(np.linalg.solve(R_subset, alpha_subset)) - R_subset.shape[0]/self.n
-        
+
        return h2_hess
-        
-        
+
    def estimate_h2_hess_wrapper(self, prop_keep=0.005, R_cutoff=0.99, min_h2=None, num_samples=100):
        '''
        prop_keep: Proportion of SNPs to use in the estimation (only the ones with the smallest p-values)
@@ -686,21 +914,42 @@
                       for try_num in range(num_samples)]
        h2_hess = np.mean(h2_hess_list)
        return h2_hess
-        
-        
-        
-        
-        
-        
-class SUSIE_Wrapper(Fine_Mapping):
-    def __init__(self, genotypes_file, sumstats_file, n, chr_num, ldstore_exe, sample_file=None,
-                 incl_samples=None, cache_dir=None, n_threads=None, memory=None,
-                 allow_swapped_indel_alleles=False):
-        super(SUSIE_Wrapper, self).__init__(genotypes_file, sumstats_file, n, chr_num, ldstore_exe=ldstore_exe, sample_file=sample_file, incl_samples=incl_samples, cache_dir=cache_dir, n_threads=n_threads, memory=memory, allow_swapped_indel_alleles=allow_swapped_indel_alleles)
-        
-        #load SuSiE R package
+
+
+class SUSIE_Wrapper(Fine_Mapping):
+
+    def __init__(
+        self,
+        genotypes_file,
+        sumstats_file,
+        n,
+        chr_num,
+        ldstore_exe,
+        sample_file=None,
+        incl_samples=None,
+        cache_dir=None,
+        cache_format=None,
+        n_threads=None,
+        memory=None,
+        allow_swapped_indel_alleles=False,
+    ):
+
+        super(SUSIE_Wrapper, self).__init__(
+            genotypes_file,
+            sumstats_file,
+            n,
+            chr_num,
+            ldstore_exe=ldstore_exe,
+            sample_file=sample_file,
+            incl_samples=incl_samples,
+            cache_dir=cache_dir,
+            cache_format=cache_format,
+            n_threads=n_threads,
+            memory=memory,
+            allow_swapped_indel_alleles=allow_swapped_indel_alleles,
+        )
+
+        # load SuSiE R package
        import rpy2
        import rpy2.robjects.numpy2ri as numpy2ri
        import rpy2.robjects as ro
@@ -709,30 +958,27 @@
        from rpy2.robjects.packages import importr
        self.susieR = importr('susieR')
        self.R_null = ro.rinterface.NULL
-        #self.RNULLType = rpy2.rinterface.RNULLType
-        
-        
-        
+        # self.RNULLType = rpy2.rinterface.RNULLType
    def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob=True, prior_var=None, residual_var=None, residual_var_init=None, hess_resvar=False,
                hess=False, hess_iter=100, hess_min_h2=None, susie_max_iter=100, verbose=False, ld_file=None, debug_dir=None, allow_missing=False,
                susie_outfile=None, finemap_dir=None):

-        #check params
+        # check params
        if use_prior_causal_prob and 'SNPVAR' not in self.df_sumstats.columns:
            raise ValueError('SNPVAR column not found in sumstats file')
        if hess_resvar:
            assert hess, 'hess_resvar cannot be specified if hess is FALSE'
-        
-        #set locus
+
+        # set locus
        self.set_locus(locus_start, locus_end)
-        
-        #download LD file if it's a url
+
+        # download LD file if it's a url
        if uri_validator(ld_file):
            ld_file = download_ld_file(ld_file)
            delete_ld_files_on_exit = True
        else:
            delete_ld_files_on_exit = False
-        
-        #Load LD data into memory if num_causal_snps>1
+
+        # Load LD data into memory if num_causal_snps>1
        if num_causal_snps==1:
            if hess:
                raise ValueError('Cannot use HESS-based variance estimator when assuming a single causal SNP per locus')
@@ -743,18 +989,20 @@
                ld_arr, df_ld_snps = self.get_ld_data(locus_start, locus_end, need_bcor=False, verbose=verbose)
            else:
                ld_arr, df_ld_snps = read_ld_from_file(ld_file)
-            assert np.all(~np.isnan(ld_arr))
+
+            # assert np.all(~np.isnan(ld_arr))  # NOTE: NaNs in the LD matrix are common; rather than raising an error here, sync_ld_sumstats drops the affected SNPs, so this assertion is commented out
+
            self.sync_ld_sumstats(ld_arr, df_ld_snps, allow_missing=allow_missing)
            del ld_arr
            del df_ld_snps
-        
-        #define prior causal probabilities
+
+        # define prior causal probabilities
        if use_prior_causal_prob:
            prior_weights = self.df_sumstats_locus['SNPVAR'].copy().values
            prior_weights /= prior_weights.sum()
            assert np.isclose(prior_weights.sum(), 1)
-        
-        #flip effect sizes if needed
+
+        # flip effect sizes if needed
        assert np.all(self.df_ld_snps['BP'] == self.df_sumstats_locus['BP'])
        is_flipped = self.df_ld_snps['A1'] == self.df_sumstats_locus['A2']
        is_not_flipped = self.df_ld_snps['A1'] == self.df_sumstats_locus['A1']
@@ -763,9 +1011,8 @@
        if np.any(is_flipped):
            bhat[is_flipped.values] *= -1
            logging.info('Flipping the effect-sign of %d SNPs that are flipped compared to the LD panel'%(is_flipped.sum()))
-        
-        
-        #Use HESS to estimate causal effect sizes
+
+        # Use HESS to estimate causal effect sizes
        if hess:
            if prior_var is not None:
                raise ValueError('cannot specify both hess and a custom prior_var')
@@ -785,18 +1032,18 @@
            if prior_var >= 1:
                raise ValueError('HESS-estimated prior-var >1. 
The HESS estimator cannot be used in this locus.') logging.info('HESS estimated causal effect size variance: %0.4e'%(prior_var)) - + if hess_resvar: residual_var = 1 - h2_hess logging.info('Residual variance using the HESS estimate: %0.4e'%(residual_var)) assert residual_var>=0 - - #rpy2 bug fix + + # rpy2 bug fix import rpy2.robjects.numpy2ri as numpy2ri reload(numpy2ri) numpy2ri.activate() - - #run SuSiE + + # run SuSiE t0 = time.time() m = self.df_sumstats_locus.shape[0] logging.info('Starting %s SuSiE fine-mapping for chromosome %d BP %d-%d (%d SNPs)'%( @@ -806,28 +1053,31 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob locus_end, self.df_ld.shape[0] )) - - #save variables to debug dir if needed + + # save variables to debug dir if needed if debug_dir is not None: os.makedirs(debug_dir, exist_ok=True) logging.info('Saving debug info to: %s'%(debug_dir)) self.df_sumstats_locus.to_csv(os.path.join(debug_dir, 'df_sumstats_locus.txt'), index=False, sep='\t') np.savetxt(os.path.join(debug_dir, 'bhat.txt'), bhat) - #np.savez_compressed(os.path.join(debug_dir, 'R.npz'), R=self.df_ld.values) + # np.savez_compressed(os.path.join(debug_dir, 'R.npz'), R=self.df_ld.values) np.savetxt(os.path.join(debug_dir, 'n.txt'), [self.n]) np.savetxt(os.path.join(debug_dir, 'L.txt'), [num_causal_snps]) np.savetxt(os.path.join(debug_dir, 'residual_var.txt'), [np.nan] if (residual_var is None) else [residual_var]) np.savetxt(os.path.join(debug_dir, 'prior_var.txt'), [np.nan] if (prior_var is None) else [prior_var]) np.savetxt(os.path.join(debug_dir, 'prior_weights.txt'), prior_weights if use_prior_causal_prob else [np.nan]) - - #create a zipped debug file + + # create a zipped debug file import zipfile debug_files = glob.glob(os.path.join(debug_dir, '*.txt')) zip_file = os.path.join(debug_dir, 'debug.zip') zf = zipfile.ZipFile(zip_file, mode='w') for debug_file in debug_files: - zf.write(debug_file, os.path.basename(debug_file), compress_type=zipfile.ZIP_DEFLATED) - + zf.write( + debug_file, + os.path.basename(debug_file), + compress_type=zipfile.ZIP_DEFLATED, + ) assert self.df_ld.notnull().all().all() if residual_var is not None: residual_var_init = residual_var @@ -869,34 +1119,34 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob susie_time = time.time()-t0 logging.info('Done in %0.2f seconds'%(susie_time)) - #extract pip and beta_mean + # extract pip and beta_mean pip = np.array(self.susieR.susie_get_pip(susie_obj)) beta_mean = np.array(self.susieR.coef_susie(susie_obj)[1:]) assert np.allclose(beta_mean, np.sum(np.array(susie_obj.rx2('mu')) * np.array(susie_obj.rx2('alpha')), axis=0) / np.array(susie_obj.rx2('X_column_scale_factors'))) - #compute the posterior mean of beta^2 + # compute the posterior mean of beta^2 s_alpha = np.array(susie_obj.rx2('alpha')) s_mu = np.array(susie_obj.rx2('mu')) s_mu2 = np.array(susie_obj.rx2('mu2')) s_X_column_scale_factors = np.array(susie_obj.rx2('X_column_scale_factors')) beta_var = np.sum(s_alpha*s_mu2 - (s_alpha*s_mu)**2, axis=0) / (s_X_column_scale_factors**2) assert np.all(beta_var>=0) - - #create output df + + # create output df df_susie = self.df_sumstats_locus.copy() df_susie['PIP'] = pip df_susie['BETA_MEAN'] = beta_mean # flip back the finemap BETA, as the alleles are in original order df_susie.loc[is_flipped, 'BETA_MEAN'] *= (-1) df_susie['BETA_SD'] = np.sqrt(beta_var) - - #add distance from center + + # add distance from center start = df_susie['BP'].min() end = df_susie['BP'].max() middle = 
(start+end)//2 df_susie['DISTANCE_FROM_CENTER'] = np.abs(df_susie['BP'] - middle) - - #mark causal sets + + # mark causal sets import rpy2 logging.info('Using rpy2 version %s'%(rpy2.__version__)) if Version(rpy2.__version__) >= Version('3.5.9'): @@ -906,7 +1156,7 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob self.susie_dict = {key:np.array(susie_obj.rx2(key), dtype=object) for key in list(susie_obj.names)} df_susie['CREDIBLE_SET'] = 0 susie_sets = self.susie_dict['sets'][0] - #if type(susie_sets) != self.RNULLType: + # if type(susie_sets) != self.RNULLType: try: for set_i, susie_set in enumerate(susie_sets): is_in_set = np.zeros(df_susie.shape[0], dtype=bool) @@ -915,43 +1165,60 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob df_susie.loc[is_in_set, 'CREDIBLE_SET'] = set_i+1 except TypeError: pass - - - #save SuSiE object if requested + + # save SuSiE object if requested if susie_outfile is not None: from rpy2.robjects.packages import importr R_base = importr('base', robject_translations = {'print.me': 'print_dot_me', 'print_me': 'print_uscore_me'}) R_base.saveRDS(susie_obj, file=susie_outfile) logging.info('Saved SuSiE object to RDS file: %s'%(susie_outfile)) - - #delete the LD file if needed + # delete the LD file if needed if delete_ld_files_on_exit: ld_file_dir = os.path.dirname(ld_file) if os.path.exists(ld_file_dir): shutil.rmtree(ld_file_dir) - + return df_susie - - - - - class FINEMAP_Wrapper(Fine_Mapping): - def __init__(self, genotypes_file, sumstats_file, n, chr_num, finemap_exe, ldstore_exe, sample_file=None, - incl_samples=None, cache_dir=None, n_threads=None, memory=None, - allow_swapped_indel_alleles=False): - - super(FINEMAP_Wrapper, self).__init__(genotypes_file, sumstats_file, n, chr_num, ldstore_exe=ldstore_exe, sample_file=sample_file, incl_samples=incl_samples, cache_dir=cache_dir, n_threads=n_threads, memory=memory, allow_swapped_indel_alleles=allow_swapped_indel_alleles) + def __init__( + self, + genotypes_file, + sumstats_file, + n, + chr_num, + finemap_exe, + ldstore_exe, + sample_file=None, + incl_samples=None, + cache_dir=None, + cache_format=None, + n_threads=None, + memory=None, + allow_swapped_indel_alleles=False, + ): + + super(FINEMAP_Wrapper, self).__init__( + genotypes_file, + sumstats_file, + n, + chr_num, + ldstore_exe=ldstore_exe, + sample_file=sample_file, + incl_samples=incl_samples, + cache_dir=cache_dir, + cache_format=cache_format, + n_threads=n_threads, + memory=memory, + allow_swapped_indel_alleles=allow_swapped_indel_alleles, + ) self.finemap_exe = finemap_exe - - def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob=True, prior_var=None, residual_var=None, hess=False, hess_iter=100, hess_min_h2=None, susie_max_iter=100, verbose=False, ld_file=None, debug_dir=None, allow_missing=False, susie_outfile=None, residual_var_init=None, hess_resvar=False, finemap_dir=None): - #check params + # check params if use_prior_causal_prob and 'SNPVAR' not in self.df_sumstats.columns: raise ValueError('SNPVAR column not found in sumstats file') if hess: @@ -965,13 +1232,13 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob if residual_var_init is not None: raise NotImplementedError('FINEMAP object does not support --susie-resvar-init') # if allow_missing: - # raise ValueError('FINEMAP object does not support --allow-missing') - - #download LD file if it's a url + # raise ValueError('FINEMAP object does not support 
--allow-missing') + + # download LD file if it's a url if uri_validator(ld_file): ld_file = download_ld_file(ld_file) - - #create prefix of output files + + # create prefix of output files if finemap_dir is None: finemap_dir = tempfile.mkdtemp() else: @@ -979,12 +1246,11 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob logging.info('Saving FINEMAP files to directory: %s'%(finemap_dir)) assert os.path.isdir(finemap_dir) finemap_output_prefix = os.path.join(finemap_dir, 'finemap') - - - #set locus + + # set locus self.set_locus(locus_start, locus_end) - - #find or create a suitable ld_file + + # find or create a suitable ld_file if num_causal_snps==1: if ld_file is not None: raise ValueError('cannot specify an ld file when assuming a single causal SNP per locus') @@ -1007,24 +1273,23 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob elif ld_file.endswith('.bcor'): pass elif ld_file.endswith('.npz') or os.path.exists(ld_file+'.npz'): - ld_arr, df_ld_snps = read_ld_from_file(ld_file) - self.sync_ld_sumstats(ld_arr, df_ld_snps, allow_missing=allow_missing) - del ld_arr, df_ld_snps - ld_file = finemap_output_prefix + '.ld' - np.savetxt(ld_file, self.df_ld.values, fmt='%0.5f') + ld_arr, df_ld_snps = read_ld_from_file(ld_file) + self.sync_ld_sumstats(ld_arr, df_ld_snps, allow_missing=allow_missing) + del ld_arr, df_ld_snps + ld_file = finemap_output_prefix + ".ld" + np.savetxt(ld_file, self.df_ld.values, fmt="%0.5f") else: - raise ValueError('unknown LD file format for file: %s'%(ld_file)) - + raise ValueError("unknown LD file format for file: %s" % (ld_file)) - #define file names + # define file names master_file = finemap_output_prefix+'.master' snp_filename = finemap_output_prefix+'.snp' config_filename = finemap_output_prefix+'.config' cred_filename = finemap_output_prefix+'.cred' log_filename = finemap_output_prefix+'.log' z_filename = finemap_output_prefix+'.z' - - #flip some of the alleles + + # flip some of the alleles if num_causal_snps == 1: is_flipped = np.zeros(self.df_sumstats_locus.shape[0], dtype=bool) else: @@ -1039,13 +1304,13 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob assert np.all(is_flipped | is_not_flipped) if np.any(is_flipped): logging.info('Flipping the effect-sign of %d SNPs that are flipped compared to the LD panel'%(is_flipped.sum())) - - #create df_z and save it to disk + + # create df_z and save it to disk df_z = self.df_sumstats_locus[['SNP', 'CHR', 'BP', 'A1', 'A2', 'Z']].copy() df_z.loc[is_flipped, 'A1'] = self.df_sumstats_locus.loc[is_flipped, 'A2'] df_z.loc[is_flipped, 'A2'] = self.df_sumstats_locus.loc[is_flipped, 'A1'] df_z.loc[is_flipped, 'Z'] *= (-1) - + df_z.rename(columns={'SNP':'rsid', 'CHR':'chromosome', 'BP':'position', 'A1':'allele1', 'A2':'allele2', 'Z':'beta'}, inplace=True, errors='ignore') df_z['se'] = 1 df_z['maf'] = 0.05 @@ -1054,7 +1319,7 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob df_z['prob'] = self.df_sumstats_locus['SNPVAR'] / self.df_sumstats_locus['SNPVAR'].sum() df_z.to_csv(z_filename, header=True, index=False, sep=' ', float_format='%0.5f') - #create the master file + # create the master file df_master = pd.DataFrame() df_master['z'] = [z_filename] df_master['snp'] = [snp_filename] @@ -1069,8 +1334,8 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob else: raise ValueError('Illegal LD file format') df_master.to_csv(master_file, sep=';', header=True, 
index=False) - - #prepare the FINEMAP command + + # prepare the FINEMAP command finemap_cmd = [self.finemap_exe] finemap_cmd += ['--in-files', master_file, '--sss'] finemap_cmd += ['--force-n-samples'] @@ -1085,7 +1350,7 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob if use_prior_causal_prob: finemap_cmd += ['--prior-snps'] - #run FINEMAP + # run FINEMAP t0 = time.time() m = self.df_sumstats_locus.shape[0] logging.info('Starting %s FINEMAP fine-mapping for chromosome %d BP %d-%d (%d SNPs)'%( @@ -1098,8 +1363,8 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob run_executable(finemap_cmd, 'FINEMAP', measure_time=True, show_output=verbose, show_command=verbose) if not os.path.exists(log_filename+'_sss'): raise IOError('FINEMAP output files not found') - - #load log file + + # load log file found_post_csnps = False with open(log_filename+'_sss') as f: for line in f: @@ -1109,12 +1374,12 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob break if not found_post_csnps: raise IOError('corrupt log file found: %s'%(log_filename+'_sss')) - - #load results + + # load results df_finemap = pd.read_table(snp_filename, sep=' ', usecols=['rsid', 'chromosome', 'position', 'allele1', 'allele2', 'prob', 'mean', 'sd']) df_finemap.rename(columns={'rsid':'SNP', 'position':'BP', 'chromosome':'CHR', 'prob':'PIP', 'mean':'BETA_MEAN', 'sd':'BETA_SD', 'allele1':'A1', 'allele2':'A2'}, inplace=True, errors='raise') - #read log10bf + # read log10bf log10bf = None with open(log_filename+'_sss', 'r') as f: for line in f: @@ -1123,14 +1388,14 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob break if log10bf is None: raise ValueError('FINEMP did not report Log10-BF') - - #add distance from center + + # add distance from center start = df_finemap['BP'].min() end = df_finemap['BP'].max() middle = (start+end)//2 df_finemap['DISTANCE_FROM_CENTER'] = np.abs(df_finemap['BP'] - middle) - - #add causal set info + + # add causal set info df_finemap['CREDIBLE_SET'] = 0 cred_file = None for m in range(num_causal_snps, 0, -1): @@ -1144,17 +1409,11 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob for c_i, c in enumerate(df_cred.columns): df_finemap.loc[df_cred[c].dropna(), 'CREDIBLE_SET'] = c_i+1 df_finemap.reset_index(inplace=True, drop=True) - + finemap_time = time.time()-t0 logging.info('Done in %0.2f seconds'%(finemap_time)) - - + return df_finemap - - - - - if __name__ == '__main__': @@ -1162,14 +1421,18 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob import argparse parser = argparse.ArgumentParser() - #general parameters + # general parameters parser.add_argument('--method', required=True, help='Fine-mapping method (currently susie and finemap are supported)') parser.add_argument('--sumstats', required=True, help='Name of sumstats file') parser.add_argument('--chr', required=True, type=int, help='Target chromosome') parser.add_argument('--start', required=True, type=int, help='First base-pair in the region to finemap') parser.add_argument('--end', required=True, type=int, help='Last base-pair in the region to finemap') parser.add_argument('--n', required=True, type=int, help='Sample size') - parser.add_argument('--geno', default=None, help='Genotypes file (plink or bgen format)') + parser.add_argument( + "--geno", + default=None, + help="Genotypes file (plink1 format, plink2 format(pgen) or bgen format)", + ) 
    parser.add_argument('--ld', default=None, help='prefix or full name of an LD matrix file')
    parser.add_argument('--out', required=True, help='name of the output file')
    parser.add_argument('--verbose', action='store_true', default=False, help='If specified, show verbose output')
@@ -1177,7 +1440,7 @@
    parser.add_argument('--sample-file', default=None, help='BGEN files must be used together with a sample file')
    parser.add_argument('--incl-samples', default=None, help='A single-column text file specifying the ids of individuals to include in fine-mapping')

-    #fine-mapping parameters
+    # fine-mapping parameters
    parser.add_argument('--max-num-causal', required=True, type=int, help='Number of causal SNPs')
    parser.add_argument('--non-funct', action='store_true', default=False, help='Perform non-functionally informed fine-mapping')
    parser.add_argument('--allow-missing', default=False, action='store_true', help='If specified, SNPs with sumstats that are not \
@@ -1192,13 +1455,18 @@
    parser.add_argument('--no-sort-pip', default=False, action='store_true', help='Do **not** sort results by PIP. Recommended for use with --susie-outfile')

-    #LDstore related parameters
+    # LDstore related parameters
    parser.add_argument('--ldstore2', default=None, help='Path to an LDstore 2.0 executable file')
    parser.add_argument('--finemap-exe', default=None, help='Path to FINEMAP v1.4 executable file')
    parser.add_argument('--memory', type=int, default=1, help='Maximum amount of memory in GB to allocate to LDStore')
    parser.add_argument('--threads', type=int, default=None, help='The number of CPU cores LDstore will use (if not specified, LDstore will use the max number of CPU cores available)')
    parser.add_argument('--cache-dir', default=None, help='If specified, this is a path of a directory that will cache LD matrices that have already been computed')
-
+    parser.add_argument(
+        "--cache-format",
+        default=None,
+        help="Format of the cached LD files (default: bcor); with npz, a metadata file named <prefix>.gz is saved alongside the .npz and must contain the columns SNP, CHR, BP, A1 (effect allele) and A2",
+        choices=["bcor", "npz", None],
+    )

    # FINEMAP-specific parameters
    parser.add_argument('--finemap-dir', default=None, help='If specified, the FINEMAP files will be saved to this directory')
@@ -1212,25 +1480,28 @@
    parser.add_argument('--hess-iter', type=int, default=100, help='Average HESS over this number of iterations (default: 100)')
    parser.add_argument('--hess-min-h2', type=float, default=None, help='When estimating causal effect variance via HESS, exclude SNPs that tag less than this amount of heritability (default: None)')

-    #check package versions
+    # TWAS fine-mapping parameters
+    # coming soon
+
+    # check package versions
    check_package_versions()
-    
-    #show splash screen
+
+    # show splash screen
    splash_screen()
-    
-    #extract args
+
+    # extract args
    args = parser.parse_args()
-    
-    #check that the output directory exists
+
+    # check that the output directory exists
    if len(os.path.dirname(args.out))>0 and not os.path.exists(os.path.dirname(args.out)):
        raise ValueError('output directory %s doesn\'t exist'%(os.path.dirname(args.out)))
    if args.susie_outfile is not None and len(os.path.dirname(args.susie_outfile))>0 and not os.path.exists(os.path.dirname(args.susie_outfile)):
        raise ValueError('output directory %s doesn\'t exist'%(os.path.dirname(args.susie_outfile)))
-    #configure logger
+    # configure logger
    configure_logger(args.out)
-    
-    #check params
+
+    # check params
    if args.max_num_causal==1:
        if args.geno is not None or args.ld is not None:
            raise ValueError('When max_num_causal=1 fine-mapping please omit the flags --geno and --ld (we cannot use LD information in that setting)')
@@ -1249,15 +1520,24 @@
    if args.susie_outfile is not None and not args.no_sort_pip:
        logging.warning('--susie-outfile was set but not --no-sort-pip. This will make it difficult to assign SNP names to the SuSiE R object')

-    #Create a fine-mapping class member
+    # Create a fine-mapping class member
    if args.method == 'susie':
        if args.finemap_dir is not None:
            raise ValueError('--finemap-dir cannot be specified with susie method')
-        finemap_obj = SUSIE_Wrapper(genotypes_file=args.geno, sumstats_file=args.sumstats, n=args.n, chr_num=args.chr,
-                                    sample_file=args.sample_file, incl_samples=args.incl_samples,
-                                    ldstore_exe=args.ldstore2, n_threads=args.threads,
-                                    cache_dir=args.cache_dir, memory=args.memory,
-                                    allow_swapped_indel_alleles=args.allow_swapped_indel_alleles)
+        finemap_obj = SUSIE_Wrapper(
+            genotypes_file=args.geno,
+            sumstats_file=args.sumstats,
+            n=args.n,
+            chr_num=args.chr,
+            sample_file=args.sample_file,
+            incl_samples=args.incl_samples,
+            ldstore_exe=args.ldstore2,
+            n_threads=args.threads,
+            cache_dir=args.cache_dir,
+            cache_format=args.cache_format,
+            memory=args.memory,
+            allow_swapped_indel_alleles=args.allow_swapped_indel_alleles,
+        )
    elif args.method == 'finemap':
        if args.susie_outfile is not None:
            raise ValueError('--susie-outfile cannot be specified with finemap method')
@@ -1265,15 +1545,25 @@
            raise ValueError('need to specify --finemap-exe')
        if args.hess:
            raise ValueError('FINEMAP cannot be used with --hess')
-        finemap_obj = FINEMAP_Wrapper(genotypes_file=args.geno, sumstats_file=args.sumstats, n=args.n, chr_num=args.chr,
-                                      sample_file=args.sample_file, incl_samples=args.incl_samples,
-                                      ldstore_exe=args.ldstore2, finemap_exe=args.finemap_exe, n_threads=args.threads,
-                                      cache_dir=args.cache_dir, memory=args.memory,
-                                      allow_swapped_indel_alleles=args.allow_swapped_indel_alleles)
+        finemap_obj = FINEMAP_Wrapper(
+            genotypes_file=args.geno,
+            sumstats_file=args.sumstats,
+            n=args.n,
+            chr_num=args.chr,
+            sample_file=args.sample_file,
+            incl_samples=args.incl_samples,
+            ldstore_exe=args.ldstore2,
+            finemap_exe=args.finemap_exe,
+            n_threads=args.threads,
+            cache_dir=args.cache_dir,
+            cache_format=args.cache_format,
+            memory=args.memory,
+            allow_swapped_indel_alleles=args.allow_swapped_indel_alleles,
+        )
    else:
        raise ValueError('unknown method specified in --method')
-    
-    #run fine-mapping
+
+    # run fine-mapping
    df_finemap = finemap_obj.finemap(locus_start=args.start, locus_end=args.end, num_causal_snps=args.max_num_causal,
                       use_prior_causal_prob=not args.non_funct, prior_var=None, hess=args.hess, hess_iter=args.hess_iter, hess_min_h2=args.hess_min_h2,
@@ -1287,8 +1577,4 @@
    if args.out.endswith('.parquet'):
        df_finemap.to_parquet(args.out, index=False)
    else:
-        df_finemap.to_csv(args.out, sep='\t', index=False, float_format='%0.5e')
-        
-        
-        
-        
+        df_finemap.to_csv(args.out, sep="\t", index=False, float_format="%0.5e")
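## Usage Sketches

The sketches below illustrate the workflows described in the README. They are minimal, hedged examples rather than excerpts from `finemapper.py` or `finemap_tools`; any helper names and file layouts not shown in the diff above are assumptions.

The tabix-based sumstats loading (item 5 in the README) can be approximated with `pysam`. This sketch assumes a tab-separated file indexed with `tabix -s 2 -b 3 -e 3 -c S`; the actual `finemap_tools.reader.tabix_reader` may behave differently, and `read_sumstats_chrom` is a hypothetical helper.

```python
# Minimal sketch of region-limited sumstats reading via a tabix index.
# Assumes pysam is installed; finemap_tools.reader.tabix_reader may differ.
import pandas as pd
import pysam

def read_sumstats_chrom(bgz_file, chrom):
    """Fetch a single chromosome from a bgzip-compressed, tabix-indexed
    sumstats file instead of reading the whole genome-wide file."""
    tbx = pysam.TabixFile(bgz_file)
    # With `tabix -c S`, the header line (starting with "SNP") is treated as a
    # comment line, so it is available via tbx.header rather than fetch().
    columns = tbx.header[-1].split("\t") if tbx.header else None
    rows = [line.split("\t") for line in tbx.fetch(str(chrom))]
    return pd.DataFrame(rows, columns=columns)

# e.g. df_sumstats = read_sumstats_chrom("sumstats.bgz", 1)
```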
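The .npz cache format (items 1-2 in the README) stores only the lower triangle of the LD matrix with a halved diagonal, exactly as `save_ld_to_npz` and `load_ld_npz` do in the diff above. A self-contained round-trip sketch:

```python
# Round-trip sketch of the .npz LD cache format: only the lower triangle is
# stored (with a halved diagonal), so L + L.T restores the full matrix.
import numpy as np
from scipy import sparse

def save_ld(ld_arr, npz_file):
    R = np.tril(ld_arr).astype(np.float64)
    np.fill_diagonal(R, np.diag(R) / 2.0)  # halve so that L + L.T restores the diagonal
    sparse.save_npz(npz_file, sparse.coo_matrix(R), compressed=True)

def load_ld(npz_file):
    L = sparse.load_npz(npz_file).toarray()
    return L + L.T

R = np.array([[1.0, 0.3, 0.1],
              [0.3, 1.0, 0.5],
              [0.1, 0.5, 1.0]])
save_ld(R, "demo_ld.npz")
assert np.allclose(load_ld("demo_ld.npz"), R)
```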
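For pgen input (item 3 in the README), `finemap_tools.plink.plink2_cal_LD` wraps a PLINK 2 invocation along the lines of the sketch below. `plink2_ld_matrix` is a hypothetical stand-in, the wrapper's real signature differs, and the output file extension shown is an assumption to verify against the PLINK 2 log.

```python
# Hedged sketch of computing a square LD (r^2) matrix from a pgen fileset with
# PLINK 2 (>= v2.00a6LM, 2 Mar 2024), restricted to the SNPs in the sumstats.
import subprocess

def plink2_ld_matrix(pgen_prefix, snp_ids, out_prefix, threads=4, memory_mb=4096):
    snplist = out_prefix + ".snplist"
    with open(snplist, "w") as f:
        f.write("\n".join(snp_ids) + "\n")
    cmd = ["plink2", "--pfile", pgen_prefix,
           "--extract", snplist,          # keep only the sumstats SNPs
           "--r2-unphased", "square",     # square r^2 matrix, as in the README
           "--threads", str(threads),
           "--memory", str(memory_mb),
           "--out", out_prefix]
    subprocess.run(cmd, check=True)
    # assumed output name; check your PLINK 2 version's log for the exact file
    return out_prefix + ".unphased.vcor2"
```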
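Finally, the NaN handling in `sync_ld_sumstats` (item 4 in the README) greedily drops the SNP with the most missing LD values until none remain. The toy example below shows why this keeps more SNPs than dropping every row that merely touches a NaN: a single monomorphic SNP produces a full NaN row and column.

```python
# Toy demonstration of the greedy NaN filter: rs3's LD is entirely missing
# (e.g. a monomorphic SNP), so every row touches a NaN, yet only rs3 is dropped.
import numpy as np
import pandas as pd

snps = ["rs1", "rs2", "rs3"]
ld = pd.DataFrame([[1.0, 0.2, np.nan],
                   [0.2, 1.0, np.nan],
                   [np.nan, np.nan, np.nan]], index=snps, columns=snps)

while ld.isna().values.any():
    worst = ld.isna().sum(axis=1).idxmax()  # SNP with the most missing values
    ld = ld.drop(index=worst, columns=worst)

print(ld.index.tolist())  # ['rs1', 'rs2']
```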