From c8c5c05814db54431244223c77c41a39c705a84c Mon Sep 17 00:00:00 2001 From: Vineet Bansal Date: Mon, 22 Aug 2022 17:57:52 -0400 Subject: [PATCH 1/5] A better bcftools version check (HATCHet seems to work with bcftools>=1.11) --- pyproject.toml | 2 +- src/hatchet/__init__.py | 2 +- src/hatchet/utils/check.py | 38 ++++++++++++++++++++++---------------- 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 06324d4e..6e459162 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "hatchet" -version = "1.0.3" +version = "1.0.4" authors = [ { name="Simone Zaccaria", email="s.zaccaria@ucl.ac.uk" }, { name="Ben Raphael", email="braphael@cs.princeton.edu" }, diff --git a/src/hatchet/__init__.py b/src/hatchet/__init__.py index c8fc7edb..c5d42434 100644 --- a/src/hatchet/__init__.py +++ b/src/hatchet/__init__.py @@ -1,4 +1,4 @@ -__version__ = '1.0.3' +__version__ = '1.0.4' import os.path from importlib.resources import path diff --git a/src/hatchet/utils/check.py b/src/hatchet/utils/check.py index 1cf99253..fc69823a 100644 --- a/src/hatchet/utils/check.py +++ b/src/hatchet/utils/check.py @@ -29,7 +29,7 @@ def suppress_stdout(): sys.stderr = old_stderr -def _check_cmd(exe_path, exe_name, *args): +def _check_cmd(exe_path, exe_name, cwd=None, *args): # This function should never raise Exceptions unless it's a genuine implementation bug # Only use exe and args that return a return code of 0 # Use exe_path as '' if you expect to be on PATH @@ -39,6 +39,7 @@ def _check_cmd(exe_path, exe_name, *args): cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, + cwd=cwd, universal_newlines=True, ) p.communicate() @@ -49,14 +50,14 @@ def _check_cmd(exe_path, exe_name, *args): return True -# Most command-line commands can be checked using _check_cmd(, , '--version') +# Most command-line commands can be checked using _check_cmd(, , , '--version') # Others, like below, need special handling because they have no simple invocations that return 0 def _check_tabix(): with tempfile.TemporaryDirectory() as tempdirname: with importlib.resources.path(hatchet.data, 'sample.sorted.gff.gz') as gz_path: _temp_gz_path = os.path.join(tempdirname, 'sample.sorted.gff.gz') shutil.copy(gz_path, _temp_gz_path) - return _check_cmd(config.paths.tabix, 'tabix', '-p', 'gff', _temp_gz_path, '-f') + return _check_cmd(config.paths.tabix, 'tabix', None, '-p', 'gff', _temp_gz_path, '-f') def _check_bgzip(): @@ -64,7 +65,7 @@ def _check_bgzip(): with importlib.resources.path(hatchet.data, 'sample.bbc') as file_path: _temp_file_path = os.path.join(tempdirname, 'sample.bbc') shutil.copy(file_path, _temp_file_path) - return _check_cmd(config.paths.bgzip, 'bgzip', _temp_file_path, '-f') + return _check_cmd(config.paths.bgzip, 'bgzip', None, _temp_file_path, '-f') def _check_picard(): @@ -92,6 +93,7 @@ def _check_picard(): return _check_cmd( exe_path, exe_name, + None, *args_pre, 'BuildBamIndex', '--INPUT', @@ -101,6 +103,14 @@ def _check_picard(): ) +def _check_bcftools(): + # The bcftools version we select should be capable of querying remote .vcf.gz files while also specifying + # a region; This is the use case in HATCHet's genotype_snps step; Seems to work with bcftools>=1.11 + with tempfile.TemporaryDirectory() as tempdirname: + return _check_cmd(config.paths.bcftools, 'bcftools', tempdirname, 'query', '-f', '\'%CHROM\t%POS\n\'', + '-r', '7', 'https://ftp.ncbi.nih.gov/snp/organisms/archive/apple_3750/VCF/00-All.vcf.gz') + + def _check_python_import(which): try: importlib.import_module(which) @@ -131,6 +141,7 @@ def _check_python_import(which): _check_cmd, config.paths.samtools, 'samtools', + None, '--version', ), ( @@ -142,6 +153,7 @@ def _check_python_import(which): _check_cmd, config.paths.mosdepth, 'mosdepth', + None, '--version', ), ], @@ -155,6 +167,7 @@ def _check_python_import(which): _check_cmd, config.paths.samtools, 'samtools', + None, '--version', ), ( @@ -163,10 +176,7 @@ def _check_python_import(which): 'Please install bcftools executable and either ensure its on your PATH, or its location specified in ' 'hatchet.ini as config.paths.bcftools, or its location specified using the environment variable ' 'HATCHET_PATHS_BCFTOOLS', - _check_cmd, - config.paths.bcftools, - 'bcftools', - '--version', + _check_bcftools, ), ], 'count-alleles': [ @@ -179,6 +189,7 @@ def _check_python_import(which): _check_cmd, config.paths.samtools, 'samtools', + None, '--version', ), ( @@ -187,10 +198,7 @@ def _check_python_import(which): 'Please install bcftools executable and either ensure its on your PATH, or its location specified in ' 'hatchet.ini as config.paths.bcftools, or its location specified using the environment variable ' 'HATCHET_PATHS_BCFTOOLS', - _check_cmd, - config.paths.bcftools, - 'bcftools', - '--version', + _check_bcftools, ), ], 'phase-snps': [ @@ -200,10 +208,7 @@ def _check_python_import(which): 'Please install bcftools executable and either ensure its on your PATH, or its location specified in ' 'hatchet.ini as config.paths.samtools, or its location specified using the environment variable ' 'HATCHET_PATHS_BCFTOOLS', - _check_cmd, - config.paths.bcftools, - 'bcftools', - '--version', + _check_bcftools, ), ( 'picard', @@ -223,6 +228,7 @@ def _check_python_import(which): _check_cmd, config.paths.shapeit, 'shapeit', + None, '--version', ), ( From e7b0340dbf4c6e70e1da11a1c98c260296fa6c5e Mon Sep 17 00:00:00 2001 From: Vineet Bansal Date: Mon, 22 Aug 2022 18:34:15 -0400 Subject: [PATCH 2/5] bumped samtools/bcftools version in CI to 1.11 --- .github/workflows/main.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ab5dae33..fe362f4a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -75,17 +75,17 @@ jobs: - name: Install SAMtools run: | - wget https://sourceforge.net/projects/samtools/files/samtools/1.7/samtools-1.7.tar.bz2/download -O samtools-1.7.tar.bz2 - tar xvjf samtools-1.7.tar.bz2 - (cd samtools-1.7 && ./configure && make) - echo "HATCHET_PATHS_SAMTOOLS=$(realpath samtools-1.7)" >> $GITHUB_ENV + wget https://sourceforge.net/projects/samtools/files/samtools/1.11/samtools-1.11.tar.bz2/download -O samtools-1.11.tar.bz2 + tar xvjf samtools-1.11.tar.bz2 + (cd samtools-1.11 && ./configure && make) + echo "HATCHET_PATHS_SAMTOOLS=$(realpath samtools-1.11)" >> $GITHUB_ENV - name: Install BCFTools run: | - wget https://sourceforge.net/projects/samtools/files/samtools/1.7/bcftools-1.7.tar.bz2/download -O bcftools-1.7.tar.bz2 - tar xvjf bcftools-1.7.tar.bz2 - (cd bcftools-1.7 && ./configure && make) - echo "HATCHET_PATHS_BCFTOOLS=$(realpath bcftools-1.7)" >> $GITHUB_ENV + wget https://sourceforge.net/projects/samtools/files/samtools/1.11/bcftools-1.11.tar.bz2/download -O bcftools-1.11.tar.bz2 + tar xvjf bcftools-1.11.tar.bz2 + (cd bcftools-1.11 && ./configure && make) + echo "HATCHET_PATHS_BCFTOOLS=$(realpath bcftools-1.11)" >> $GITHUB_ENV - name: Install tabix run: | From baaadaa5155d330737fb48a12a8d2038f7c8d5c7 Mon Sep 17 00:00:00 2001 From: Vineet Bansal Date: Mon, 22 Aug 2022 18:38:15 -0400 Subject: [PATCH 3/5] precommit checked --- src/hatchet/utils/check.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/hatchet/utils/check.py b/src/hatchet/utils/check.py index fc69823a..8a7e4793 100644 --- a/src/hatchet/utils/check.py +++ b/src/hatchet/utils/check.py @@ -107,8 +107,17 @@ def _check_bcftools(): # The bcftools version we select should be capable of querying remote .vcf.gz files while also specifying # a region; This is the use case in HATCHet's genotype_snps step; Seems to work with bcftools>=1.11 with tempfile.TemporaryDirectory() as tempdirname: - return _check_cmd(config.paths.bcftools, 'bcftools', tempdirname, 'query', '-f', '\'%CHROM\t%POS\n\'', - '-r', '7', 'https://ftp.ncbi.nih.gov/snp/organisms/archive/apple_3750/VCF/00-All.vcf.gz') + return _check_cmd( + config.paths.bcftools, + 'bcftools', + tempdirname, + 'query', + '-f', + "'%CHROM\t%POS\n'", + '-r', + '7', + 'https://ftp.ncbi.nih.gov/snp/organisms/archive/apple_3750/VCF/00-All.vcf.gz', + ) def _check_python_import(which): From 560e27f900c3ae8cdcf1ddba30bf4c2b524fbdd7 Mon Sep 17 00:00:00 2001 From: Vineet Bansal Date: Mon, 22 Aug 2022 21:18:35 -0400 Subject: [PATCH 4/5] checking bcftools version directly --- src/hatchet/utils/check.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/src/hatchet/utils/check.py b/src/hatchet/utils/check.py index 8a7e4793..b4380a5b 100644 --- a/src/hatchet/utils/check.py +++ b/src/hatchet/utils/check.py @@ -7,6 +7,7 @@ from contextlib import contextmanager import tempfile import shutil +from packaging import version from hatchet import config import hatchet.data @@ -105,19 +106,13 @@ def _check_picard(): def _check_bcftools(): # The bcftools version we select should be capable of querying remote .vcf.gz files while also specifying - # a region; This is the use case in HATCHet's genotype_snps step; Seems to work with bcftools>=1.11 - with tempfile.TemporaryDirectory() as tempdirname: - return _check_cmd( - config.paths.bcftools, - 'bcftools', - tempdirname, - 'query', - '-f', - "'%CHROM\t%POS\n'", - '-r', - '7', - 'https://ftp.ncbi.nih.gov/snp/organisms/archive/apple_3750/VCF/00-All.vcf.gz', - ) + # a region; This is the use case in HATCHet's genotype_snps step; This seems to work with bcftools>=1.11 + try: + cmd = os.path.join(config.paths.bcftools, 'bcftools') + bcftools_version = subprocess.check_output([cmd, '--version-only']).decode('utf-8') + return version.parse(bcftools_version) >= version.parse('1.11') + except: # noqa: E722 + return False def _check_python_import(which): @@ -182,7 +177,7 @@ def _check_python_import(which): ( 'bcftools', '', - 'Please install bcftools executable and either ensure its on your PATH, or its location specified in ' + 'Please install bcftools>=1.11 executable and either ensure its on your PATH, or its location specified in ' 'hatchet.ini as config.paths.bcftools, or its location specified using the environment variable ' 'HATCHET_PATHS_BCFTOOLS', _check_bcftools, From 50d2d1c2caab0a3509825d84b9abb533c159667b Mon Sep 17 00:00:00 2001 From: Vineet Bansal Date: Tue, 23 Aug 2022 10:09:20 -0400 Subject: [PATCH 5/5] bumped cache key --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index fe362f4a..6d2199b5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -59,7 +59,7 @@ jobs: uses: actions/cache@v2 with: path: testdata - key: hatchetcache04 + key: hatchetcache05 - name: Download Testing Data if: steps.cache-test-data.outputs.cache-hit != 'true'