From 38702539ac6b566296d8303ab940d5c42cc5f36b Mon Sep 17 00:00:00 2001 From: jkvis Date: Tue, 21 Jul 2020 11:33:12 +0200 Subject: [PATCH 1/5] Prefer MIN_DP over DP --- README.md | 3 +++ gvcf2coverage/Makefile | 21 ++++++++++++++++----- gvcf2coverage/src/main.c | 6 +++++- pygvcf2coverage/pygvcf2coverage/__init__.py | 8 ++++++-- 4 files changed, 30 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 03986a0..9c8d7e2 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,9 @@ NB: ## Coverage To extract the coverage from gVCF files, the following steps are required. +Coverage is measuered in read depth. We use the `MIN_DP` field where +available (non-variant regions) with a fallback to the `DP` field +(variants). - gvcf2coverage: - `gvcf2coverage < {input} > {output}` diff --git a/gvcf2coverage/Makefile b/gvcf2coverage/Makefile index 6d9e6f4..150f3b0 100644 --- a/gvcf2coverage/Makefile +++ b/gvcf2coverage/Makefile @@ -1,8 +1,19 @@ -.PHONY: install +TARGET = gvcf2coverage -gvcf2coverage: src/main.c - $(CC) -o $@ -Wall -Wextra -Wpedantic -std=c99 -O3 -I$(HTSLIB_INCDIR) -L$(HTSLIB_LIBDIR) src/main.c -lhts +CC = gcc +CFLAGS = -std=c99 -march=native -Wall -Wextra -Wpedantic \ + -Wformat=2 -Wshadow -Wwrite-strings -Wstrict-prototypes \ + -Wold-style-definition -Wredundant-decls -Wnested-externs \ + -O3 -DNDEBUG -install: gvcf2coverage +.PHONY: clean install + +$(TARGET): src/main.c + $(CC) -o $@ $(CFLAGS) -I$(HTSLIB_INCDIR) -L$(HTSLIB_LIBDIR) $< -lhts + +clean: + rm $(TARGET) + +install: $(TARGET) mkdir -p $(PREFIX)/bin - install -m 755 gvcf2coverage $(PREFIX)/bin + install -m 755 $< $(PREFIX)/bin diff --git a/gvcf2coverage/src/main.c b/gvcf2coverage/src/main.c index 1df134e..b47195e 100644 --- a/gvcf2coverage/src/main.c +++ b/gvcf2coverage/src/main.c @@ -130,10 +130,14 @@ main(int argc, char* argv[]) while (0 == bcf_read(fh, hdr, rec)) { int32_t depth = 0; - if (1 == bcf_get_format_int32(hdr, rec, "DP", &dp, &(int){0})) + if (1 == bcf_get_format_int32(hdr, rec, "MIN_DP", &dp, &(int){0})) { depth = dp[0]; } // if + else if (1 == bcf_get_format_int32(hdr, rec, "DP", &dp, &(int){0})) + { + depth = dp[0]; + } // if // // If depth is below the threshold, no need to proceed diff --git a/pygvcf2coverage/pygvcf2coverage/__init__.py b/pygvcf2coverage/pygvcf2coverage/__init__.py index c49cf90..7ac60ac 100644 --- a/pygvcf2coverage/pygvcf2coverage/__init__.py +++ b/pygvcf2coverage/pygvcf2coverage/__init__.py @@ -27,10 +27,14 @@ def gvcf2coverage(threshold, merge, distance): jump = False # Depth - dp = entry.format('DP') + dp = entry.format('MIN_DP') if dp is None: - depth = 0 + dp = entry.format('DP') + if dp is None: + depth = 0 + else + depth = dp[0][0] else: depth = dp[0][0] From 91867e52a555ad0eaf20d669b5c6314a916d42e1 Mon Sep 17 00:00:00 2001 From: jkvis Date: Wed, 22 Jul 2020 13:02:41 +0200 Subject: [PATCH 2/5] Fixed typo in Python --- .gitignore | 2 ++ pygvcf2coverage/pygvcf2coverage/__init__.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index ae945fa..567ffb9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ .idea/ venv/ +data/ + diff --git a/pygvcf2coverage/pygvcf2coverage/__init__.py b/pygvcf2coverage/pygvcf2coverage/__init__.py index 7ac60ac..39779f5 100644 --- a/pygvcf2coverage/pygvcf2coverage/__init__.py +++ b/pygvcf2coverage/pygvcf2coverage/__init__.py @@ -33,7 +33,7 @@ def gvcf2coverage(threshold, merge, distance): dp = entry.format('DP') if dp is None: depth = 0 - else + else: depth = dp[0][0] else: depth = dp[0][0] From 924e331578b5092481f5f0f1b4194a8ae53f16c8 Mon Sep 17 00:00:00 2001 From: jkvis Date: Wed, 22 Jul 2020 13:21:29 +0200 Subject: [PATCH 3/5] Fixed #5: output last coverage interval --- gvcf2coverage/src/main.c | 6 ++---- pygvcf2coverage/pygvcf2coverage/__init__.py | 3 ++- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/gvcf2coverage/src/main.c b/gvcf2coverage/src/main.c index b47195e..2621bf1 100644 --- a/gvcf2coverage/src/main.c +++ b/gvcf2coverage/src/main.c @@ -120,7 +120,6 @@ main(int argc, char* argv[]) int32_t* gt = NULL; bool first = true; - bool jump = false; int window_start = 0; int window_end = 0; @@ -197,7 +196,6 @@ main(int argc, char* argv[]) window_ploidy != ploidy || window_end + distance < start) { - jump = true; // eprint(f"Chrom changed from {window_chrom} to {chrom}.") (void) fprintf(stdout, "%s\t%d\t%d\t%d\n", window_chrom, window_start, window_end, window_ploidy); @@ -208,7 +206,6 @@ main(int argc, char* argv[]) } // if else { - jump = false; window_start = imin(window_start, start); window_end = imax(window_end, end); // eprint(f"No jump! s:{start}, w_s={window_start} e:{end} w_e={window_end}") @@ -217,8 +214,9 @@ main(int argc, char* argv[]) // // If the last iteration of the loop was not a jump, we still need to print + // JKV: I'm not sure about the jump // - if (merge && !jump) + if (merge) { (void) fprintf(stdout, "%s\t%d\t%d\t%d\n", window_chrom, window_start, window_end, window_ploidy); } // if diff --git a/pygvcf2coverage/pygvcf2coverage/__init__.py b/pygvcf2coverage/pygvcf2coverage/__init__.py index 39779f5..2a70f02 100644 --- a/pygvcf2coverage/pygvcf2coverage/__init__.py +++ b/pygvcf2coverage/pygvcf2coverage/__init__.py @@ -103,8 +103,9 @@ def gvcf2coverage(threshold, merge, distance): # # If the last iteration of the loop was not a jump, we still need to print + # JKV: I'm not sure about the jump # - if merge and not jump: + if merge: print(window_chrom, window_start, window_end, window_ploidy, sep="\t") From d0331a47b139dbcca05ff10dfc62094893a8dbdf Mon Sep 17 00:00:00 2001 From: Mark Santcroos Date: Mon, 3 Aug 2020 21:54:24 +0200 Subject: [PATCH 4/5] Fix for no entries and sync python and c. --- gvcf2coverage/src/main.c | 16 +++++----- pygvcf2coverage/pygvcf2coverage/__init__.py | 33 ++++++--------------- 2 files changed, 17 insertions(+), 32 deletions(-) diff --git a/gvcf2coverage/src/main.c b/gvcf2coverage/src/main.c index 2621bf1..aba0557 100644 --- a/gvcf2coverage/src/main.c +++ b/gvcf2coverage/src/main.c @@ -177,7 +177,7 @@ main(int argc, char* argv[]) } // if // - // We just started + // Open window for first entry // if (first) { @@ -186,17 +186,18 @@ main(int argc, char* argv[]) window_chrom = chrom; window_ploidy = ploidy; - // eprint(f"First! c:{window_chrom} s:{start}, w_s={window_start} e:{end} w_e={window_end}") - first = false; continue; } // if + // + // Detect if we need to close and open a new window + // if (window_chrom != chrom || window_ploidy != ploidy || window_end + distance < start) { - // eprint(f"Chrom changed from {window_chrom} to {chrom}.") + // Close the window (void) fprintf(stdout, "%s\t%d\t%d\t%d\n", window_chrom, window_start, window_end, window_ploidy); window_start = start; @@ -206,17 +207,16 @@ main(int argc, char* argv[]) } // if else { + // Extend the window window_start = imin(window_start, start); window_end = imax(window_end, end); - // eprint(f"No jump! s:{start}, w_s={window_start} e:{end} w_e={window_end}") } // else } // while // - // If the last iteration of the loop was not a jump, we still need to print - // JKV: I'm not sure about the jump + // Always print the last entry when merging, except if there were no entries // - if (merge) + if (merge && !first) { (void) fprintf(stdout, "%s\t%d\t%d\t%d\n", window_chrom, window_start, window_end, window_ploidy); } // if diff --git a/pygvcf2coverage/pygvcf2coverage/__init__.py b/pygvcf2coverage/pygvcf2coverage/__init__.py index 2a70f02..60897ef 100644 --- a/pygvcf2coverage/pygvcf2coverage/__init__.py +++ b/pygvcf2coverage/pygvcf2coverage/__init__.py @@ -60,52 +60,37 @@ def gvcf2coverage(threshold, merge, distance): continue # - # We just started + # Open window for first entry # if first: - # First entry window_start = start window_end = end window_chrom = chrom window_ploidy = ploidy first = False - - # eprint(f"First! c:{window_chrom} s:{start}, w_s={window_start} e:{end} w_e={window_end}") - continue - if window_chrom != chrom: - # eprint(f"Chrom changed from {window_chrom} to {chrom}.") - jump = True - - elif window_ploidy != ploidy: - # eprint(f"Ploidy changed from {window_ploidy} to {ploidy}") - jump = True - - elif window_end + distance < start: - # eprint("Gap! (window_end:%d < start:%d)" % (window_end + distance, start)) - jump = True - - if jump: - # eprint("Jump!") + # + # Detect if we need to close and open a new window + # + if window_chrom != chrom or window_ploidy != ploidy or window_end + distance < start: + # Close the window print(window_chrom, window_start, window_end, window_ploidy, sep="\t") window_start = start window_end = end window_chrom = chrom window_ploidy = ploidy - else: + # Extend the window window_start = min(window_start, start) window_end = max(window_end, end) - # eprint(f"No jump! s:{start}, w_s={window_start} e:{end} w_e={window_end}") # - # If the last iteration of the loop was not a jump, we still need to print - # JKV: I'm not sure about the jump + # Always print the last entry when merging, except if there were no entries # - if merge: + if merge and not first: print(window_chrom, window_start, window_end, window_ploidy, sep="\t") From ed71be06178a533ae9532e274ebc3e81e1326fc4 Mon Sep 17 00:00:00 2001 From: Mark Santcroos Date: Mon, 3 Aug 2020 22:07:39 +0200 Subject: [PATCH 5/5] Bump versions. --- gvcf2coverage/src/main.c | 2 +- pygvcf2coverage/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gvcf2coverage/src/main.c b/gvcf2coverage/src/main.c index aba0557..9fe6a65 100644 --- a/gvcf2coverage/src/main.c +++ b/gvcf2coverage/src/main.c @@ -1,4 +1,4 @@ -// VERSION 0.1 +// VERSION 0.4 #include // bool, false, true #include // size_t diff --git a/pygvcf2coverage/setup.py b/pygvcf2coverage/setup.py index 7cce966..ab2867e 100644 --- a/pygvcf2coverage/setup.py +++ b/pygvcf2coverage/setup.py @@ -1,7 +1,7 @@ from setuptools import setup setup(name='pygvcf2coverage', - version='0.3', + version='0.4', description='A python tool to extract coverage from gvcf files.', url='http://github.com/varda/varda2_preprocessing', author='Mark Santcroos',