Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
.idea/
venv/
data/

3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ NB:
## Coverage

To extract the coverage from gVCF files, the following steps are required.
Coverage is measuered in read depth. We use the `MIN_DP` field where
available (non-variant regions) with a fallback to the `DP` field
(variants).

- gvcf2coverage:
- `gvcf2coverage < {input} > {output}`
Expand Down
21 changes: 16 additions & 5 deletions gvcf2coverage/Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,19 @@
.PHONY: install
TARGET = gvcf2coverage

gvcf2coverage: src/main.c
$(CC) -o $@ -Wall -Wextra -Wpedantic -std=c99 -O3 -I$(HTSLIB_INCDIR) -L$(HTSLIB_LIBDIR) src/main.c -lhts
CC = gcc
CFLAGS = -std=c99 -march=native -Wall -Wextra -Wpedantic \
-Wformat=2 -Wshadow -Wwrite-strings -Wstrict-prototypes \
-Wold-style-definition -Wredundant-decls -Wnested-externs \
-O3 -DNDEBUG

install: gvcf2coverage
.PHONY: clean install

$(TARGET): src/main.c
$(CC) -o $@ $(CFLAGS) -I$(HTSLIB_INCDIR) -L$(HTSLIB_LIBDIR) $< -lhts

clean:
rm $(TARGET)

install: $(TARGET)
mkdir -p $(PREFIX)/bin
install -m 755 gvcf2coverage $(PREFIX)/bin
install -m 755 $< $(PREFIX)/bin
26 changes: 14 additions & 12 deletions gvcf2coverage/src/main.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// VERSION 0.1
// VERSION 0.4

#include <stdbool.h> // bool, false, true
#include <stddef.h> // size_t
Expand Down Expand Up @@ -120,7 +120,6 @@ main(int argc, char* argv[])
int32_t* gt = NULL;

bool first = true;
bool jump = false;

int window_start = 0;
int window_end = 0;
Expand All @@ -130,10 +129,14 @@ main(int argc, char* argv[])
while (0 == bcf_read(fh, hdr, rec))
{
int32_t depth = 0;
if (1 == bcf_get_format_int32(hdr, rec, "DP", &dp, &(int){0}))
if (1 == bcf_get_format_int32(hdr, rec, "MIN_DP", &dp, &(int){0}))
{
depth = dp[0];
} // if
else if (1 == bcf_get_format_int32(hdr, rec, "DP", &dp, &(int){0}))
{
depth = dp[0];
} // if

//
// If depth is below the threshold, no need to proceed
Expand Down Expand Up @@ -174,7 +177,7 @@ main(int argc, char* argv[])
} // if

//
// We just started
// Open window for first entry
//
if (first)
{
Expand All @@ -183,18 +186,18 @@ main(int argc, char* argv[])
window_chrom = chrom;
window_ploidy = ploidy;

// eprint(f"First! c:{window_chrom} s:{start}, w_s={window_start} e:{end} w_e={window_end}")

first = false;
continue;
} // if

//
// Detect if we need to close and open a new window
//
if (window_chrom != chrom ||
window_ploidy != ploidy ||
window_end + distance < start)
{
jump = true;
// eprint(f"Chrom changed from {window_chrom} to {chrom}.")
// Close the window
(void) fprintf(stdout, "%s\t%d\t%d\t%d\n", window_chrom, window_start, window_end, window_ploidy);

window_start = start;
Expand All @@ -204,17 +207,16 @@ main(int argc, char* argv[])
} // if
else
{
jump = false;
// Extend the window
window_start = imin(window_start, start);
window_end = imax(window_end, end);
// eprint(f"No jump! s:{start}, w_s={window_start} e:{end} w_e={window_end}")
} // else
} // while

//
// If the last iteration of the loop was not a jump, we still need to print
// Always print the last entry when merging, except if there were no entries
//
if (merge && !jump)
if (merge && !first)
{
(void) fprintf(stdout, "%s\t%d\t%d\t%d\n", window_chrom, window_start, window_end, window_ploidy);
} // if
Expand Down
40 changes: 15 additions & 25 deletions pygvcf2coverage/pygvcf2coverage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,14 @@ def gvcf2coverage(threshold, merge, distance):
jump = False

# Depth
dp = entry.format('DP')
dp = entry.format('MIN_DP')

if dp is None:
depth = 0
dp = entry.format('DP')
if dp is None:
depth = 0
else:
depth = dp[0][0]
else:
depth = dp[0][0]

Expand All @@ -56,51 +60,37 @@ def gvcf2coverage(threshold, merge, distance):
continue

#
# We just started
# Open window for first entry
#
if first:
# First entry
window_start = start
window_end = end
window_chrom = chrom
window_ploidy = ploidy

first = False

# eprint(f"First! c:{window_chrom} s:{start}, w_s={window_start} e:{end} w_e={window_end}")

continue

if window_chrom != chrom:
# eprint(f"Chrom changed from {window_chrom} to {chrom}.")
jump = True

elif window_ploidy != ploidy:
# eprint(f"Ploidy changed from {window_ploidy} to {ploidy}")
jump = True

elif window_end + distance < start:
# eprint("Gap! (window_end:%d < start:%d)" % (window_end + distance, start))
jump = True

if jump:
# eprint("Jump!")
#
# Detect if we need to close and open a new window
#
if window_chrom != chrom or window_ploidy != ploidy or window_end + distance < start:
# Close the window
print(window_chrom, window_start, window_end, window_ploidy, sep="\t")

window_start = start
window_end = end
window_chrom = chrom
window_ploidy = ploidy

else:
# Extend the window
window_start = min(window_start, start)
window_end = max(window_end, end)
# eprint(f"No jump! s:{start}, w_s={window_start} e:{end} w_e={window_end}")

#
# If the last iteration of the loop was not a jump, we still need to print
# Always print the last entry when merging, except if there were no entries
#
if merge and not jump:
if merge and not first:
print(window_chrom, window_start, window_end, window_ploidy, sep="\t")


Expand Down
2 changes: 1 addition & 1 deletion pygvcf2coverage/setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from setuptools import setup

setup(name='pygvcf2coverage',
version='0.3',
version='0.4',
description='A python tool to extract coverage from gvcf files.',
url='http://github.com/varda/varda2_preprocessing',
author='Mark Santcroos',
Expand Down