Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
CC= gcc
CFLAGS= -g -Wall -O2
#LDFLAGS= -Wl,-rpath,\$$ORIGIN/../lib
DFLAGS= -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_USE_KNETFILE -D_CURSES_LIB=1
DFLAGS= -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_USE_KNETFILE -D_CURSES_LIB=1 -DKNETFILE_HOOKS
KNETFILE_O= knetfile.o
LOBJS= bgzf.o kstring.o bam_aux.o bam.o bam_import.o sam.o bam_index.o \
bam_pileup.o bam_lpileup.o bam_md.o razf.o faidx.o bedidx.o \
Expand Down
2 changes: 2 additions & 0 deletions bam.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ bam_header_t *bam_header_read(bamFile fp)
char buf[4];
int magic_len;
int32_t i = 1, name_len;
#if 0
// check EOF
i = bgzf_check_EOF(fp);
if (i < 0) {
Expand All @@ -92,6 +93,7 @@ bam_header_t *bam_header_read(bamFile fp)
if (errno != ESPIPE) perror("[bam_header_read] bgzf_check_EOF");
}
else if (i == 0) fprintf(stderr, "[bam_header_read] EOF marker is absent. The input is probably truncated.\n");
#endif
// read "BAM1"
magic_len = bam_read(fp, buf, 4);
if (magic_len != 4 || strncmp(buf, "BAM\001", 4) != 0) {
Expand Down
102 changes: 90 additions & 12 deletions bam_index.c
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,55 @@ void bam_index_save(const bam_index_t *idx, FILE *fp)
fflush(fp);
}

static bam_index_t *bam_index_load_core(FILE *fp)
typedef size_t (*index_read_f)(void *ptr, size_t size, size_t nmemb, void *fp);

#ifdef KNETFILE_HOOKS
// Use buffered knetfile I/O instead of saving index file to local directory.
#define KNETBUFSIZE (1024 * 1024)
/*
 * Read-ahead buffer layered over a knetFile, so that the many small reads
 * made while parsing an index are served from memory instead of one
 * knet_read() call each.
 * The data area is embedded (KNETBUFSIZE bytes), so an instance is large;
 * knet_buf_new() allocates it on the heap.
 */
struct knet_buf {
knetFile *fp; // underlying knetFile (may belong to knet_alt_* hooks)
int offset; // offset of first buffered byte that has not been read
int len; // number of unread buffered bytes
unsigned char eof; // set to 1 when we knet_read fewer bytes than expected
unsigned char buf[KNETBUFSIZE]; // bytes from the most recent knet_read()
};

/*
 * Allocate a read-ahead buffer wrapping an already opened knetFile.
 *
 * fp: handle that later refills are read from. It is only stored here; this
 *     function does not open, validate or close it.
 *
 * Returns a buffer with nothing buffered and eof cleared, to be released by
 * the caller with free(), or NULL if the allocation fails.
 */
struct knet_buf *knet_buf_new(knetFile *fp)
{
	/* calloc zeroes offset, len, eof and the data area in one step. */
	struct knet_buf *kb = calloc(1, sizeof *kb);

	if (kb == NULL)
		return NULL;
	kb->fp = fp;
	return kb;
}

/*
 * fread()-style reader that serves bytes from a struct knet_buf, refilling
 * it from the underlying knetFile with knet_read() whenever it runs dry.
 *
 * ptr:   destination for size * nmemb bytes
 * size:  size of one item in bytes
 * nmemb: number of items requested
 * fp:    the struct knet_buf to read from (passed as void * so that the
 *        function matches the index_read_f signature)
 *
 * Returns the number of complete items copied, which is smaller than nmemb
 * once the stream is exhausted. A zero size or nmemb yields 0, like fread().
 */
size_t index_knet_read(void *ptr, size_t size, size_t nmemb, void *fp)
{
	struct knet_buf *kb = fp;
	unsigned char *dst = ptr;	/* byte pointer: no arithmetic on void * */
	size_t total, remaining;

	/* Also guards the division in the return statement. */
	if (size == 0 || nmemb == 0)
		return 0;

	total = size * nmemb;
	remaining = total;
	while (remaining > 0) {
		if (kb->len > 0) {
			size_t avail = (size_t)kb->len;
			size_t count = (avail < remaining) ? avail : remaining;

			memcpy(dst, kb->buf + kb->offset, count);
			dst += count;
			kb->offset += (int)count;
			kb->len -= (int)count;
			remaining -= count;
		}
		if (kb->eof || remaining == 0)
			break;

		/* Buffer drained and more data wanted: refill from the start. */
		kb->len = (int)knet_read(kb->fp, kb->buf, KNETBUFSIZE);
		kb->offset = 0;
		/* A short (or failed) read marks the end of the stream. */
		if (kb->len < KNETBUFSIZE)
			kb->eof = 1;
		/* Do not keep a negative error return as a byte count. */
		if (kb->len < 0)
			kb->len = 0;
	}
	return (total - remaining) / size;
}
#endif

static bam_index_t *bam_index_load_core(void *fp, index_read_f index_read)
{
int i;
char magic[4];
Expand All @@ -332,14 +380,13 @@ static bam_index_t *bam_index_load_core(FILE *fp)
fprintf(stderr, "[bam_index_load_core] fail to load index.\n");
return 0;
}
fread(magic, 1, 4, fp);
index_read(magic, 1, 4, fp);
if (strncmp(magic, "BAI\1", 4)) {
fprintf(stderr, "[bam_index_load] wrong magic number.\n");
fclose(fp);
return 0;
}
idx = (bam_index_t*)calloc(1, sizeof(bam_index_t));
fread(&idx->n, 4, 1, fp);
index_read(&idx->n, 4, 1, fp);
if (bam_is_be) bam_swap_endian_4p(&idx->n);
idx->index = (khash_t(i)**)calloc(idx->n, sizeof(void*));
idx->index2 = (bam_lidx_t*)calloc(idx->n, sizeof(bam_lidx_t));
Expand All @@ -352,18 +399,18 @@ static bam_index_t *bam_index_load_core(FILE *fp)
bam_binlist_t *p;
index = idx->index[i] = kh_init(i);
// load binning index
fread(&size, 4, 1, fp);
index_read(&size, 4, 1, fp);
if (bam_is_be) bam_swap_endian_4p(&size);
for (j = 0; j < (int)size; ++j) {
fread(&key, 4, 1, fp);
index_read(&key, 4, 1, fp);
if (bam_is_be) bam_swap_endian_4p(&key);
k = kh_put(i, index, key, &ret);
p = &kh_value(index, k);
fread(&p->n, 4, 1, fp);
index_read(&p->n, 4, 1, fp);
if (bam_is_be) bam_swap_endian_4p(&p->n);
p->m = p->n;
p->list = (pair64_t*)malloc(p->m * 16);
fread(p->list, 16, p->n, fp);
index_read(p->list, 16, p->n, fp);
if (bam_is_be) {
int x;
for (x = 0; x < p->n; ++x) {
Expand All @@ -373,15 +420,15 @@ static bam_index_t *bam_index_load_core(FILE *fp)
}
}
// load linear index
fread(&index2->n, 4, 1, fp);
index_read(&index2->n, 4, 1, fp);
if (bam_is_be) bam_swap_endian_4p(&index2->n);
index2->m = index2->n;
index2->offset = (uint64_t*)calloc(index2->m, 8);
fread(index2->offset, index2->n, 8, fp);
index_read(index2->offset, 8, index2->n, fp);
if (bam_is_be)
for (j = 0; j < index2->n; ++j) bam_swap_endian_8p(&index2->offset[j]);
}
if (fread(&idx->n_no_coor, 8, 1, fp) == 0) idx->n_no_coor = 0;
if (index_read(&idx->n_no_coor, 8, 1, fp) == 0) idx->n_no_coor = 0;
if (bam_is_be) bam_swap_endian_8p(&idx->n_no_coor);
return idx;
}
Expand Down Expand Up @@ -411,12 +458,13 @@ bam_index_t *bam_index_load_local(const char *_fn)
}
free(fnidx); free(fn);
if (fp) {
bam_index_t *idx = bam_index_load_core(fp);
bam_index_t *idx = bam_index_load_core(fp, (index_read_f)fread);
fclose(fp);
return idx;
} else return 0;
}

#ifndef KNETFILE_HOOKS
#ifdef _USE_KNETFILE
static void download_from_remote(const char *url)
{
Expand Down Expand Up @@ -454,10 +502,39 @@ static void download_from_remote(const char *url)
return;
}
#endif
#endif

bam_index_t *bam_index_load(const char *fn)
{
bam_index_t *idx;
#if (defined _USE_KNETFILE && defined KNETFILE_HOOKS)
if (strstr(fn, "ftp://") == fn || strstr(fn, "http://") == fn ||
strstr(fn, "https://") == fn) {
knetFile *fp;
struct knet_buf *kb;
size_t len = strlen(fn);
char *fnidx = (char*)calloc(len + 5, 1);
strcpy(fnidx, fn); strcat(fnidx, ".bai");
fp = knet_open(fnidx, "r");
if (fp == NULL && !strcmp(fn+len-4, ".bam")) {
char *fnidx2 = (char*)calloc(len, 1);
strcpy(fnidx2, fn);
strncpy(fnidx2+len-4, ".bai", 5);
fp = knet_open(fnidx2, "r");
if (fp == NULL) {
fprintf(stderr, "Unable to open index file for %s. Tried %s and %s.",
fn, fnidx, fnidx2);
return NULL;
}
}
kb = knet_buf_new(fp);
idx = bam_index_load_core(kb, index_knet_read);
knet_close(fp);
free(kb);
} else {
idx = bam_index_load_local(fn);
}
#else
idx = bam_index_load_local(fn);
if (idx == 0 && (strstr(fn, "ftp://") == fn || strstr(fn, "http://") == fn)) {
char *fnidx = calloc(strlen(fn) + 5, 1);
Expand All @@ -466,6 +543,7 @@ bam_index_t *bam_index_load(const char *fn)
download_from_remote(fnidx);
idx = bam_index_load_local(fn);
}
#endif
if (idx == 0) fprintf(stderr, "[bam_index_load] fail to load BAM index.\n");
return idx;
}
Expand Down
3 changes: 2 additions & 1 deletion bcftools/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ int bcf_cat(int n, char * const *fn)
h = bcf_hdr_read(in);
if (i == 0) bcf_hdr_write(out, h);
bcf_hdr_destroy(h);
#ifdef _USE_KNETFILE
#if (defined _USE_KNETFILE && !defined KNETFILE_HOOKS)
fstat(knet_fileno(in->fp->x.fpr), &s);
end = s.st_size - 28;
while (knet_tell(in->fp->x.fpr) < end) {
Expand All @@ -37,6 +37,7 @@ int bcf_cat(int n, char * const *fn)
fwrite(buf, 1, size, out->fp->x.fpw);
}
#else
fprintf(stderr, "Sorry, bcftools cat is not implemented unless compiled with _USE_KNETFILE without KNETFILE_HOOKS\n");
abort(); // FIXME: not implemented
#endif
bcf_close(in);
Expand Down
89 changes: 88 additions & 1 deletion knetfile.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,63 @@
#endif

#include "knetfile.h"
#include <stdint.h>
#include <fcntl.h>

// Socket I/O wrappers: map netread/netwrite/netclose onto read/write/close
// everywhere except Windows, where they map onto the winsock calls
// recv/send/closesocket instead.
#ifndef _WIN32
#define netread(fd, ptr, len) read(fd, ptr, len)
#define netwrite(fd, ptr, len) write(fd, ptr, len)
#define netclose(fd) close(fd)
#else
#include <winsock2.h>
#define netread(fd, ptr, len) recv(fd, ptr, len, 0)
#define netwrite(fd, ptr, len) send(fd, ptr, len, 0)
#define netclose(fd) closesocket(fd)
#endif

// FIXME: currently I/O is unbuffered

#define KNF_TYPE_LOCAL 1
#define KNF_TYPE_FTP 2
#define KNF_TYPE_HTTP 3

#ifdef KNETFILE_HOOKS
// Static global function pointers that may be set by knet_init_alt()
// to replace the usual knet functionality with alternate I/O implementation.
static knet_alt_open_f alt_open = NULL;
static knet_alt_dopen_f alt_dopen = NULL;
static knet_alt_read_f alt_read = NULL;
static knet_alt_seek_f alt_seek = NULL;
static knet_alt_tell_f alt_tell = NULL;
static knet_alt_close_f alt_close = NULL;

/*
 * Install alternate implementations for the knet_* entry points.
 *
 * Each argument is stored in the matching file-scope alt_* pointer. The
 * knet_* functions that consult these pointers call a non-NULL hook in
 * place of their built-in code, so passing NULL for a slot keeps the
 * built-in behaviour for that operation.
 */
void knet_init_alt(knet_alt_open_f open,
		   knet_alt_dopen_f dopen,
		   knet_alt_read_f read,
		   knet_alt_seek_f seek,
		   knet_alt_tell_f tell,
		   knet_alt_close_f close)
{
	/* Handle lifecycle hooks. */
	alt_open  = open;
	alt_dopen = dopen;
	alt_close = close;

	/* Data access and positioning hooks. */
	alt_read = read;
	alt_seek = seek;
	alt_tell = tell;
}
#endif

/*
 * State behind a knetFile handle. The definition lives in this file; the
 * knetFile typedef itself is declared in knetfile.h.
 */
struct knetFile_s {
int type, fd; // type is one of KNF_TYPE_*; knet_read() returns 0 when fd == -1
int64_t offset; // current position, as reported by knet_tell()
char *host, *port;

// the following are for FTP only
int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready; // knet_close() closes ctrl_fd unless it is -1
char *response, *retr, *size_cmd;
int64_t seek_offset; // for lazy seek
int64_t file_size;

// the following are for HTTP only
char *path, *http_host;
}; // typedef'd to knetFile in knetfile.h


/* In winsock.h, the type of a socket is SOCKET, which is: "typedef
* u_int SOCKET". An invalid SOCKET is: "(SOCKET)(~0)", or signed
Expand Down Expand Up @@ -85,6 +142,7 @@ static int socket_wait(int fd, int is_read)
* Guide to Network Programming" (http://beej.us/guide/bgnet/). */
static int socket_connect(const char *host, const char *port)
{
#define __err_connect_no_res(func) do { perror(func); return -1; } while (0)
#define __err_connect(func) do { perror(func); freeaddrinfo(res); return -1; } while (0)

int on = 1, fd;
Expand All @@ -95,7 +153,7 @@ static int socket_connect(const char *host, const char *port)
hints.ai_socktype = SOCK_STREAM;
/* In Unix/Mac, getaddrinfo() is the most convenient way to get
* server information. */
if (getaddrinfo(host, port, &hints, &res) != 0) __err_connect("getaddrinfo");
if (getaddrinfo(host, port, &hints, &res) != 0) __err_connect_no_res("getaddrinfo");
if ((fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol)) == -1) __err_connect("socket");
/* The following two setsockopt() are used by ftplib
* (http://nbpfaus.net/~pfau/ftplib/). I am not sure if they
Expand Down Expand Up @@ -450,6 +508,10 @@ int khttp_connect_file(knetFile *fp)

knetFile *knet_open(const char *fn, const char *mode)
{
#ifdef KNETFILE_HOOKS
if (alt_open)
return alt_open(fn, mode);
#endif
knetFile *fp = 0;
if (mode[0] != 'r') {
fprintf(stderr, "[kftp_open] only mode \"r\" is supported.\n");
Expand Down Expand Up @@ -494,6 +556,10 @@ knetFile *knet_open(const char *fn, const char *mode)

knetFile *knet_dopen(int fd, const char *mode)
{
#ifdef KNETFILE_HOOKS
if (alt_dopen)
return alt_dopen(fd, mode);
#endif
knetFile *fp = (knetFile*)calloc(1, sizeof(knetFile));
fp->type = KNF_TYPE_LOCAL;
fp->fd = fd;
Expand All @@ -502,6 +568,10 @@ knetFile *knet_dopen(int fd, const char *mode)

off_t knet_read(knetFile *fp, void *buf, off_t len)
{
#ifdef KNETFILE_HOOKS
if (alt_read)
return alt_read(fp, buf, len);
#endif
off_t l = 0;
if (fp->fd == -1) return 0;
if (fp->type == KNF_TYPE_FTP) {
Expand Down Expand Up @@ -530,6 +600,10 @@ off_t knet_read(knetFile *fp, void *buf, off_t len)

off_t knet_seek(knetFile *fp, int64_t off, int whence)
{
#ifdef KNETFILE_HOOKS
if (alt_seek)
return alt_seek(fp, off, whence);
#endif
if (whence == SEEK_SET && off == fp->offset) return 0;
if (fp->type == KNF_TYPE_LOCAL) {
/* Be aware that lseek() returns the offset after seeking,
Expand Down Expand Up @@ -573,8 +647,21 @@ off_t knet_seek(knetFile *fp, int64_t off, int whence)
return -1;
}

/*
 * Report the current position of a knetFile.
 *
 * When built with KNETFILE_HOOKS and an alternate tell hook is installed,
 * the answer comes from that hook; otherwise it is the offset field stored
 * in the handle.
 */
off_t knet_tell(knetFile *fp)
{
#ifdef KNETFILE_HOOKS
	if (alt_tell != NULL) {
		return alt_tell(fp);
	}
#endif
	return fp->offset;
}

int knet_close(knetFile *fp)
{
#ifdef KNETFILE_HOOKS
if (alt_close)
return alt_close(fp);
#endif
if (fp == 0) return 0;
if (fp->ctrl_fd != -1) netclose(fp->ctrl_fd); // FTP specific
if (fp->fd != -1) {
Expand Down
Loading