Skip to content

Add in-memory I/O using hFILE fixed buffers #590

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 133 additions & 18 deletions hfile.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,15 +83,15 @@ then there is a non-empty read buffer, and if begin == end then both buffers
are empty. In all cases, the stream's file position indicator corresponds
to the position pointed to by begin.

The above is the normal scenario of a mobile window. For in-memory streams,
a fixed (immobile) buffer can be used as the full contents without any separate
backend behind it. These always have at_eof set, offset set to 0, need no
read() method, and should just return EINVAL for seek():
The above is the normal scenario of a mobile window. For in-memory
streams (eg via hfile_init_fixed) the buffer can be used as the full
contents without any separate backend behind it. These always have at_eof
set, offset set to 0, need no read() method, and should just return EINVAL
for seek():

abcdefghijkLMNOPQRSTUVWXYZ------
^buffer ^begin ^end ^limit

Use hfile_init_fixed() to create one of these. */
*/

hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity)
{
Expand Down Expand Up @@ -138,6 +138,8 @@ hFILE *hfile_init_fixed(size_t struct_size, const char *mode,
return fp;
}

static const struct hFILE_backend mem_backend;

void hfile_destroy(hFILE *fp)
{
int save = errno;
Expand Down Expand Up @@ -404,7 +406,7 @@ off_t hseek(hFILE *fp, off_t offset, int whence)
{
off_t curpos, pos;

if (writebuffer_is_nonempty(fp)) {
if (writebuffer_is_nonempty(fp) && fp->mobile) {
int ret = flush_buffer(fp);
if (ret < 0) return ret;
}
Expand Down Expand Up @@ -615,6 +617,56 @@ static hFILE *hopen_fd(const char *filename, const char *mode)
return NULL;
}

// Loads the whole contents of the already opened stream fp into memory and
// returns a new read-only, in-memory, immobile hFILE holding that data.
//
// fp is always closed by this function, irrespective of whether we error or
// whether we return a new immobile hFILE; the caller must not use it again.
//
// Returns the new in-memory hFILE on success, or NULL on failure (read error,
// out of memory, or an error reported while closing fp).
static hFILE *hpreload(hFILE *fp) {
    hFILE *mem_fp;
    char *buf = NULL;
    off_t buf_sz = 0, buf_a = 0, buf_inc = 8192, len;

    for (;;) {
        // Grow the buffer whenever fewer than 5000 bytes of free space
        // remain; the growth step itself increases by 30% each time until
        // it reaches roughly 1MB.
        if (buf_a - buf_sz < 5000) {
            buf_a += buf_inc;
            char *t = realloc(buf, buf_a);
            if (!t) goto err;
            buf = t;
            if (buf_inc < 1000000) buf_inc *= 1.3;
        }
        len = hread(fp, buf+buf_sz, buf_a-buf_sz);
        if (len > 0)
            buf_sz += len;
        else
            break;   // 0 => end of input, negative => read error
    }

    if (len < 0) goto err;
    mem_fp = hfile_init_fixed(sizeof(hFILE), "r", buf, buf_sz, buf_a);
    if (!mem_fp) goto err;
    mem_fp->backend = &mem_backend;

    if (hclose(fp) < 0) {
        // From here on buf belongs to mem_fp and is released when mem_fp is
        // closed, and fp has already been handed to hclose().  Jumping to
        // the err label would therefore free buf a second time and close fp
        // a second time, so clean up mem_fp only and return directly.
        // NOTE(review): relies on hclose() disposing of fp even when it
        // reports failure -- confirm against hclose()'s implementation.
        hclose_abruptly(mem_fp);
        return NULL;
    }
    return mem_fp;

 err:
    // Only reached while buf is still owned by us and fp is still open.
    free(buf);
    hclose_abruptly(fp);
    return NULL;
}

// Reports whether the target named by a "preload:" URL is remote, by
// delegating to hisremote() on the part of the URL after the scheme prefix.
static int is_preload_url_remote(const char *url){
    const char *target = url + 8; // len("preload:") = 8
    return hisremote(target);
}

// Scheme handler for "preload:" URLs.  Opens the target that follows the
// "preload:" prefix with hopen() and then slurps it into an in-memory hFILE
// via hpreload().
//
// Returns the in-memory hFILE, or NULL if the target could not be opened or
// could not be loaded.
static hFILE *hopen_preload(const char *url, const char *mode){
    hFILE *fp = hopen(url + 8, mode); // len("preload:") = 8

    // hopen() returns NULL on failure; hpreload() reads from the stream
    // straight away, so it must never be given a NULL pointer.
    if (fp == NULL) return NULL;

    return hpreload(fp);
}

hFILE *hdopen(int fd, const char *mode)
{
hFILE_fd *fp = (hFILE_fd*) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
Expand Down Expand Up @@ -711,6 +763,16 @@ static int cmp_prefix(const char *key, const char *s)
return 0;
}

static hFILE *create_hfile_mem(char* buffer, const char* mode, size_t buf_filled, size_t buf_size)
{
hFILE_mem *fp = (hFILE_mem *) hfile_init_fixed(sizeof(hFILE_mem), mode, buffer, buf_filled, buf_size);
if (fp == NULL)
return NULL;

fp->base.backend = &mem_backend;
return &fp->base;
}

static hFILE *hopen_mem(const char *url, const char *mode)
{
size_t length, size;
Expand All @@ -734,13 +796,59 @@ static hFILE *hopen_mem(const char *url, const char *mode)
if (buffer == NULL) return NULL;
hts_decode_percent(buffer, &length, data);
}
hFILE* hf;

hFILE_mem *fp = (hFILE_mem *)
hfile_init_fixed(sizeof (hFILE_mem), mode, buffer, length, size);
if (fp == NULL) { free(buffer); return NULL; }
if(!(hf = create_hfile_mem(buffer, mode, length, size))){
free(buffer);
return NULL;
}

fp->base.backend = &mem_backend;
return &fp->base;
return hf;
}

// Variadic open handler for the "mem" scheme.  Two arguments are fetched
// from args, in this order:
//   char *   the buffer to expose through the hFILE
//   size_t   the size of that buffer (it must really be passed as a size_t;
//            an int literal needs a cast at the call site)
// The buffer is treated as completely filled (filled == capacity).
// filename is not used.
//
// Returns the new in-memory hFILE, or NULL on failure, in which case the
// supplied buffer is freed.
hFILE *hopenv_mem(const char *filename, const char *mode, va_list args)
{
    char *buffer = va_arg(args, char *);
    size_t sz = va_arg(args, size_t);
    // va_end() is deliberately not invoked here: it has to be paired with
    // va_start() in the same function, and hopen(), which started the list,
    // already calls va_end() once this handler returns.

    hFILE *hf = create_hfile_mem(buffer, mode, sz, sz);
    if (hf == NULL) {
        free(buffer);
        return NULL;
    }

    return hf;
}

// Returns the internal buffer of an in-memory hFILE.
//   file    the stream to query; it must use the in-memory backend
//   length  if non-NULL, receives the size of the buffer, measured from its
//           start (file->buffer) to its limit (file->limit)
// Returns the buffer pointer, or NULL with errno set to EINVAL if file is
// not an in-memory stream.  The buffer remains owned by the hFILE.
char *hfile_mem_get_buffer(hFILE *file, size_t *length) {
    if (file->backend != &mem_backend) {
        errno = EINVAL;
        return NULL;
    }

    // limit never precedes buffer, so the subtraction has to be
    // limit - buffer; the reverse order yields a non-positive difference
    // that wraps to a huge value when stored in a size_t.
    if (length)
        *length = file->limit - file->buffer;

    return file->buffer;
}

// Like hfile_mem_get_buffer(), but additionally detaches the buffer from
// the hFILE by clearing file->buffer, so that the caller becomes responsible
// for freeing the returned memory.
// Returns NULL (leaving file unchanged) if hfile_mem_get_buffer() fails.
char *hfile_mem_steal_buffer(hFILE *file, size_t *length) {
    char *stolen = hfile_mem_get_buffer(file, length);
    if (stolen == NULL)
        return NULL;

    file->buffer = NULL;
    return stolen;
}

int hfile_plugin_init_mem(struct hFILE_plugin *self)
{
// mem files are declared remote so they work with a tabix index
static const struct hFILE_scheme_handler handler =
{NULL, hfile_always_remote, "mem", 2000 + 50, hopenv_mem};
self->name = "mem";
hfile_add_scheme_handler("mem", &handler);
return 0;
}


Expand Down Expand Up @@ -825,14 +933,17 @@ static void load_hfile_plugins()
{
static const struct hFILE_scheme_handler
data = { hopen_mem, hfile_always_local, "built-in", 80 },
file = { hopen_fd_fileuri, hfile_always_local, "built-in", 80 };
file = { hopen_fd_fileuri, hfile_always_local, "built-in", 80 },
preload = { hopen_preload, is_preload_url_remote, "built-in", 80 };

schemes = kh_init(scheme_string);
if (schemes == NULL) abort();

hfile_add_scheme_handler("data", &data);
hfile_add_scheme_handler("file", &file);
hfile_add_scheme_handler("preload", &preload);
init_add_plugin(NULL, hfile_plugin_init_net, "knetfile");
init_add_plugin(NULL, hfile_plugin_init_mem, "mem");

#ifdef ENABLE_PLUGINS
struct hts_path_itr path;
Expand Down Expand Up @@ -908,21 +1019,25 @@ static const struct hFILE_scheme_handler *find_scheme_handler(const char *s)

hFILE *hopen(const char *fname, const char *mode, ...)
{
hFILE *fp = NULL;
Copy link
Contributor

@jkbonfield jkbonfield Dec 14, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Edit: ignore that - confused by diff here not showing entire function.


const struct hFILE_scheme_handler *handler = find_scheme_handler(fname);
if (handler) {
if (strchr(mode, ':') == NULL) return handler->open(fname, mode);
if (strchr(mode, ':') == NULL) fp = handler->open(fname, mode);
else if (handler->priority >= 2000 && handler->vopen) {
hFILE *fp;
va_list arg;
va_start(arg, mode);
fp = handler->vopen(fname, mode, arg);
va_end(arg);
return fp;
}
else { errno = ENOTSUP; return NULL; }
}
else if (strcmp(fname, "-") == 0) return hopen_fd_stdinout(mode);
else return hopen_fd(fname, mode);
else if (strcmp(fname, "-") == 0) fp = hopen_fd_stdinout(mode);
else fp = hopen_fd(fname, mode);

if (!fp) return NULL;

return fp;
}

// "Is this name remote?" predicate that treats every name as local:
// fname is ignored and the result is always 0.
int hfile_always_local (const char *fname)
{
    (void) fname;
    return 0;
}
Expand Down
30 changes: 28 additions & 2 deletions htslib/hfile.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ hread(hFILE *fp, void *buffer, size_t nbytes)
if (n > nbytes) n = nbytes;
memcpy(buffer, fp->begin, n);
fp->begin += n;
return (n == nbytes)? (ssize_t) n : hread2(fp, buffer, nbytes, n);
return (n == nbytes || !fp->mobile)? (ssize_t) n : hread2(fp, buffer, nbytes, n);
}

/// Write a character to the stream
Expand Down Expand Up @@ -239,7 +239,15 @@ static inline ssize_t HTS_RESULT_USED
hwrite(hFILE *fp, const void *buffer, size_t nbytes)
{
extern ssize_t hwrite2(hFILE *, const void *, size_t, size_t);

extern int hfile_set_blksize(hFILE *fp, size_t bufsiz);

if(!fp->mobile){
if (fp->limit - fp->begin < nbytes){
hfile_set_blksize(fp, fp->limit - fp->buffer + nbytes);
fp->end = fp->limit;
}
}

size_t n = fp->limit - fp->begin;
if (n > nbytes) n = nbytes;
memcpy(fp->begin, buffer, n);
Expand All @@ -254,6 +262,24 @@ This includes low-level flushing such as via `fdatasync(2)`.
*/
int hflush(hFILE *fp) HTS_RESULT_USED;

/// For hfile_mem: get the internal buffer and its size from a hfile
/** @return buffer if successful, or NULL if an error occurred

The buffer returned should not be freed as this will happen when the
hFILE is closed.
*/
char *hfile_mem_get_buffer(hFILE *file, size_t *length);

/// For hfile_mem: get the internal buffer and its size from a hfile.
/** @return buffer if successful, or NULL if an error occurred

This is similar to hfile_mem_get_buffer except that ownership of the
buffer is granted to the caller, who now has responsibility for freeing
it. From this point onwards, the hFILE should not be used for any
purpose other than closing.
*/
char *hfile_mem_steal_buffer(hFILE *file, size_t *length);

#ifdef __cplusplus
}
#endif
Expand Down
41 changes: 41 additions & 0 deletions test/hfile.c
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,47 @@ int main(void)
if ((c = hgetc(fin)) != EOF) fail("chars: hgetc (EOF) returned %d", c);
if (hclose(fin) != 0) fail("hclose(test/hfile_chars.tmp) for reading");

fin = hopen("preload:test/hfile_chars.tmp", "r");
if (fin == NULL) fail("preloading \"test/hfile_chars.tmp\" for reading");
for (i = 0; i < 256; i++)
if ((c = hgetc(fin)) != i)
fail("preloading chars: hgetc (%d = 0x%x) returned %d = 0x%x", i, i, c, c);
if ((c = hgetc(fin)) != EOF) fail("preloading chars: hgetc (EOF) returned %d", c);
if (hclose(fin) != 0) fail("preloading hclose(test/hfile_chars.tmp) for reading");

char* test_string = strdup("Test string");
fin = hopen("mem:", "r:", test_string, 12);
if (fin == NULL) fail("hopen(\"mem:\", \"r:\", ...)");
if (hread(fin, buffer, 12) != 12)
fail("hopen('mem:', 'r') failed read");
if(strcmp(buffer, test_string) != 0)
fail("hopen('mem:', 'r') missread '%s' != '%s'", buffer, test_string);
char* internal_buf;
size_t interval_buf_len;
if((internal_buf = hfile_mem_get_buffer(fin, &interval_buf_len)) == NULL){
fail("hopen('mem:', 'r') failed to get internal buffer");
}
if (hclose(fin) != 0) fail("hclose mem for reading");

test_string = strdup("Test string");
fin = hopen("mem:", "wr:", test_string, 12);
if (fin == NULL) fail("hopen(\"mem:\", \"w:\", ...)");
if (hseek(fin, -1, SEEK_END) < 0)
fail("hopen('mem:', 'wr') failed seek");
if (hwrite(fin, " extra", 7) != 7)
fail("hopen('mem:', 'wr') failed write");
if (hseek(fin, 0, SEEK_SET) < 0)
fail("hopen('mem:', 'wr') failed seek");
if (hread(fin, buffer, 18) != 18)
fail("hopen('mem:', 'wr') failed read");
if (strcmp(buffer, "Test string extra") != 0)
fail("hopen('mem:', 'wr') misswrote '%s' != '%s'", buffer, "Test string extra");
if((internal_buf = hfile_mem_steal_buffer(fin, &interval_buf_len)) == NULL){
fail("hopen('mem:', 'wr') failed to get internal buffer");
}
free(internal_buf);
if (hclose(fin) != 0) fail("hclose mem for writing");

fin = hopen("data:,hello, world!%0A", "r");
if (fin == NULL) fail("hopen(\"data:...\")");
n = hread(fin, buffer, 300);
Expand Down