Skip to content

Commit 3e6d65c

Browse files
committed
Add in-memory I/O
1 parent daae2ea commit 3e6d65c

File tree

4 files changed

+362
-0
lines changed

4 files changed

+362
-0
lines changed

Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ LIBHTS_OBJS = \
148148
faidx.o \
149149
hfile.o \
150150
hfile_net.o \
151+
hfile_mem.o \
151152
hts.o \
152153
md5.o \
153154
probaln.o \
@@ -280,6 +281,7 @@ knetfile.o knetfile.pico: knetfile.c config.h $(htslib_knetfile_h)
280281
hfile.o hfile.pico: hfile.c config.h $(htslib_hfile_h) $(hfile_internal_h) $(hts_internal_h) $(htslib_khash_h)
281282
hfile_libcurl.o hfile_libcurl.pico: hfile_libcurl.c config.h $(hts_internal_h) $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h)
282283
hfile_net.o hfile_net.pico: hfile_net.c config.h $(hfile_internal_h) $(htslib_knetfile_h)
284+
# hfile_mem.c includes htslib/hfile.h, htslib/hfile_mem.h and hfile_internal.h; it does not use knetfile.
hfile_mem.o hfile_mem.pico: hfile_mem.c config.h $(htslib_hfile_h) htslib/hfile_mem.h $(hfile_internal_h)
283285
hts.o hts.pico: hts.c config.h $(htslib_hts_h) $(htslib_bgzf_h) $(cram_h) $(htslib_hfile_h) version.h $(hts_internal_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_ksort_h)
284286
vcf.o vcf.pico: vcf.c config.h $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_hfile_h) $(hts_internal_h) $(htslib_khash_str2int_h) $(htslib_kstring_h) $(htslib_khash_h) $(htslib_kseq_h)
285287
sam.o sam.pico: sam.c config.h $(htslib_sam_h) $(htslib_bgzf_h) $(cram_h) $(hts_internal_h) $(htslib_hfile_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_kstring_h)

hfile.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -714,6 +714,8 @@ static int init_add_plugin(void *obj, int (*init)(struct hFILE_plugin *),
714714
return 0;
715715
}
716716

717+
extern int hfile_plugin_init_mem(struct hFILE_plugin *self);
718+
717719
static void load_hfile_plugins()
718720
{
719721
static const struct hFILE_scheme_handler
@@ -726,6 +728,7 @@ static void load_hfile_plugins()
726728
hfile_add_scheme_handler("data", &data);
727729
hfile_add_scheme_handler("file", &file);
728730
init_add_plugin(NULL, hfile_plugin_init_net, "knetfile");
731+
init_add_plugin(NULL, hfile_plugin_init_mem, "mem");
729732

730733
#ifdef ENABLE_PLUGINS
731734
struct hts_path_itr path;

hfile_mem.c

Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
/* The MIT License
2+
3+
Copyright (c) 2016 Illumina Cambridge Ltd.
4+
5+
Author: Peter Krusche <[email protected]>
6+
7+
Permission is hereby granted, free of charge, to any person obtaining a copy
8+
of this software and associated documentation files (the "Software"), to deal
9+
in the Software without restriction, including without limitation the rights
10+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
copies of the Software, and to permit persons to whom the Software is
12+
furnished to do so, subject to the following conditions:
13+
14+
The above copyright notice and this permission notice shall be included in
15+
all copies or substantial portions of the Software.
16+
17+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23+
THE SOFTWARE.
24+
*/
25+
26+
#include "htslib/hfile.h"
27+
#include "htslib/hfile_mem.h"
28+
#include "hfile_internal.h"
29+
30+
#include <stdio.h>
31+
#include <string.h>
32+
#include <stdlib.h>
33+
#include <malloc.h>
34+
#include <stdint.h>
35+
#include <errno.h>
36+
37+
38+
/*
 * Callback consulted by hopen_mem() for names of the form "@<name>".
 * It stays NULL until a caller registers one; hopen_mem() refuses '@'
 * names with EINVAL while it is NULL.
 */
static buffer_lookup_fn hfile_mem_lookup_buffer = NULL;

/*
 * Register (or, by passing NULL, unregister) the buffer lookup callback.
 * The pointer is stored as-is in a file-scope variable.
 */
void hfile_mem_set_lookup_function(buffer_lookup_fn fn)
{
    hfile_mem_lookup_buffer = fn;
}
43+
44+
45+
typedef struct
46+
{
47+
hFILE base;
48+
char *filename;
49+
char *mode;
50+
size_t buffer_size;
51+
size_t used_size;
52+
off_t offset;
53+
uint8_t *buffer;
54+
int buffer_is_mine;
55+
int write_flag;
56+
} hFILE_mem;
57+
58+
59+
/*
60+
* Implementation
61+
*/
62+
63+
64+
/*
 * Backend read callback: copy up to `nbytes` bytes of the in-memory image,
 * starting at the current offset, into `buffer` and advance the offset.
 *
 * Returns the number of bytes copied, or 0 when the offset is at or beyond
 * the end of the valid data (end of file).
 */
static ssize_t mem_read(hFILE *fpv, void *buffer, size_t nbytes)
{
    hFILE_mem *fp = (hFILE_mem *) fpv;
    size_t pos;
    size_t remaining;
    size_t to_read;

    if(fp->buffer == NULL || fp->offset < 0)
    {
        return 0;
    }

    /* Bounds-check against used_size (valid data), not buffer_size
     * (capacity), and do so BEFORE subtracting: otherwise an offset past
     * the data underflows the size_t and memcpy reads out of bounds. */
    pos = (size_t) fp->offset;
    if(pos >= fp->used_size)
    {
        return 0;
    }

    remaining = fp->used_size - pos;
    to_read = remaining < nbytes ? remaining : nbytes;
    memcpy(buffer, fp->buffer + pos, to_read);
    fp->offset += (off_t) to_read;
    return (ssize_t) to_read;
}
78+
79+
/*
 * Backend write callback: copy `nbytes` bytes from `buffer` into the
 * in-memory image at the current offset, growing the image as needed.
 *
 * Capacity grows in multiples of 1024 bytes.  If the offset lies beyond the
 * current end of data (after a seek), the gap is zero-filled so that no
 * uninitialised memory becomes part of the file.
 *
 * Returns `nbytes` on success.  On failure returns -1 with errno set to
 * EROFS (buffer is not owned by this handle, so it is read-only),
 * EINVAL (negative offset) or ENOMEM (size overflow / allocation failure);
 * the existing contents are left untouched in that case.
 */
static ssize_t mem_write(hFILE *fpv, const void *buffer, size_t nbytes)
{
    hFILE_mem *fp = (hFILE_mem *) fpv;
    const size_t chunk = 1024;
    size_t pos;
    size_t end;
    size_t new_buffer_size;
    void *tmp;

    if(!fp->buffer_is_mine)
    {
        // Cannot write: we don't own the buffer and can only read
        errno = EROFS;
        return -1;
    }
    if(fp->offset < 0)
    {
        errno = EINVAL;
        return -1;
    }
    if(nbytes == 0)
    {
        return 0;
    }

    pos = (size_t) fp->offset;
    /* Reject sizes whose end position, or its rounded-up capacity, would
     * wrap around size_t. */
    if(nbytes > SIZE_MAX - pos || pos + nbytes > SIZE_MAX - (chunk - 1))
    {
        errno = ENOMEM;
        return -1;
    }
    end = pos + nbytes;

    /* Compare in unsigned arithmetic only: the former signed "available"
     * value went negative after a seek past the capacity and, once
     * converted to size_t, suppressed the reallocation. */
    if(fp->buffer == NULL || end > fp->buffer_size)
    {
        new_buffer_size = (end + chunk - 1) / chunk * chunk;
        tmp = realloc(fp->buffer, new_buffer_size);
        if(tmp == NULL)
        {
            errno = ENOMEM;
            return -1;
        }
        fp->buffer = tmp;
        fp->buffer_size = new_buffer_size;
    }

    if(pos > fp->used_size)
    {
        memset(fp->buffer + fp->used_size, 0, pos - fp->used_size);
    }

    fp->write_flag = 1;
    memcpy(fp->buffer + pos, buffer, nbytes);
    fp->offset = (off_t) end;
    if(end > fp->used_size)
    {
        fp->used_size = end;
    }
    return (ssize_t) nbytes;
}
114+
115+
/*
 * Backend seek callback: reposition the read/write offset.
 *
 * `whence` is SEEK_SET, SEEK_CUR or SEEK_END; SEEK_END is relative to the
 * end of the valid data (used_size), not to the allocated capacity.
 * Seeking beyond the end of the data is allowed.
 *
 * Returns the new offset, or -1 with errno set to EINVAL (unknown `whence`
 * or a resulting negative offset) or EOVERFLOW (result not representable).
 * The offset is left unchanged on failure.
 */
static off_t mem_seek(hFILE *fpv, off_t offset, int whence)
{
    hFILE_mem *fp = (hFILE_mem *) fpv;
    /* Largest off_t value, assuming off_t is a two's-complement type of at
     * most 64 bits. */
    const off_t off_max =
        (off_t) ((((uint64_t) 1) << (sizeof(off_t) * 8 - 1)) - 1);
    off_t origin;

    switch(whence)
    {
    case SEEK_SET:
        origin = 0;
        break;
    case SEEK_CUR:
        origin = fp->offset;
        break;
    case SEEK_END:
        origin = (off_t) fp->used_size;
        break;
    default:
        errno = EINVAL;
        return -1;
    }

    if(origin < 0 || (offset < 0 && offset < -origin))
    {
        errno = EINVAL;
        return -1;
    }
    if(offset > 0 && origin > off_max - offset)
    {
        errno = EOVERFLOW;
        return -1;
    }

    fp->offset = origin + offset;
    return fp->offset;
}
135+
136+
/*
 * Backend flush callback.  Writes go straight into the memory image, so
 * there is nothing to push anywhere; always reports success (0).
 */
static int mem_flush(hFILE *fpv)
{
    (void) fpv;   /* unused */
    return 0;
}
140+
141+
/*
 * Backend close callback: release the strings owned by the handle and, if
 * the data buffer was allocated by this module, the buffer as well.
 * Buffers obtained from the lookup callback are left alone.
 * Always returns 0.
 */
static int mem_close(hFILE *fpv)
{
    hFILE_mem *mem = (hFILE_mem *) fpv;

    /* free(NULL) is a no-op, so no guards are needed. */
    free(mem->filename);
    free(mem->mode);
    if(mem->buffer_is_mine)
    {
        free(mem->buffer);
    }
    return 0;
}
158+
159+
/*
 * Callback table for in-memory files.  The initialiser is positional, so
 * the entries must stay in the member order of struct hFILE_backend
 * (declared outside this file).  Its address also serves as the type tag
 * checked by hfile_mem_get_buffer().
 */
static const struct hFILE_backend mem_backend =
{
    mem_read,
    mem_write,
    mem_seek,
    mem_flush,
    mem_close
};
162+
163+
hFILE *hopen_mem(const char *filename, const char *mode)
164+
{
165+
hFILE_mem *fp;
166+
FILE *fpr;
167+
size_t len;
168+
169+
const char *realfilename = strchr(filename, ':');
170+
if(!realfilename)
171+
{
172+
realfilename = filename;
173+
}
174+
else
175+
{
176+
++realfilename;
177+
}
178+
fp = (hFILE_mem *) hfile_init(sizeof(hFILE_mem), mode, 0);
179+
if(!fp)
180+
{
181+
return NULL;
182+
}
183+
184+
fp->base.backend = &mem_backend;
185+
fp->buffer = NULL;
186+
fp->buffer_size = 0;
187+
fp->used_size = 0;
188+
fp->write_flag = 0;
189+
fp->offset = 0;
190+
fp->mode = strdup(mode);
191+
fp->buffer_is_mine = 0;
192+
193+
if(realfilename[0] == '@')
194+
{
195+
if(hfile_mem_lookup_buffer == NULL)
196+
{
197+
free(fp);
198+
errno = EINVAL;
199+
return NULL;
200+
}
201+
++realfilename;
202+
fp->filename = NULL;
203+
if(hfile_mem_lookup_buffer(realfilename, (void**)&fp->buffer, &fp->buffer_size))
204+
{
205+
free(fp);
206+
errno = EINVAL;
207+
return NULL;
208+
}
209+
210+
fp->used_size = fp->buffer_size;
211+
}
212+
else
213+
{
214+
fp->filename = strdup(realfilename);
215+
216+
if(strchr(mode, 'r'))
217+
{
218+
fpr = fopen(realfilename, mode);
219+
if(!fpr)
220+
{
221+
// fprintf(stderr, "[E::mem_file] Cannot open %s for reading.\n", filename);
222+
// don't write an error, this happens all the time when htslib tries to open a
223+
// csi file that doesn't exist
224+
free(fp);
225+
return NULL;
226+
}
227+
fseek(fpr, 0, SEEK_END);
228+
len = ftell(fpr);
229+
fseek(fpr, 0, SEEK_SET);
230+
fp->buffer_is_mine = 1;
231+
fp->buffer = malloc(len);
232+
if(fp->buffer == NULL)
233+
{
234+
free(fp);
235+
fclose(fpr);
236+
errno = ENOMEM;
237+
return NULL;
238+
}
239+
if(fread(fp->buffer, 1, len, fpr) != len)
240+
{
241+
free(fp);
242+
fclose(fpr);
243+
errno = EIO;
244+
return NULL;
245+
}
246+
fp->buffer_size = len;
247+
fp->used_size = len;
248+
fclose(fpr);
249+
}
250+
else
251+
{
252+
fp->buffer = malloc(1024);
253+
fp->buffer_size = 1024;
254+
fp->buffer_is_mine = 1;
255+
}
256+
}
257+
return &fp->base;
258+
}
259+
260+
/*
 * Expose the memory image behind an hFILE opened by hopen_mem().
 *
 * On success stores the data pointer in *buffer and the number of valid
 * bytes in *length and returns 0.  The pointer refers to the handle's own
 * storage: it is not a copy, and mem_write() may reallocate it.
 *
 * Returns -1 with errno set to EINVAL if any argument is NULL, if `file`
 * is not an in-memory file, or if the file has no buffer.
 *
 * NOTE(review): presumably callers must flush the hFILE first so that
 * writes still held in the generic hFILE buffer have reached this image -
 * confirm against hfile.c.
 */
int hfile_mem_get_buffer(hFILE * file, void ** buffer, size_t * length)
{
    hFILE_mem *fp;

    if(file == NULL || buffer == NULL || length == NULL
       || file->backend != &mem_backend)
    {
        errno = EINVAL;
        return -1;
    }

    fp = (hFILE_mem *) file;
    if(fp->buffer == NULL)
    {
        errno = EINVAL;
        return -1;
    }

    *buffer = fp->buffer;
    *length = fp->used_size;
    return 0;
}
280+
281+
int hfile_plugin_init_mem(struct hFILE_plugin *self)
282+
{
283+
// mem files are declared remote so they work with a tabix index
284+
static const struct hFILE_scheme_handler handler =
285+
{hopen_mem, hfile_always_remote, "mem", 0};
286+
self->name = "mem";
287+
hfile_add_scheme_handler("mem", &handler);
288+
return 0;
289+
}
290+

htslib/hfile_mem.h

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/* The MIT License
2+
3+
Copyright (c) 2016 Illumina Cambridge Ltd.
4+
5+
Author: Peter Krusche <[email protected]>
6+
7+
Permission is hereby granted, free of charge, to any person obtaining a copy
8+
of this software and associated documentation files (the "Software"), to deal
9+
in the Software without restriction, including without limitation the rights
10+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
copies of the Software, and to permit persons to whom the Software is
12+
furnished to do so, subject to the following conditions:
13+
14+
The above copyright notice and this permission notice shall be included in
15+
all copies or substantial portions of the Software.
16+
17+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23+
THE SOFTWARE.
24+
*/
25+
26+
#ifndef HTSLIB_HFILE_MEM_H
27+
#define HTSLIB_HFILE_MEM_H
28+
29+
#include "hfile.h"
30+
31+
#ifdef __cplusplus
32+
extern "C" {
33+
#endif
34+
35+
/**
36+
* Buffer lookup callback. Given a file name, returns a buffer and size.
37+
*
38+
* When hopen_mem is called to read a file with a name that starts with '@',
39+
* it will use such a function to obtain a buffer pointer. This allows us to
40+
* feed arbitrary memory blocks into htslib for decompression / parsing.
41+
*
42+
* @param name the file / internal handle name.
43+
* @param buffer void pointer that will receive the buffer
44+
* @param length size_t pointer that will receive the length of the data pointed to in buffer
*
* @return 0 on success; any non-zero value makes hopen_mem fail with errno set to EINVAL
45+
*/
46+
typedef int (*buffer_lookup_fn)(const char * name, void** buffer, size_t * length);
47+
48+
/**
49+
* Set buffer lookup function for memory files.
50+
* @param fn function of type buffer_lookup_fn
51+
*/
52+
extern void hfile_mem_set_lookup_function(buffer_lookup_fn fn);
53+
54+
/**
55+
* Get buffer for a hfile
56+
* @param file the file to use. This must be an hFILE that was opened using hopen_mem (the "mem" scheme)
57+
* @param buffer void pointer that will receive the buffer
58+
* @param length size_t pointer that will receive the length of the data pointed to in buffer
59+
*
60+
* @return 0 if successful, -1 otherwise
61+
*/
62+
extern int hfile_mem_get_buffer(hFILE * file, void ** buffer, size_t * length);
63+
64+
#ifdef __cplusplus
65+
};
66+
#endif
67+
#endif //HTSLIB_HFILE_MEM_H

0 commit comments

Comments
 (0)