libBigWig
bigWig.h
Go to the documentation of this file.
1 #include "bigWigIO.h"
2 #include "bwValues.h"
3 #include <inttypes.h>
4 #include <zlib.h>
5 
6 #ifdef __cplusplus
7 extern "C" {
8 #endif
9 
// Library version.
// NOTE(review): this expands to the bare token 0.4.4, which is a valid
// preprocessing number but not a valid C constant, so it can only be used
// after stringification and never as a value in an expression.
56 #define LIBBIGWIG_VERSION 0.4.4
57 
// LIBBIGWIG_CURL is 0 when NOCURL is defined and 1 otherwise.
// In the NOCURL case CURLcode and CURL are typedef'd to placeholder types so
// that prototypes in this header taking `CURLcode (*callBack)(CURL*)` still
// have those names declared.
61 #ifdef NOCURL
62 #define LIBBIGWIG_CURL 0
63 typedef int CURLcode;
64 typedef void CURL;
65 #else
66 #define LIBBIGWIG_CURL 1
67 #endif
68 
// Magic-number constants.
// NOTE(review): the names suggest signatures for bigWig files, bigBed files,
// the chromosome tree and the index; confirm where each one is compared.
72 #define BIGWIG_MAGIC 0x888FFC26
73 
76 #define BIGBED_MAGIC 0x8789F2EB
77 
80 #define CIRTREE_MAGIC 0x78ca8c91
81 
84 #define IDX_MAGIC 0x2468ace0
85 
// Default values.
// NOTE(review): presumably children per tree node and a block size; units and
// users are not visible in this header, so confirm against the writer code.
88 #define DEFAULT_nCHILDREN 64
89 
92 #define DEFAULT_BLOCKSIZE 32768
93 
93 
// Enumerators of `enum bwStatsType`, the selector passed as `type` to
// bwStats() and bwStatsFromFull().
// NOTE(review): the opening `enum bwStatsType {` (bigWig.h:97) is absent from
// this listing.
// Several names are aliases that share one value:
// mean/average = 0, stdev/dev = 1, cov/coverage = 4.
98  doesNotExist = -1,
99  mean = 0,
100  average = 0,
101  stdev = 1,
102  dev = 1,
103  max = 2,
104  min = 3,
105  cov = 4,
106  coverage = 4,
107  sum = 5
108 };
109 
110 //Should hide this from end users
// BigWig files have multiple "zoom" levels, each of which has its own header;
// this struct holds those headers.
// Every member shown is a pointer.
// NOTE(review): presumably each points to an array with one element per zoom
// level; confirm against the reader.
// NOTE(review): this listing skips bigWig.h:121, where `bwRTree_t **idx` is
// declared as a further member.
116 typedef struct {
117  uint32_t *level;
118  //There's 4 bytes of padding between these
119  uint64_t *dataOffset;
120  uint64_t *indexOffset;
122 } bwZoomHdr_t;
123 
// The header section of a bigWig file.
// The members from nBasesCovered onward are grouped under the in-source
// "total Summary" marker.
// NOTE(review): this listing skips bigWig.h:141, where `bwZoomHdr_t *zoomHdrs`
// is declared as a further member.
// NOTE(review): the meaning of the individual offsets and counts is not
// described here; confirm against the reader before relying on any of them.
129 typedef struct {
130  uint16_t version;
131  uint16_t nLevels;
132  uint64_t ctOffset;
133  uint64_t dataOffset;
134  uint64_t indexOffset;
135  uint16_t fieldCount;
136  uint16_t definedFieldCount;
137  uint64_t sqlOffset;
138  uint64_t summaryOffset;
139  uint32_t bufSize;
140  uint64_t extensionOffset;
142  //total Summary
143  uint64_t nBasesCovered;
144  double minVal;
145  double maxVal;
146  double sumData;
147  double sumSquared;
148 } bigWigHdr_t;
149 
150 //Should probably replace this with a hash
// Holds the chromosomes and their lengths.
// NOTE(review): presumably chrom[i] and len[i] describe the same chromosome
// for i in [0, nKeys); confirm against bwCreateChromList().
154 typedef struct {
155  int64_t nKeys;
156  char **chrom;
157  uint32_t *len;
158 } chromList_t;
159 
160 //TODO remove from bigWig.h
// Singly linked list node: `node` points to a bwRTreeNode_t and `next` points
// to another bwLL.
162 typedef struct bwLL bwLL;
163 struct bwLL {
164  bwRTreeNode_t *node;
165  struct bwLL *next;
166 };
// Self-referential buffer node, chained through `next`.
// NOTE(review): `p` is an untyped buffer; `l` and `m` are presumably its used
// length and allocated capacity. Confirm against the writer.
167 typedef struct bwZoomBuffer_t bwZoomBuffer_t;
168 struct bwZoomBuffer_t { //each individual entry takes 32 bytes
169  void *p;
170  uint32_t l, m;
171  struct bwZoomBuffer_t *next;
172 };
174 
// State that is only needed for writing bigWig files (and is not created
// otherwise).
// NOTE(review): this listing skips bigWig.h:192-193 (`bwLL *firstIndexNode`,
// `bwLL *currentIndexNode`) and the closing line at bigWig.h:199, so the
// definition is incomplete as shown. The type name is presumably
// bwWriteBuffer_t, as used by the `writeBuffer` member of bigWigFile_t.
179 typedef struct {
180  uint64_t nBlocks;
181  uint32_t blockSize;
182  uint64_t nEntries;
183  uint64_t runningWidthSum;
184  uint32_t tid;
185  uint32_t start;
186  uint32_t end;
187  uint32_t span;
188  uint32_t step;
189  uint8_t ltype;
190  uint32_t l;
191  void *p;
194  bwZoomBuffer_t **firstZoomBuffer;
195  bwZoomBuffer_t **lastZoomBuffer;
196  uint64_t *nNodes;
197  uLongf compressPsz; // uLongf is a zlib type (zlib.h is included above)
198  void *compressP;
200 
// A structure that holds everything needed to access a bigWig file.
// NOTE(review): this listing skips bigWig.h:205-209, where the members
// `URL_t *URL`, `bigWigHdr_t *hdr`, `chromList_t *cl`, `bwRTree_t *idx` and
// `bwWriteBuffer_t *writeBuffer` are declared.
// NOTE(review): the encodings of `isWrite` and `type` are not shown here.
204 typedef struct {
210  int isWrite;
211  int type;
212 } bigWigFile_t;
213 
// Holds interval:value associations.
// NOTE(review): the closing line (bigWig.h:223) is absent from this listing;
// the type name is presumably bwOverlappingIntervals_t, as returned by
// bwGetOverlappingIntervals().
// NOTE(review): `l` and `m` are presumably the number of entries in use and
// the allocated capacity of the three arrays; confirm.
217 typedef struct {
218  uint32_t l;
219  uint32_t m;
220  uint32_t *start;
221  uint32_t *end;
222  float *value;
224 
// Holds interval:str associations.
// NOTE(review): the closing line (bigWig.h:234) is absent from this listing;
// the type name is presumably bbOverlappingEntries_t, as returned by
// bbGetOverlappingEntries().
// NOTE(review): `l` and `m` are presumably the number of entries in use and
// the allocated capacity of the three arrays; confirm.
228 typedef struct {
229  uint32_t l;
230  uint32_t m;
231  uint32_t *start;
232  uint32_t *end;
233  char **str;
235 
// A structure to hold iterations. One of `intervals` and `entries` should be
// used to access records (the original description is truncated here).
// NOTE(review): this listing skips bigWig.h:241 (`bigWigFile_t *bw`),
// 246 (`uint32_t blocksPerIteration`), 247 (`int withString`),
// 249 (`bwOverlappingIntervals_t *intervals`),
// 250 (`bbOverlappingEntries_t *entries`) and the closing line at 252.
// The type name is presumably bwOverlapIterator_t.
240 typedef struct {
242  uint32_t tid;
243  uint32_t start;
244  uint32_t end;
245  uint64_t offset;
248  void *blocks;
251  void *data;
253 
// Initializes curl and global variables. This MUST be called before other
// functions (the original description is truncated in this listing).
261 int bwInit(size_t bufSize);
262 
// The counterpart to bwInit; this cleans up curl.
267 void bwCleanup(void);
268 
// Determine if a file is a bigWig file. Quickly checks either local or remote
// files (the original description is truncated in this listing).
276 int bwIsBigWig(char *fname, CURLcode (*callBack)(CURL*));
277 
// Determine if a file is a bigBed file. Quickly checks either local or remote
// files (the original description is truncated in this listing).
285 int bbIsBigBed(char *fname, CURLcode (*callBack)(CURL*));
286 
// Opens a local or remote bigWig file. Writing of local bigWig files is also
// supported.
// NOTE(review): `mode` presumably selects reading vs. writing; the accepted
// values are not shown here.
295 bigWigFile_t *bwOpen(char *fname, CURLcode (*callBack)(CURL*), const char* mode);
296 
// Opens a local or remote bigBed file. This file format can only be read and
// NOT written.
304 bigWigFile_t *bbOpen(char *fname, CURLcode (*callBack)(CURL*));
305 
// Returns a string containing the SQL entry (or NULL).
314 char *bbGetSQL(bigWigFile_t *fp);
315 
// Closes a bigWigFile_t and frees up allocated memory. This closes both bigWig
// and bigBed files.
321 void bwClose(bigWigFile_t *fp);
322 
323 /*******************************************************************************
324 *
325 * The following are in bwValues.c and bwStats.c
326 *
327 *******************************************************************************/
328 
// Converts between chromosome name and ID.
336 uint32_t bwGetTid(bigWigFile_t *fp, char *chrom);
337 
// NOTE(review): lines 344, 351, 422 and 428 are blank in this listing. The
// tooltips describe bwDestroyOverlappingIntervals(),
// bbDestroyOverlappingEntries(), bwIteratorNext() and bwIteratorDestroy(),
// none of which has a prototype here; they presumably belong on those lines.
344 
351 
// Return bigWig entries overlapping an interval. The tooltips state that
// bwDestroyOverlappingIntervals() frees space allocated by this function.
364 bwOverlappingIntervals_t *bwGetOverlappingIntervals(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end);
365 
// Return bigBed entries overlapping an interval. The tooltips state that
// bbDestroyOverlappingEntries() frees space allocated by this function.
378 bbOverlappingEntries_t *bbGetOverlappingEntries(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int withString);
379 
// Creates an iterator over intervals in a bigWig file. Iterators can be
// traversed with bwIteratorNext().
394 bwOverlapIterator_t *bwOverlappingIntervalsIterator(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, uint32_t blocksPerIteration);
395 
// Creates an iterator over entries in a bigBed file. Iterators can be
// traversed with bwIteratorNext().
412 bwOverlapIterator_t *bbOverlappingEntriesIterator(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int withString, uint32_t blocksPerIteration);
413 
422 
428 
// Return all per-base bigWig values in a given interval. Positions without
// associated values are suppressed by default, but may be returned if
// includeNA is not 0.
442 bwOverlappingIntervals_t *bwGetValues(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int includeNA);
443 
// Determines per-interval bigWig statistics (mean/min/max/coverage/standard
// deviation; the original description is truncated in this listing).
456 double *bwStats(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type);
457 
// Determines per-interval bigWig statistics; the tooltip text is identical to
// that of bwStats().
// NOTE(review): how this differs from bwStats() is not stated here. The name
// suggests it works from the full data; confirm in bwStats.c.
470 double *bwStatsFromFull(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type);
471 
472 //Writer functions
473 
// Create a largely empty bigWig header. Every bigWig file has a header; this
// creates the template for one (the original description is truncated).
481 int bwCreateHdr(bigWigFile_t *fp, int32_t maxZooms);
482 
// Take a list of chromosome names and lengths and return a pointer to a
// chromList_t. This MUST be run before a later step whose name is truncated in
// this listing.
491 chromList_t *bwCreateChromList(char **chroms, uint32_t *lengths, int64_t n);
492 
// Write the header to a bigWig file. You must have already opened the output
// file, created a header and a chromosome list.
500 int bwWriteHdr(bigWigFile_t *bw);
501 
// Write a new block of bedGraph-like intervals to a bigWig file.
518 int bwAddIntervals(bigWigFile_t *fp, char **chrom, uint32_t *start, uint32_t *end, float *values, uint32_t n);
519 
// Append bedGraph-like intervals to a previous block of bedGraph-like
// intervals in a bigWig file.
532 int bwAppendIntervals(bigWigFile_t *fp, uint32_t *start, uint32_t *end, float *values, uint32_t n);
533 
// Add a new block of variable-step entries to a bigWig file.
550 int bwAddIntervalSpans(bigWigFile_t *fp, char *chrom, uint32_t *start, uint32_t span, float *values, uint32_t n);
551 
// Append to a previous block of variable-step entries. If you previously used
// bwAddIntervalSpans(), this continues appending values to the block(s) it
// created.
563 int bwAppendIntervalSpans(bigWigFile_t *fp, uint32_t *start, float *values, uint32_t n);
564 
// Add a new block of fixed-step entries to a bigWig file.
582 int bwAddIntervalSpanSteps(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t span, uint32_t step, float *values, uint32_t n);
583 
// Append to a previous block of fixed-step entries. If you previously used
// bwAddIntervalSpanSteps(), this continues appending values to the block(s) it
// created.
594 int bwAppendIntervalSpanSteps(bigWigFile_t *fp, float *values, uint32_t n);
595 
596 #ifdef __cplusplus
597 }
598 #endif
uint64_t summaryOffset
Definition: bigWig.h:138
void bbDestroyOverlappingEntries(bbOverlappingEntries_t *o)
Frees space allocated by bbGetOverlappingEntries
Definition: bwValues.c:332
Holds interval:str associations.
Definition: bigWig.h:228
This structure holds the file pointers and buffers needed for raw access to local and remote files...
Definition: bigWigIO.h:32
uint64_t nBlocks
Definition: bigWig.h:180
bwZoomHdr_t * zoomHdrs
Definition: bigWig.h:141
double maxVal
Definition: bigWig.h:145
uint16_t fieldCount
Definition: bigWig.h:135
int bwAppendIntervalSpans(bigWigFile_t *fp, uint32_t *start, float *values, uint32_t n)
Append to a previous block of variable-step entries. If you previously used bwAddIntervalSpans(), this will continue appending more values to the block(s) it created.
Definition: bwWrite.c:495
int bwAddIntervals(bigWigFile_t *fp, char **chrom, uint32_t *start, uint32_t *end, float *values, uint32_t n)
Write a new block of bedGraph-like intervals to a bigWig file. Adds entries of the form: chromosome st...
Definition: bwWrite.c:374
Definition: bigWig.h:104
bbOverlappingEntries_t * bbGetOverlappingEntries(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int withString)
Return bigBed entries overlapping an interval. Find all bigBed entries overlapping a range and return...
Definition: bwValues.c:574
int bwCreateHdr(bigWigFile_t *fp, int32_t maxZooms)
Create a largely empty bigWig header. Every bigWig file has a header; this creates the template for on...
Definition: bwWrite.c:56
uint32_t span
Definition: bigWig.h:187
bwWriteBuffer_t * writeBuffer
Definition: bigWig.h:209
uint32_t m
Definition: bigWig.h:219
uint32_t * start
Definition: bigWig.h:220
Definition: bigWig.h:98
bwOverlappingIntervals_t * intervals
Definition: bigWig.h:249
uint64_t offset
Definition: bigWig.h:245
char * bbGetSQL(bigWigFile_t *fp)
Returns a string containing the SQL entry (or NULL). The "auto SQL" field contains the names and valu...
Definition: bwRead.c:315
Definition: bigWig.h:99
uint32_t start
Definition: bigWig.h:185
bbOverlappingEntries_t * entries
Definition: bigWig.h:250
bigWigFile_t * bw
Definition: bigWig.h:241
uint32_t l
Definition: bigWig.h:218
uint32_t l
Definition: bigWig.h:190
uint32_t * len
Definition: bigWig.h:157
uint32_t end
Definition: bigWig.h:186
uint32_t blocksPerIteration
Definition: bigWig.h:246
bigWigFile_t * bwOpen(char *fname, CURLcode(*callBack)(CURL *), const char *mode)
Opens a local or remote bigWig file. This will open a local or remote bigWig file. Writing of local bigWig files is also supported.
Definition: bwRead.c:345
bwZoomBuffer_t ** lastZoomBuffer
Definition: bigWig.h:195
double sumData
Definition: bigWig.h:146
bwLL * currentIndexNode
Definition: bigWig.h:193
uint16_t version
Definition: bigWig.h:130
int type
Definition: bigWig.h:211
Definition: bigWig.h:106
uint8_t ltype
Definition: bigWig.h:189
Definition: bigWig.h:102
uint32_t bwGetTid(bigWigFile_t *fp, char *chrom)
Converts between chromosome name and ID.
Definition: bwValues.c:283
uint32_t l
Definition: bigWig.h:229
A structure to hold iterations. One of intervals and entries should be used to access records from big...
Definition: bigWig.h:240
bwZoomBuffer_t ** firstZoomBuffer
Definition: bigWig.h:194
int bwWriteHdr(bigWigFile_t *bw)
Write the header to a bigWig file. You must have already opened the output file, created a header and a chromosome list.
Definition: bwWrite.c:187
uint64_t nEntries
Definition: bigWig.h:182
uint32_t tid
Definition: bigWig.h:242
bwOverlapIterator_t * bbOverlappingEntriesIterator(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int withString, uint32_t blocksPerIteration)
Creates an iterator over entries in a bigBed file. Iterators can be traversed with bwIteratorNext() an...
Definition: bwValues.c:614
The header section of a bigWig file.
Definition: bigWig.h:129
uint64_t sqlOffset
Definition: bigWig.h:137
bwOverlappingIntervals_t * bwGetValues(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int includeNA)
Return all per-base bigWig values in a given interval. Given an interval (e.g., chr1:0-100), return the value at each position in a bigWig file. Positions without associated values are suppressed by default, but may be returned if includeNA is not 0.
Definition: bwValues.c:712
Definition: bigWig.h:100
uint32_t blockSize
Definition: bigWig.h:181
int bwAppendIntervalSpanSteps(bigWigFile_t *fp, float *values, uint32_t n)
Append to a previous block of fixed-step entries. If you previously used bwAddIntervalSpanSteps(), this will continue appending more values to the block(s) it created.
Definition: bwWrite.c:552
int bwAddIntervalSpanSteps(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t span, uint32_t step, float *values, uint32_t n)
Add a new block of fixed-step entries to a bigWig file. Adds entries of the form value to the file...
Definition: bwWrite.c:520
uint32_t start
Definition: bigWig.h:243
uint32_t bufSize
Definition: bigWig.h:139
uint64_t ctOffset
Definition: bigWig.h:132
This is only needed for writing bigWig files (and won't be created otherwise). This should be removed ...
Definition: bigWig.h:179
uint32_t tid
Definition: bigWig.h:184
bwRTree_t ** idx
Definition: bigWig.h:121
Definition: bigWig.h:103
uint64_t nBasesCovered
Definition: bigWig.h:143
int bwIsBigWig(char *fname, CURLcode(*callBack)(CURL *))
Determine if a file is a bigWig file. This function will quickly check either local or remote files t...
Definition: bwRead.c:302
char ** str
Definition: bigWig.h:233
BigWig files have multiple "zoom" levels, each of which has its own header. This holds those headers...
Definition: bigWig.h:116
uint32_t * start
Definition: bigWig.h:231
uint64_t dataOffset
Definition: bigWig.h:133
Definition: bigWig.h:107
void * blocks
Definition: bigWig.h:248
bwOverlappingIntervals_t * bwGetOverlappingIntervals(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end)
Return bigWig entries overlapping an interval. Find all bigWig entries overlapping a range and return...
Definition: bwValues.c:562
void * p
Definition: bigWig.h:191
URL_t * URL
Definition: bigWig.h:205
double sumSquared
Definition: bigWig.h:147
chromList_t * bwCreateChromList(char **chroms, uint32_t *lengths, int64_t n)
Take a list of chromosome names and lengths and return a pointer to a chromList_t. This MUST be run be...
Definition: bwWrite.c:22
int bwAddIntervalSpans(bigWigFile_t *fp, char *chrom, uint32_t *start, uint32_t span, float *values, uint32_t n)
Add a new block of variable-step entries to a bigWig file. Adds entries of the form chromosome start ...
Definition: bwWrite.c:462
uint32_t step
Definition: bigWig.h:188
uint16_t nLevels
Definition: bigWig.h:131
bwLL * firstIndexNode
Definition: bigWig.h:192
int bwInit(size_t bufSize)
Initializes curl and global variables. This MUST be called before other functions (at least if you wa...
Definition: bwRead.c:38
bigWigHdr_t * hdr
Definition: bigWig.h:206
void bwIteratorDestroy(bwOverlapIterator_t *iter)
Destroys a bwOverlapIterator_t.
Definition: bwValues.c:642
void bwDestroyOverlappingIntervals(bwOverlappingIntervals_t *o)
Frees space allocated by bwGetOverlappingIntervals
Definition: bwValues.c:324
void * data
Definition: bigWig.h:251
int isWrite
Definition: bigWig.h:210
bwOverlapIterator_t * bwIteratorNext(bwOverlapIterator_t *iter)
Traverses to the entries/intervals in the next group of blocks.
Definition: bwValues.c:651
float * value
Definition: bigWig.h:222
char ** chrom
Definition: bigWig.h:156
Holds interval:value associations.
Definition: bigWig.h:217
uint64_t runningWidthSum
Definition: bigWig.h:183
double * bwStatsFromFull(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type)
Determines per-interval bigWig statistics. Can determine mean/min/max/coverage/standard deviation of v...
Definition: bwStats.c:485
uint16_t definedFieldCount
Definition: bigWig.h:136
A structure that holds everything needed to access a bigWig file.
Definition: bigWig.h:204
int withString
Definition: bigWig.h:247
double * bwStats(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type)
Determines per-interval bigWig statistics. Can determine mean/min/max/coverage/standard deviation of v...
Definition: bwStats.c:530
bwStatsType
Definition: bigWig.h:97
bwRTree_t * idx
Definition: bigWig.h:208
int bbIsBigBed(char *fname, CURLcode(*callBack)(CURL *))
Determine if a file is a bigBed file. This function will quickly check either local or remote files t...
Definition: bwRead.c:332
uint64_t * nNodes
Definition: bigWig.h:196
uint32_t m
Definition: bigWig.h:230
double minVal
Definition: bigWig.h:144
void bwCleanup(void)
The counterpart to bwInit, this cleans up curl.
Definition: bwRead.c:52
uint32_t * end
Definition: bigWig.h:221
Holds the chromosomes and their lengths.
Definition: bigWig.h:154
uint32_t end
Definition: bigWig.h:244
Definition: bwValues.h:36
uLongf compressPsz
Definition: bigWig.h:197
uint64_t * indexOffset
Definition: bigWig.h:120
uint64_t indexOffset
Definition: bigWig.h:134
int bwAppendIntervals(bigWigFile_t *fp, uint32_t *start, uint32_t *end, float *values, uint32_t n)
Append bedGraph-like intervals to a previous block of bedGraph-like intervals in a bigWig file...
Definition: bwWrite.c:434
uint64_t extensionOffset
Definition: bigWig.h:140
Definition: bigWig.h:101
uint64_t * dataOffset
Definition: bigWig.h:119
chromList_t * cl
Definition: bigWig.h:207
uint32_t * level
Definition: bigWig.h:117
A node within an R-tree holding the index for data.
Definition: bwValues.h:17
uint32_t * end
Definition: bigWig.h:232
void bwClose(bigWigFile_t *fp)
Closes a bigWigFile_t and frees up allocated memory. This closes both bigWig and bigBed files...
Definition: bwRead.c:289
bwOverlapIterator_t * bwOverlappingIntervalsIterator(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, uint32_t blocksPerIteration)
Creates an iterator over intervals in a bigWig file. Iterators can be traversed with bwIteratorNext() ...
Definition: bwValues.c:586
int64_t nKeys
Definition: bigWig.h:155
void * compressP
Definition: bigWig.h:198
Definition: bigWig.h:105
bigWigFile_t * bbOpen(char *fname, CURLcode(*callBack)(CURL *))
Opens a local or remote bigBed file. This will open a local or remote bigBed file. Note that this file format can only be read and NOT written!
Definition: bwRead.c:397