VariantKey  5.4.1
Numerical Encoding for Human Genetic Variants
rsidvar.h
Go to the documentation of this file.
1 // VariantKey
2 //
3 // rsidvar.h
4 //
5 // @category Libraries
6 // @author Nicola Asuni <nicola.asuni@genomicsplc.com>
7 // @copyright 2017-2018 GENOMICS plc
8 // @license MIT (see LICENSE)
9 // @link https://github.com/genomicsplc/variantkey
10 //
11 // LICENSE
12 //
13 // Copyright (c) 2017-2018 GENOMICS plc
14 //
15 // Permission is hereby granted, free of charge, to any person obtaining a copy
16 // of this software and associated documentation files (the "Software"), to deal
17 // in the Software without restriction, including without limitation the rights
18 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
19 // copies of the Software, and to permit persons to whom the Software is
20 // furnished to do so, subject to the following conditions:
21 //
22 // The above copyright notice and this permission notice shall be included in
23 // all copies or substantial portions of the Software.
24 //
25 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
31 // THE SOFTWARE.
32 
53 #ifndef VARIANTKEY_RSIDVAR_H
54 #define VARIANTKEY_RSIDVAR_H
55 
56 #include "binsearch.h"
57 #include "variantkey.h"
58 
/**
 * Column pointers for an rsID / VariantKey binary file.
 *
 * The pointers are filled in by mmap_vkrs_file() or mmap_rsvk_file() and
 * refer to data inside the memory map described by the mmfile_t argument
 * of those functions.
 */
typedef struct rsidvar_cols_t
{
    const uint64_t *vk; //!< Pointer to the VariantKey column.
    const uint32_t *rs; //!< Pointer to the rsID column.
    uint64_t nrows;     //!< Number of rows.
} rsidvar_cols_t;
68 
78 static inline void mmap_vkrs_file(const char *file, mmfile_t *mf, rsidvar_cols_t *cvr)
79 {
80  mmap_binfile(file, mf);
81  cvr->vk = (const uint64_t *)(mf->src + mf->index[0]);
82  cvr->rs = (const uint32_t *)(mf->src + mf->index[1]);
83  cvr->nrows = mf->nrows;
84 }
85 
95 static inline void mmap_rsvk_file(const char *file, mmfile_t *mf, rsidvar_cols_t *crv)
96 {
97  mmap_binfile(file, mf);
98  crv->rs = (const uint32_t *)(mf->src + mf->index[0]);
99  crv->vk = (const uint64_t *)(mf->src + mf->index[1]);
100  crv->nrows = mf->nrows;
101 }
102 
114 static inline uint64_t find_rv_variantkey_by_rsid(rsidvar_cols_t crv, uint64_t *first, uint64_t last, uint32_t rsid)
115 {
116  uint64_t max = last;
117  uint64_t found = col_find_first_uint32_t(crv.rs, first, &max, rsid);
118  if (found >= last)
119  {
120  return 0;
121  }
122  *first = found;
123  return *(crv.vk + found);
124 }
125 
138 static inline uint64_t get_next_rv_variantkey_by_rsid(rsidvar_cols_t crv, uint64_t *pos, uint64_t last, uint32_t rsid)
139 {
140  if (col_has_next_uint32_t(crv.rs, pos, last, rsid))
141  {
142  return *(crv.vk + *pos);
143  }
144  return 0;
145 }
146 
158 static inline uint32_t find_vr_rsid_by_variantkey(rsidvar_cols_t cvr, uint64_t *first, uint64_t last, uint64_t vk)
159 {
160  uint64_t max = last;
161  uint64_t found = col_find_first_uint64_t(cvr.vk, first, &max, vk);
162  if (found >= last)
163  {
164  return 0; // not found
165  }
166  *first = found;
167  return *(cvr.rs + found);
168 }
169 
182 static inline uint32_t get_next_vr_rsid_by_variantkey(rsidvar_cols_t cvr, uint64_t *pos, uint64_t last, uint64_t vk)
183 {
184  if (col_has_next_uint64_t(cvr.vk, pos, last, vk))
185  {
186  return *(cvr.rs + *pos);
187  }
188  return 0;
189 }
190 
203 static inline uint32_t find_vr_chrompos_range(rsidvar_cols_t cvr, uint64_t *first, uint64_t *last, uint8_t chrom, uint32_t pos_min, uint32_t pos_max)
204 {
205  uint64_t ckey = ((uint64_t)chrom << 59);
206  uint64_t min = *first;
207  uint64_t max = *last;
208  *first = col_find_first_sub_uint64_t(cvr.vk, 0, 32, &min, &max, (ckey | ((uint64_t)pos_min << 31)) >> 31);
209  if (*first >= *last)
210  {
211  return 0;
212  }
213  min = *first;
214  max = *last;
215  uint64_t end = col_find_last_sub_uint64_t(cvr.vk, 0, 32, &min, &max, (ckey | ((uint64_t)pos_max << 31)) >> 31);
216  if (end < *last)
217  {
218  ++end;
219  }
220  *last = end;
221  return *(cvr.rs + *first);
222 }
223 
224 #endif // VARIANTKEY_RSIDVAR_H
static uint64_t col_find_first_uint32_t(const uint32_t *src, uint64_t *first, uint64_t *last, uint32_t search)
Definition: binsearch.h:707
static uint64_t get_next_rv_variantkey_by_rsid(rsidvar_cols_t crv, uint64_t *pos, uint64_t last, uint32_t rsid)
Definition: rsidvar.h:138
static void mmap_vkrs_file(const char *file, mmfile_t *mf, rsidvar_cols_t *cvr)
Definition: rsidvar.h:78
uint64_t nrows
Number of rows.
Definition: binsearch.h:236
static uint32_t find_vr_chrompos_range(rsidvar_cols_t cvr, uint64_t *first, uint64_t *last, uint8_t chrom, uint32_t pos_min, uint32_t pos_max)
Definition: rsidvar.h:203
Definition: binsearch.h:229
uint8_t * src
Pointer to the memory map.
Definition: binsearch.h:231
static uint64_t col_find_first_uint64_t(const uint64_t *src, uint64_t *first, uint64_t *last, uint64_t search)
Definition: binsearch.h:708
static uint64_t col_find_first_sub_uint64_t(const uint64_t *src, uint8_t bitstart, uint8_t bitend, uint64_t *first, uint64_t *last, uint64_t search)
Definition: binsearch.h:741
static uint64_t col_find_last_sub_uint64_t(const uint64_t *src, uint8_t bitstart, uint8_t bitend, uint64_t *first, uint64_t *last, uint64_t search)
Definition: binsearch.h:804
static void mmap_binfile(const char *file, mmfile_t *mf)
Definition: binsearch.h:995
static bool col_has_next_uint64_t(const uint64_t *src, uint64_t *pos, uint64_t last, uint64_t search)
Definition: binsearch.h:832
static uint32_t get_next_vr_rsid_by_variantkey(rsidvar_cols_t cvr, uint64_t *pos, uint64_t last, uint64_t vk)
Definition: rsidvar.h:182
const uint32_t * rs
Pointer to the rsID column.
Definition: rsidvar.h:65
VariantKey main functions.
const uint64_t * vk
Pointer to the VariantKey column.
Definition: rsidvar.h:64
Functions to search values in binary files made of constant-length items.
uint64_t nrows
Number of rows.
Definition: rsidvar.h:66
static uint32_t find_vr_rsid_by_variantkey(rsidvar_cols_t cvr, uint64_t *first, uint64_t last, uint64_t vk)
Definition: rsidvar.h:158
static bool col_has_next_uint32_t(const uint32_t *src, uint64_t *pos, uint64_t last, uint32_t search)
Definition: binsearch.h:831
uint64_t index[256]
Index of the offsets to the beginning of each column.
Definition: binsearch.h:239
static uint64_t find_rv_variantkey_by_rsid(rsidvar_cols_t crv, uint64_t *first, uint64_t last, uint32_t rsid)
Definition: rsidvar.h:114
static void mmap_rsvk_file(const char *file, mmfile_t *mf, rsidvar_cols_t *crv)
Definition: rsidvar.h:95
Definition: rsidvar.h:62
struct rsidvar_cols_t rsidvar_cols_t