VariantKey  5.4.1
Numerical Encoding for Human Genetic Variants
set.h
Go to the documentation of this file.
1 // VariantKey
2 //
3 // set.h
4 //
5 // @category Libraries
6 // @author Nicola Asuni <nicola.asuni@genomicsplc.com>
7 // @copyright 2017-2018 GENOMICS plc
8 // @license MIT (see LICENSE)
9 // @link https://github.com/genomicsplc/variantkey
10 //
11 // LICENSE
12 //
13 // Copyright (c) 2017-2018 GENOMICS plc
14 //
15 // Permission is hereby granted, free of charge, to any person obtaining a copy
16 // of this software and associated documentation files (the "Software"), to deal
17 // in the Software without restriction, including without limitation the rights
18 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
19 // copies of the Software, and to permit persons to whom the Software is
20 // furnished to do so, subject to the following conditions:
21 //
22 // The above copyright notice and this permission notice shall be included in
23 // all copies or substantial portions of the Software.
24 //
25 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
31 // THE SOFTWARE.
32 
40 #ifndef VARIANTKEY_SET_H
41 #define VARIANTKEY_SET_H
42 
43 #include <inttypes.h>
44 #include <stdlib.h>
45 
// Counting phase of the 8-pass LSD (least-significant-digit-first) radix sort.
// Declares, in the enclosing function scope, one working set per byte of a uint64_t:
//   cN[256] - histogram of byte values, converted below into an exclusive
//             prefix sum (the starting output offset for each byte value);
//   oN, tN  - running offset and swap temporary used while building the prefix sums;
//   i, v    - loop index and current value, shared with RADIX_SORT_ITERATION_BLOCK.
// Index convention: c7 counts the least-significant byte (shift 0) and c0 the
// most-significant byte (shift 56).
// Expects `arr` (uint64_t*) and `nitems` (uint32_t) to be in scope at the
// expansion point.
#define RADIX_SORT_COUNT_BLOCK \
    uint32_t c7[256]= {0}, c6[256]= {0}, c5[256]= {0}, c4[256]= {0}, c3[256]= {0}, c2[256]= {0}, c1[256]= {0}, c0[256]= {0}; \
    uint32_t o7=0, o6=0, o5=0, o4=0, o3=0, o2=0, o1=0, o0=0; \
    uint32_t t7, t6, t5, t4, t3, t2, t1, t0; \
    uint32_t i; \
    uint64_t v; \
    /* histogram every byte of every item in a single pass */ \
    for(i = 0; i < nitems; i++) \
    { \
        v = arr[i]; \
        c7[(v & 0xff)]++; \
        c6[((v >> 8) & 0xff)]++; \
        c5[((v >> 16) & 0xff)]++; \
        c4[((v >> 24) & 0xff)]++; \
        c3[((v >> 32) & 0xff)]++; \
        c2[((v >> 40) & 0xff)]++; \
        c1[((v >> 48) & 0xff)]++; \
        c0[((v >> 56) & 0xff)]++; \
    } \
    /* turn each histogram into an exclusive prefix sum of output offsets */ \
    for(i = 0; i < 256; i++) \
    { \
        t7 = (o7 + c7[i]); \
        c7[i] = o7; \
        o7 = t7; \
        t6 = (o6 + c6[i]); \
        c6[i] = o6; \
        o6 = t6; \
        t5 = (o5 + c5[i]); \
        c5[i] = o5; \
        o5 = t5; \
        t4 = (o4 + c4[i]); \
        c4[i] = o4; \
        o4 = t4; \
        t3 = (o3 + c3[i]); \
        c3[i] = o3; \
        o3 = t3; \
        t2 = (o2 + c2[i]); \
        c2[i] = o2; \
        o2 = t2; \
        t1 = (o1 + c1[i]); \
        c1[i] = o1; \
        o1 = t1; \
        t0 = (o0 + c0[i]); \
        c0[i] = o0; \
        o0 = t0; \
    }
91 
// One stable distribution pass of the radix sort: copies every item of A into B
// at the slot recorded in the prefix-sum table c<BYTE> for byte ((v >> SHIFT) & 0xff),
// bumping that slot counter as it goes. Relies on the `i`, `v` and c<BYTE>
// variables declared by RADIX_SORT_COUNT_BLOCK, and on `nitems` being in scope.
#define RADIX_SORT_ITERATION_BLOCK(A, B, BYTE, SHIFT) \
    for (i = 0; i < nitems; i++) \
    { \
        v = (A)[i]; \
        (B)[c##BYTE[((v >> (SHIFT)) & 0xff)]++] = v; \
    }
98 
106 static inline void sort_uint64_t(uint64_t *arr, uint64_t *tmp, uint32_t nitems)
107 {
109  RADIX_SORT_ITERATION_BLOCK(arr, tmp, 7, 0)
110  RADIX_SORT_ITERATION_BLOCK(tmp, arr, 6, 8)
111  RADIX_SORT_ITERATION_BLOCK(arr, tmp, 5, 16)
112  RADIX_SORT_ITERATION_BLOCK(tmp, arr, 4, 24)
113  RADIX_SORT_ITERATION_BLOCK(arr, tmp, 3, 32)
114  RADIX_SORT_ITERATION_BLOCK(tmp, arr, 2, 40)
115  RADIX_SORT_ITERATION_BLOCK(arr, tmp, 1, 48)
116  RADIX_SORT_ITERATION_BLOCK(tmp, arr, 0, 56)
117 }
118 
128 static inline void order_uint64_t(uint64_t *arr, uint64_t *tmp, uint32_t *idx, uint32_t *tdx, uint32_t nitems)
129 {
130  uint32_t j;
132  for (i = 0; i < nitems; i++)
133  {
134  v = arr[i];
135  t7 = (v & 0xff);
136  j = c7[t7];
137  tmp[j] = v;
138  c7[t7]++;
139  tdx[j] = i;
140  }
141  for (i = 0; i < nitems; i++)
142  {
143  v = tmp[i];
144  t6 = ((v >> 8) & 0xff);
145  j = c6[t6];
146  arr[j] = v;
147  c6[t6]++;
148  idx[j] = tdx[i];
149  }
150  for (i = 0; i < nitems; i++)
151  {
152  v = arr[i];
153  t5 = ((v >> 16) & 0xff);
154  j = c5[t5];
155  tmp[j] = v;
156  c5[t5]++;
157  tdx[j] = idx[i];
158  }
159  for (i = 0; i < nitems; i++)
160  {
161  v = tmp[i];
162  t4 = ((v >> 24) & 0xff);
163  j = c4[t4];
164  arr[j] = v;
165  c4[t4]++;
166  idx[j] = tdx[i];
167  }
168  for (i = 0; i < nitems; i++)
169  {
170  v = arr[i];
171  t3 = ((v >> 32) & 0xff);
172  j = c3[t3];
173  tmp[j] = v;
174  c3[t3]++;
175  tdx[j] = idx[i];
176  }
177  for (i = 0; i < nitems; i++)
178  {
179  v = tmp[i];
180  t2 = ((v >> 40) & 0xff);
181  j = c2[t2];
182  arr[j] = v;
183  c2[t2]++;
184  idx[j] = tdx[i];
185  }
186  for (i = 0; i < nitems; i++)
187  {
188  v = arr[i];
189  t1 = ((v >> 48) & 0xff);
190  j = c1[t1];
191  tmp[j] = v;
192  c1[t1]++;
193  tdx[j] = idx[i];
194  }
195  for (i = 0; i < nitems; i++)
196  {
197  v = tmp[i];
198  t0 = ((v >> 56) & 0xff);
199  j = c0[t0];
200  arr[j] = v;
201  c0[t0]++;
202  idx[tdx[i]] = j;
203  }
204 }
205 
/**
 * Reverses the order of the items in a uint64_t array, in place.
 *
 * @param arr    Array to reverse.
 * @param nitems Number of elements in the array.
 */
static inline void reverse_uint64_t(uint64_t *arr, uint64_t nitems)
{
    if (nitems < 2)
    {
        return; // nothing to swap
    }
    uint64_t *lo = arr;
    uint64_t *hi = (arr + nitems - 1);
    while (lo < hi)
    {
        uint64_t swap = *lo;
        *lo++ = *hi;
        *hi-- = swap;
    }
}
223 
/**
 * Removes consecutive duplicate values from a sorted uint64_t array, in place.
 * (On a sorted array this leaves exactly one copy of each distinct value.)
 *
 * @param arr    Sorted array to deduplicate.
 * @param nitems Number of elements in the array.
 *
 * @return Pointer one past the last unique element; the number of unique
 *         items is (return value - arr). Elements beyond that point are
 *         left in an unspecified state.
 */
static inline uint64_t *unique_uint64_t(uint64_t *arr, uint64_t nitems)
{
    if (nitems == 0)
    {
        return arr;
    }
    uint64_t w = 0; // index of the last kept (unique) element
    for (uint64_t r = 1; r < nitems; r++)
    {
        if (arr[w] != arr[r])
        {
            arr[++w] = arr[r]; // keep the first element of each run
        }
    }
    return (arr + w + 1);
}
249 
/**
 * Computes the intersection of two sorted uint64_t arrays (linear merge walk).
 *
 * @param a_arr    First sorted array.
 * @param a_nitems Number of elements in the first array.
 * @param b_arr    Second sorted array.
 * @param b_nitems Number of elements in the second array.
 * @param o_arr    Output array; must be large enough to hold the result.
 *
 * @return Pointer one past the last element written to o_arr; the result
 *         size is (return value - o_arr).
 */
static inline uint64_t *intersection_uint64_t(uint64_t *a_arr, uint64_t a_nitems, uint64_t *b_arr, uint64_t b_nitems, uint64_t *o_arr)
{
    uint64_t ai = 0;
    uint64_t bi = 0;
    while ((ai < a_nitems) && (bi < b_nitems))
    {
        if (a_arr[ai] < b_arr[bi])
        {
            ai++; // a is behind: advance it
        }
        else if (a_arr[ai] > b_arr[bi])
        {
            bi++; // b is behind: advance it
        }
        else
        {
            *o_arr++ = a_arr[ai]; // common value: emit once, advance both
            ai++;
            bi++;
        }
    }
    return o_arr;
}
280 
/**
 * Computes the union of two sorted uint64_t arrays (linear merge walk).
 * A value present in both arrays is emitted only once.
 *
 * @param a_arr    First sorted array.
 * @param a_nitems Number of elements in the first array.
 * @param b_arr    Second sorted array.
 * @param b_nitems Number of elements in the second array.
 * @param o_arr    Output array; must be large enough to hold the result
 *                 (at most a_nitems + b_nitems elements).
 *
 * @return Pointer one past the last element written to o_arr; the result
 *         size is (return value - o_arr).
 */
static inline uint64_t *union_uint64_t(uint64_t *a_arr, uint64_t a_nitems, uint64_t *b_arr, uint64_t b_nitems, uint64_t *o_arr)
{
    uint64_t ai = 0;
    uint64_t bi = 0;
    while ((ai < a_nitems) && (bi < b_nitems))
    {
        uint64_t av = a_arr[ai];
        uint64_t bv = b_arr[bi];
        if (av <= bv)
        {
            *o_arr++ = av; // emit the smaller (or the shared) value from a
            ai++;
            if (av == bv)
            {
                bi++; // equal values: skip b's copy so it is emitted once
            }
        }
        else
        {
            *o_arr++ = bv;
            bi++;
        }
    }
    // copy whichever tail remains
    while (ai < a_nitems)
    {
        *o_arr++ = a_arr[ai++];
    }
    while (bi < b_nitems)
    {
        *o_arr++ = b_arr[bi++];
    }
    return o_arr;
}
321 
322 #endif // VARIANTKEY_SET_H
#define RADIX_SORT_COUNT_BLOCK
Definition: set.h:46
static void order_uint64_t(uint64_t *arr, uint64_t *tmp, uint32_t *idx, uint32_t *tdx, uint32_t nitems)
Definition: set.h:128
static void sort_uint64_t(uint64_t *arr, uint64_t *tmp, uint32_t nitems)
Definition: set.h:106
#define RADIX_SORT_ITERATION_BLOCK(A, B, BYTE, SHIFT)
Definition: set.h:92
static uint64_t * union_uint64_t(uint64_t *a_arr, uint64_t a_nitems, uint64_t *b_arr, uint64_t b_nitems, uint64_t *o_arr)
Definition: set.h:292
static uint64_t * intersection_uint64_t(uint64_t *a_arr, uint64_t a_nitems, uint64_t *b_arr, uint64_t b_nitems, uint64_t *o_arr)
Definition: set.h:261
static void reverse_uint64_t(uint64_t *arr, uint64_t nitems)
Definition: set.h:212
static uint64_t * unique_uint64_t(uint64_t *arr, uint64_t nitems)
Definition: set.h:232