VariantKey  5.4.1
Numerical Encoding for Human Genetic Variants
regionkey.h File Reference

RegionKey main functions. More...

#include <stdio.h>
#include "nrvk.h"

Go to the source code of this file.

Data Structures

struct  regionkey_t
 
struct  regionkey_rev_t
 

Macros

#define RK_MAX_POS   0x000000000FFFFFFF
 Maximum position value (2^28 - 1) More...
 
#define RKMASK_CHROM   0xF800000000000000
 RegionKey binary mask for CHROM [ 11111000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 ]. More...
 
#define RKMASK_STARTPOS   0x07FFFFFF80000000
 RegionKey binary mask for START POS [ 00000111 11111111 11111111 11111111 10000000 00000000 00000000 00000000 ]. More...
 
#define RKMASK_ENDPOS   0x000000007FFFFFF8
 RegionKey binary mask for END POS [ 00000000 00000000 00000000 00000000 01111111 11111111 11111111 11111000 ]. More...
 
#define RKMASK_STRAND   0x0000000000000006
 RegionKey binary mask for STRAND [ 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000110 ]. More...
 
#define RKMASK_NOPOS   0xF800000000000007
 RegionKey binary mask WITHOUT POS [ 11111000 00000000 00000000 00000000 00000000 00000000 00000000 00000111 ]. More...
 
#define RKSHIFT_CHROM   59
 CHROM LSB position from the RegionKey LSB. More...
 
#define RKSHIFT_STARTPOS   31
 START POS LSB position from the RegionKey LSB. More...
 
#define RKSHIFT_ENDPOS   3
 END POS LSB position from the RegionKey LSB. More...
 
#define RKSHIFT_STRAND   1
 STRAND LSB position from the RegionKey LSB. More...
 
#define RK_CHROM   ((rk & RKMASK_CHROM) >> RKSHIFT_CHROM)
 Extract the CHROM code from RegionKey. More...
 
#define RK_STARTPOS   ((rk & RKMASK_STARTPOS) >> RKSHIFT_STARTPOS)
 Extract the START POS code from RegionKey. More...
 
#define RK_ENDPOS   ((rk & RKMASK_ENDPOS) >> RKSHIFT_ENDPOS)
 Extract the END POS code from RegionKey. More...
 
#define RK_STRAND   ((rk & RKMASK_STRAND) >> RKSHIFT_STRAND)
 Extract the STRAND from RegionKey. More...
 

Typedefs

typedef struct regionkey_t regionkey_t
 
typedef struct regionkey_rev_t regionkey_rev_t
 

Functions

static uint8_t encode_region_strand (int8_t strand)
 Encode the strand direction (-1 > 2, 0 > 0, +1 > 1). More...
 
static int8_t decode_region_strand (uint8_t strand)
 Decode the strand direction code (0 > 0, 1 > +1, 2 > -1). More...
 
static uint64_t encode_regionkey (uint8_t chrom, uint32_t startpos, uint32_t endpos, uint8_t strand)
 Returns a 64 bit regionkey. More...
 
static uint8_t extract_regionkey_chrom (uint64_t rk)
 Extract the CHROM code from RegionKey. More...
 
static uint32_t extract_regionkey_startpos (uint64_t rk)
 Extract the START POS code from RegionKey. More...
 
static uint32_t extract_regionkey_endpos (uint64_t rk)
 Extract the END POS code from RegionKey. More...
 
static uint8_t extract_regionkey_strand (uint64_t rk)
 Extract the STRAND from RegionKey. More...
 
static void decode_regionkey (uint64_t code, regionkey_t *rk)
 Decodes a RegionKey code and returns the components as a regionkey_t structure. More...
 
static void reverse_regionkey (uint64_t rk, regionkey_rev_t *rev)
 
static uint64_t regionkey (const char *chrom, size_t sizechrom, uint32_t startpos, uint32_t endpos, int8_t strand)
 Returns a 64 bit regionkey based on CHROM, START POS (0-based), END POS and STRAND. More...
 
static uint64_t extend_regionkey (uint64_t rk, uint32_t size)
 Extend a regionkey region by a fixed amount from the start and end position. More...
 
static size_t regionkey_hex (uint64_t rk, char *str)
 Returns RegionKey hexadecimal string (16 characters). More...
 
static uint64_t parse_regionkey_hex (const char *rs)
 Parses a RegionKey hexadecimal string and returns the code. More...
 
static uint64_t get_regionkey_chrom_startpos (uint64_t rk)
 Get the CHROM + START POS encoding from RegionKey. More...
 
static uint64_t get_regionkey_chrom_endpos (uint64_t rk)
 Get the CHROM + END POS encoding from RegionKey. More...
 
static uint8_t are_overlapping_regions (uint8_t a_chrom, uint32_t a_startpos, uint32_t a_endpos, uint8_t b_chrom, uint32_t b_startpos, uint32_t b_endpos)
 Check if two regions are overlapping. More...
 
static uint8_t are_overlapping_region_regionkey (uint8_t chrom, uint32_t startpos, uint32_t endpos, uint64_t rk)
 Check if a region and a regionkey are overlapping. More...
 
static uint8_t are_overlapping_regionkeys (uint64_t rka, uint64_t rkb)
 Check if two regionkeys are overlapping. More...
 
static uint8_t are_overlapping_variantkey_regionkey (nrvk_cols_t nvc, uint64_t vk, uint64_t rk)
 Check if variantkey and regionkey are overlapping. More...
 
static uint64_t variantkey_to_regionkey (nrvk_cols_t nvc, uint64_t vk)
 Get RegionKey from VariantKey. More...
 

Detailed Description

The functions provided here allow generating and processing 64-bit unsigned integer keys for human genomic regions. The RegionKey is sortable by chromosome and start position, and it is also fully reversible.

Macro Definition Documentation

#define RK_CHROM   ((rk & RKMASK_CHROM) >> RKSHIFT_CHROM)
#define RK_ENDPOS   ((rk & RKMASK_ENDPOS) >> RKSHIFT_ENDPOS)
#define RK_MAX_POS   0x000000000FFFFFFF
#define RK_STARTPOS   ((rk & RKMASK_STARTPOS) >> RKSHIFT_STARTPOS)
#define RK_STRAND   ((rk & RKMASK_STRAND) >> RKSHIFT_STRAND)
#define RKMASK_CHROM   0xF800000000000000
#define RKMASK_ENDPOS   0x000000007FFFFFF8
#define RKMASK_NOPOS   0xF800000000000007
#define RKMASK_STARTPOS   0x07FFFFFF80000000
#define RKMASK_STRAND   0x0000000000000006
#define RKSHIFT_CHROM   59
#define RKSHIFT_ENDPOS   3
#define RKSHIFT_STARTPOS   31
#define RKSHIFT_STRAND   1

Typedef Documentation

typedef struct regionkey_rev_t regionkey_rev_t

RegionKey decoded struct.

typedef struct regionkey_t regionkey_t

RegionKey struct. Contains the numerically encoded RegionKey components (CHROM, STARTPOS, ENDPOS, STRAND).

Function Documentation

static uint8_t are_overlapping_region_regionkey ( uint8_t  chrom,
uint32_t  startpos,
uint32_t  endpos,
uint64_t  rk 
)
inline static
Parameters
chrom: Region A chromosome code.
startpos: Region A start position.
endpos: Region A end position (startpos + region length).
rk: RegionKey of region B.
Returns
1 if the regions overlap, 0 otherwise.
static uint8_t are_overlapping_regionkeys ( uint64_t  rka,
uint64_t  rkb 
)
inline static
Parameters
rka: RegionKey A.
rkb: RegionKey B.
Returns
1 if the regions overlap, 0 otherwise.
static uint8_t are_overlapping_regions ( uint8_t  a_chrom,
uint32_t  a_startpos,
uint32_t  a_endpos,
uint8_t  b_chrom,
uint32_t  b_startpos,
uint32_t  b_endpos 
)
inline static
Parameters
a_chrom: Region A chromosome code.
a_startpos: Region A start position.
a_endpos: Region A end position (startpos + region length).
b_chrom: Region B chromosome code.
b_startpos: Region B start position.
b_endpos: Region B end position (startpos + region length).
Returns
1 if the regions overlap, 0 otherwise.
static uint8_t are_overlapping_variantkey_regionkey ( nrvk_cols_t  nvc,
uint64_t  vk,
uint64_t  rk 
)
inline static
Parameters
nvc: Structure containing the pointers to the NRVK memory mapped file columns.
vk: VariantKey code.
rk: RegionKey code.
Returns
1 if the regions overlap, 0 otherwise.
static int8_t decode_region_strand ( uint8_t  strand)
inline static
Parameters
strand: Strand code.
Returns
Strand direction.
static void decode_regionkey ( uint64_t  code,
regionkey_t *  rk 
)
inline static
Parameters
code: RegionKey code.
rk: Decoded regionkey structure.
static uint8_t encode_region_strand ( int8_t  strand)
inline static
Parameters
strand: Strand direction (-1, 0, +1).
Returns
Strand code.
static uint64_t encode_regionkey ( uint8_t  chrom,
uint32_t  startpos,
uint32_t  endpos,
uint8_t  strand 
)
inline static
Parameters
chrom: Encoded Chromosome (see encode_chrom).
startpos: Start position (zero based).
endpos: End position (startpos + region_length).
strand: Encoded Strand direction (-1 > 2, 0 > 0, +1 > 1).
Returns
RegionKey 64 bit code.
static uint64_t extend_regionkey ( uint64_t  rk,
uint32_t  size 
)
inline static
Parameters
rk: RegionKey code.
size: Amount to extend the region.
static uint8_t extract_regionkey_chrom ( uint64_t  rk)
inline static
Parameters
rk: RegionKey code.
Returns
CHROM code.
static uint32_t extract_regionkey_endpos ( uint64_t  rk)
inline static
Parameters
rk: RegionKey code.
Returns
END POS.
static uint32_t extract_regionkey_startpos ( uint64_t  rk)
inline static
Parameters
rk: RegionKey code.
Returns
START POS.
static uint8_t extract_regionkey_strand ( uint64_t  rk)
inline static
Parameters
rk: RegionKey code.
Returns
STRAND.
static uint64_t get_regionkey_chrom_endpos ( uint64_t  rk)
inline static
Parameters
rk: RegionKey code.
Returns
CHROM + END POS.
static uint64_t get_regionkey_chrom_startpos ( uint64_t  rk)
inline static
Parameters
rk: RegionKey code.
Returns
CHROM + START POS.
static uint64_t parse_regionkey_hex ( const char *  rs)
inline static
Parameters
rs: RegionKey hexadecimal string (it must contain 16 hexadecimal characters).
Returns
A RegionKey code.
static uint64_t regionkey ( const char *  chrom,
size_t  sizechrom,
uint32_t  startpos,
uint32_t  endpos,
int8_t  strand 
)
inline static
Parameters
chrom: Chromosome. An identifier from the reference genome, no white-space or leading zeros permitted.
sizechrom: Length of the chrom string, excluding the terminating null byte.
startpos: Start position (zero based).
endpos: End position (startpos + region_length).
strand: Strand direction (-1, 0, +1).
Returns
RegionKey 64 bit code.
static size_t regionkey_hex ( uint64_t  rk,
char *  str 
)
inline static
Parameters
rk: RegionKey code.
str: String buffer to be returned (it must be sized 17 bytes at least).
Returns
Upon successful return, this function returns the number of characters processed (excluding the null byte used to end output to strings). If the buffer size is not sufficient, then the return value is the number of characters required for the buffer string, including the terminating null byte.
static void reverse_regionkey ( uint64_t  rk,
regionkey_rev_t *  rev 
)
inline static

Reverses a RegionKey code and returns the normalized components as a regionkey_rev_t structure.

Parameters
rk: RegionKey code.
rev: Structure containing the return values.
static uint64_t variantkey_to_regionkey ( nrvk_cols_t  nvc,
uint64_t  vk 
)
inline static
Parameters
nvc: Structure containing the pointers to the NRVK memory mapped file columns.
vk: VariantKey code.
Returns
RegionKey.