...

Package variantkey

import "github.com/genomicsplc/variantkey/go/src"
Overview
Index

Overview ▾

Package variantkey is a Go wrapper for the variantkey C software library. 64 bit Encoding for Human Genetic Variants.

@category   Libraries
@author     Nicola Asuni <nicola.asuni@genomicsplc.com>
@copyright  2017-2018 GENOMICS plc
@license    MIT (see LICENSE)
@link       https://github.com/genomicsplc/variantkey

LICENSE

Copyright (c) 2017-2018 GENOMICS plc

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

Index ▾

func AreOverlappingRegionKeys(rka, rkb uint64) bool
func AreOverlappingRegionRegionKey(chrom uint8, startpos, endpos uint32, rk uint64) bool
func AreOverlappingRegions(chromA uint8, startposA, endposA uint32, chromB uint8, startposB, endposB uint32) bool
func CompareVariantKeyChrom(va, vb uint64) int
func CompareVariantKeyChromPos(va, vb uint64) int
func DecodeChrom(c uint8) string
func DecodeRefAlt(c uint32) (string, string, uint8, uint8, uint8)
func DecodeRegionStrand(strand uint8) int8
func DecodeStringID(esid uint64) string
func EncodeChrom(chrom string) uint8
func EncodeRefAlt(ref string, alt string) uint32
func EncodeRegionKey(chrom uint8, startpos, endpos uint32, strand uint8) uint64
func EncodeRegionStrand(strand int8) uint8
func EncodeStringID(s string, start uint32) uint64
func EncodeStringNumID(s string, sep byte) uint64
func EncodeVariantKey(chrom uint8, pos, refalt uint32) uint64
func ExtendRegionKey(rk uint64, size uint32) uint64
func ExtractRegionKeyChrom(rk uint64) uint8
func ExtractRegionKeyEndPos(rk uint64) uint32
func ExtractRegionKeyStartPos(rk uint64) uint32
func ExtractRegionKeyStrand(rk uint64) uint8
func ExtractVariantKeyChrom(v uint64) uint8
func ExtractVariantKeyPos(v uint64) uint32
func ExtractVariantKeyRefAlt(v uint64) uint32
func FlipAllele(allele string) string
func GetRegionKeyChromEndPos(rk uint64) uint64
func GetRegionKeyChromStartPos(rk uint64) uint64
func GetVariantKeyChromStartPos(vk uint64) uint64
func HashStringID(s string) uint64
func Hex(v uint64) string
func MmapNRVKFile(file string) (TMMFile, NRVKCols, error)
func MmapRSVKFile(file string, ctbytes []uint8) (TMMFile, RSIDVARCols, error)
func MmapVKRSFile(file string, ctbytes []uint8) (TMMFile, RSIDVARCols, error)
func ParseHex(s string) uint64
func RegionKey(chrom string, startpos, endpos uint32, strand int8) uint64
func ReverseVariantKey(v uint64) (chrom string, pos uint32, ref string, alt string, sizeref uint8, sizealt uint8)
func StringToNTBytes(s string) []byte
func StringToNTBytesN(s string, size uint32) []byte
func VariantKey(chrom string, pos uint32, ref, alt string) uint64
type NRVKCols
    func (nr NRVKCols) AreOverlappingVariantKeyRegionKey(vk, rk uint64) bool
    func (nr NRVKCols) FindRefAltByVariantKey(vk uint64) (string, string, uint8, uint8, uint32)
    func (nr NRVKCols) GetVariantKeyChromEndPos(vk uint64) uint64
    func (nr NRVKCols) GetVariantKeyEndPos(vk uint64) uint32
    func (nr NRVKCols) GetVariantKeyRefLength(vk uint64) uint8
    func (nr NRVKCols) ReverseVariantKey(vk uint64) (TVariantKeyRev, uint32)
    func (nr NRVKCols) VariantToRegionkey(vk uint64) uint64
    func (nr NRVKCols) VknrBinToTSV(tsvfile string) uint64
type RSIDVARCols
    func (crv RSIDVARCols) FindAllRVVariantKeyByRsid(first, last uint64, rsid uint32) (vks []uint64)
    func (cvr RSIDVARCols) FindAllVRRsidByVariantKey(first, last uint64, vk uint64) (rsids []uint32)
    func (crv RSIDVARCols) FindRVVariantKeyByRsid(first, last uint64, rsid uint32) (uint64, uint64)
    func (crv RSIDVARCols) FindVRChromPosRange(first, last uint64, chrom uint8, posMin, posMax uint32) (uint32, uint64, uint64)
    func (crv RSIDVARCols) FindVRRsidByVariantKey(first uint64, last uint64, vk uint64) (uint32, uint64)
    func (crv RSIDVARCols) GetNextRVVariantKeyByRsid(pos, last uint64, rsid uint32) (uint64, uint64)
    func (cvr RSIDVARCols) GetNextVRRsidByVariantKey(pos, last uint64, vk uint64) (uint32, uint64)
type TMMFile
    func MmapGenorefFile(file string) (TMMFile, error)
    func (mf TMMFile) CheckReference(chrom uint8, pos uint32, ref string) int
    func (mf TMMFile) Close() error
    func (mf TMMFile) GetGenorefSeq(chrom uint8, pos uint32) byte
    func (mf TMMFile) NormalizeVariant(chrom uint8, pos uint32, ref string, alt string) (code int, npos uint32, nref, nalt string, nsizeref, nsizealt uint8)
    func (mf TMMFile) NormalizedVariantKey(chrom string, pos uint32, posindex uint8, ref string, alt string) (vk uint64, code int)
type TRegionKey
    func DecodeRegionKey(rk uint64) TRegionKey
type TRegionKeyRev
    func ReverseRegionKey(rk uint64) TRegionKeyRev
type TVKRange
    func Range(chrom uint8, posMin, posMax uint32) TVKRange
type TVariantKey
    func DecodeVariantKey(v uint64) TVariantKey
type TVariantKeyRev

Package files

variantkey.go

func AreOverlappingRegionKeys

func AreOverlappingRegionKeys(rka, rkb uint64) bool

AreOverlappingRegionKeys checks if two regionkeys are overlapping.

func AreOverlappingRegionRegionKey

func AreOverlappingRegionRegionKey(chrom uint8, startpos, endpos uint32, rk uint64) bool

AreOverlappingRegionRegionKey checks if a region and a regionkey are overlapping.

func AreOverlappingRegions

func AreOverlappingRegions(chromA uint8, startposA, endposA uint32, chromB uint8, startposB, endposB uint32) bool

AreOverlappingRegions checks if two regions are overlapping.

func CompareVariantKeyChrom

func CompareVariantKeyChrom(va, vb uint64) int

CompareVariantKeyChrom compares two VariantKeys by chromosome only.

func CompareVariantKeyChromPos

func CompareVariantKeyChromPos(va, vb uint64) int

CompareVariantKeyChromPos compares two VariantKeys by chromosome and position.

func DecodeChrom

func DecodeChrom(c uint8) string

DecodeChrom decodes the chrom code to a string.

func DecodeRefAlt

func DecodeRefAlt(c uint32) (string, string, uint8, uint8, uint8)

DecodeRefAlt decodes the Ref+Alt code if reversible.

func DecodeRegionStrand

func DecodeRegionStrand(strand uint8) int8

DecodeRegionStrand decodes the strand direction code (0 > 0, 1 > +1, 2 > -1).

func DecodeStringID

func DecodeStringID(esid uint64) string

DecodeStringID decodes the encoded string ID.

func EncodeChrom

func EncodeChrom(chrom string) uint8

EncodeChrom returns chromosome encoding.

func EncodeRefAlt

func EncodeRefAlt(ref string, alt string) uint32

EncodeRefAlt returns reference+alternate code.

func EncodeRegionKey

func EncodeRegionKey(chrom uint8, startpos, endpos uint32, strand uint8) uint64

EncodeRegionKey returns a 64 bit regionkey.

func EncodeRegionStrand

func EncodeRegionStrand(strand int8) uint8

EncodeRegionStrand encodes the strand direction (-1 > 2, 0 > 0, +1 > 1).

func EncodeStringID

func EncodeStringID(s string, start uint32) uint64

EncodeStringID encodes a maximum of 10 characters of a string into a 64 bit unsigned integer. The argument "start" indicates the first character to encode.

func EncodeStringNumID

func EncodeStringNumID(s string, sep byte) uint64

EncodeStringNumID encodes a string composed of a character section followed by a separator character and a numerical section into a 64 bit unsigned integer. For example: "ABCDE:0001234". Encodes up to 5 characters in uppercase, a number up to 2^27, and up to 7 zero padding digits. If the string is 10 characters or less, then encode_string_id() is used.

func EncodeVariantKey

func EncodeVariantKey(chrom uint8, pos, refalt uint32) uint64

EncodeVariantKey returns a Genetic Variant Key based on pre-encoded CHROM, POS (0-based), REF+ALT.

func ExtendRegionKey

func ExtendRegionKey(rk uint64, size uint32) uint64

ExtendRegionKey extends a regionkey region by a fixed amount from the start and end position.

func ExtractRegionKeyChrom

func ExtractRegionKeyChrom(rk uint64) uint8

ExtractRegionKeyChrom extracts the CHROM code from RegionKey.

func ExtractRegionKeyEndPos

func ExtractRegionKeyEndPos(rk uint64) uint32

ExtractRegionKeyEndPos extracts the END POS code from RegionKey.

func ExtractRegionKeyStartPos

func ExtractRegionKeyStartPos(rk uint64) uint32

ExtractRegionKeyStartPos extracts the START POS code from RegionKey.

func ExtractRegionKeyStrand

func ExtractRegionKeyStrand(rk uint64) uint8

ExtractRegionKeyStrand extracts the STRAND from RegionKey.

func ExtractVariantKeyChrom

func ExtractVariantKeyChrom(v uint64) uint8

ExtractVariantKeyChrom extracts the CHROM code from VariantKey.

func ExtractVariantKeyPos

func ExtractVariantKeyPos(v uint64) uint32

ExtractVariantKeyPos extracts the POS code from VariantKey.

func ExtractVariantKeyRefAlt

func ExtractVariantKeyRefAlt(v uint64) uint32

ExtractVariantKeyRefAlt extracts the REF+ALT code from VariantKey.

func FlipAllele

func FlipAllele(allele string) string

FlipAllele flips allele nucleotides.

func GetRegionKeyChromEndPos

func GetRegionKeyChromEndPos(rk uint64) uint64

GetRegionKeyChromEndPos gets the CHROM + END POS encoding from RegionKey.

func GetRegionKeyChromStartPos

func GetRegionKeyChromStartPos(rk uint64) uint64

GetRegionKeyChromStartPos gets the CHROM + START POS encoding from RegionKey.

func GetVariantKeyChromStartPos

func GetVariantKeyChromStartPos(vk uint64) uint64

GetVariantKeyChromStartPos gets the CHROM + START POS encoding from VariantKey.

func HashStringID

func HashStringID(s string) uint64

HashStringID hashes the input string into a 64 bit unsigned integer.

func Hex

func Hex(v uint64) string

Hex provides a 16 digits hexadecimal string representation of a 64bit unsigned number.

func MmapNRVKFile

func MmapNRVKFile(file string) (TMMFile, NRVKCols, error)

MmapNRVKFile memory maps the NRVK binary file.

func MmapRSVKFile

func MmapRSVKFile(file string, ctbytes []uint8) (TMMFile, RSIDVARCols, error)

MmapRSVKFile memory maps the RSVK binary file.

func MmapVKRSFile

func MmapVKRSFile(file string, ctbytes []uint8) (TMMFile, RSIDVARCols, error)

MmapVKRSFile memory maps the VKRS binary file.

func ParseHex

func ParseHex(s string) uint64

ParseHex parses a 16 digit HEX string and returns the 64 bit unsigned number.

func RegionKey

func RegionKey(chrom string, startpos, endpos uint32, strand int8) uint64

RegionKey returns a 64 bit regionkey based on CHROM, START POS (0-based), END POS and STRAND.

func ReverseVariantKey

func ReverseVariantKey(v uint64) (chrom string, pos uint32, ref string, alt string, sizeref uint8, sizealt uint8)

ReverseVariantKey parses a 64 bit VariantKey code and returns the components.

func StringToNTBytes

func StringToNTBytes(s string) []byte

StringToNTBytes safely converts a string to a byte array with an extra null terminator. This is to ensure a correct CGO conversion to char*.

func StringToNTBytesN

func StringToNTBytesN(s string, size uint32) []byte

StringToNTBytesN converts a string to a byte array, allocating "size" bytes.

func VariantKey

func VariantKey(chrom string, pos uint32, ref, alt string) uint64

VariantKey returns a Genetic Variant Key based on CHROM, POS (0-based), REF, ALT. The variant should be already normalized (see NormalizeVariant or use NormalizedVariantKey).

type NRVKCols

NRVKCols contains the NRVK memory mapped file column info.

type NRVKCols struct {
    Vk     unsafe.Pointer // Pointer to the VariantKey column.
    Offset unsafe.Pointer // Pointer to the Offset column.
    Data   unsafe.Pointer // Pointer to the Data column.
    NRows  uint64         // Number of rows.
}

func (NRVKCols) AreOverlappingVariantKeyRegionKey

func (nr NRVKCols) AreOverlappingVariantKeyRegionKey(vk, rk uint64) bool

AreOverlappingVariantKeyRegionKey checks if a variantkey and a regionkey are overlapping.

func (NRVKCols) FindRefAltByVariantKey

func (nr NRVKCols) FindRefAltByVariantKey(vk uint64) (string, string, uint8, uint8, uint32)

FindRefAltByVariantKey retrieves the REF and ALT strings for the specified VariantKey.

func (NRVKCols) GetVariantKeyChromEndPos

func (nr NRVKCols) GetVariantKeyChromEndPos(vk uint64) uint64

GetVariantKeyChromEndPos gets the CHROM + END POS encoding from VariantKey.

func (NRVKCols) GetVariantKeyEndPos

func (nr NRVKCols) GetVariantKeyEndPos(vk uint64) uint32

GetVariantKeyEndPos gets the VariantKey end position (POS + REF length).

func (NRVKCols) GetVariantKeyRefLength

func (nr NRVKCols) GetVariantKeyRefLength(vk uint64) uint8

GetVariantKeyRefLength retrieves the REF length for the specified VariantKey.

func (NRVKCols) ReverseVariantKey

func (nr NRVKCols) ReverseVariantKey(vk uint64) (TVariantKeyRev, uint32)

ReverseVariantKey reverses a VariantKey code and returns the normalized components.

func (NRVKCols) VariantToRegionkey

func (nr NRVKCols) VariantToRegionkey(vk uint64) uint64

VariantToRegionkey gets the RegionKey from a VariantKey.

func (NRVKCols) VknrBinToTSV

func (nr NRVKCols) VknrBinToTSV(tsvfile string) uint64

VknrBinToTSV converts an nrvk.bin file to a simple TSV. For the reverse operation see the resources/tools/nrvk.sh script.

type RSIDVARCols

RSIDVARCols contains the RSVK or VKRS memory mapped file column info.

type RSIDVARCols struct {
    Vk    unsafe.Pointer // Pointer to the VariantKey column.
    Rs    unsafe.Pointer // Pointer to the rsID column.
    NRows uint64         // Number of rows.
}

func (RSIDVARCols) FindAllRVVariantKeyByRsid

func (crv RSIDVARCols) FindAllRVVariantKeyByRsid(first, last uint64, rsid uint32) (vks []uint64)

FindAllRVVariantKeyByRsid gets all VariantKeys for the specified rsID in the RV file. Returns a list of VariantKeys.

func (RSIDVARCols) FindAllVRRsidByVariantKey

func (cvr RSIDVARCols) FindAllVRRsidByVariantKey(first, last uint64, vk uint64) (rsids []uint32)

FindAllVRRsidByVariantKey gets all rsIDs for the specified VariantKey in the VR file. Returns a list of rsIDs.

func (RSIDVARCols) FindRVVariantKeyByRsid

func (crv RSIDVARCols) FindRVVariantKeyByRsid(first, last uint64, rsid uint32) (uint64, uint64)

FindRVVariantKeyByRsid searches for the specified RSID and returns the first occurrence of VariantKey in the RV file.

func (RSIDVARCols) FindVRChromPosRange

func (crv RSIDVARCols) FindVRChromPosRange(first, last uint64, chrom uint8, posMin, posMax uint32) (uint32, uint64, uint64)

FindVRChromPosRange searches for the specified CHROM-POS range and returns the first occurrence of RSID in the VR file.

func (RSIDVARCols) FindVRRsidByVariantKey

func (crv RSIDVARCols) FindVRRsidByVariantKey(first uint64, last uint64, vk uint64) (uint32, uint64)

FindVRRsidByVariantKey searches for the specified VariantKey and returns the first occurrence of RSID in the VR file.

func (RSIDVARCols) GetNextRVVariantKeyByRsid

func (crv RSIDVARCols) GetNextRVVariantKeyByRsid(pos, last uint64, rsid uint32) (uint64, uint64)

GetNextRVVariantKeyByRsid gets the next VariantKey for the specified rsID in the RV file. Returns the VariantKey or 0, and the position.

func (RSIDVARCols) GetNextVRRsidByVariantKey

func (cvr RSIDVARCols) GetNextVRRsidByVariantKey(pos, last uint64, vk uint64) (uint32, uint64)

GetNextVRRsidByVariantKey gets the next rsID for the specified VariantKey in the VR file. Returns the rsID or 0, and the position.

type TMMFile

TMMFile contains the memory mapped file info.

type TMMFile struct {
    Src     unsafe.Pointer // Pointer to the memory map.
    Fd      int            // File descriptor.
    Size    uint64         // File size in bytes.
    DOffset uint64         // Offset to the beginning of the data block (address of the first byte of the first item in the first column).
    DLength uint64         // Length in bytes of the data block.
    NRows   uint64         // Number of rows.
    NCols   uint8          // Number of columns.
    CTBytes []uint8        // Number of bytes per column type (i.e. 1 for uint8_t, 2 for uint16_t, 4 for uint32_t, 8 for uint64_t)
    Index   []uint64       // Index of the offsets to the beginning of each column.
}

func MmapGenorefFile

func MmapGenorefFile(file string) (TMMFile, error)

MmapGenorefFile maps the specified fasta file in memory.

func (TMMFile) CheckReference

func (mf TMMFile) CheckReference(chrom uint8, pos uint32, ref string) int

CheckReference checks if the reference allele matches the reference genome data.

func (TMMFile) Close

func (mf TMMFile) Close() error

Close unmaps and closes the memory-mapped file.

func (TMMFile) GetGenorefSeq

func (mf TMMFile) GetGenorefSeq(chrom uint8, pos uint32) byte

GetGenorefSeq returns the nucleotide at the specified chromosome and position.

func (TMMFile) NormalizeVariant

func (mf TMMFile) NormalizeVariant(chrom uint8, pos uint32, ref string, alt string) (code int, npos uint32, nref, nalt string, nsizeref, nsizealt uint8)

NormalizeVariant flips alleles if required and applies the normalization algorithm described at: https://genome.sph.umich.edu/wiki/Variant_Normalization

func (TMMFile) NormalizedVariantKey

func (mf TMMFile) NormalizedVariantKey(chrom string, pos uint32, posindex uint8, ref string, alt string) (vk uint64, code int)

NormalizedVariantKey returns a normalized Genetic Variant Key based on CHROM, POS, REF, ALT.

type TRegionKey

TRegionKey contains a representation of a genomic region key.

type TRegionKey struct {
    Chrom    uint8  `json:"chrom"`
    StartPos uint32 `json:"startpos"`
    EndPos   uint32 `json:"endpos"`
    Strand   uint8  `json:"strand"`
}

func DecodeRegionKey

func DecodeRegionKey(rk uint64) TRegionKey

DecodeRegionKey parses a 64 bit RegionKey code and returns the components as a TRegionKey structure.

type TRegionKeyRev

TRegionKeyRev contains the components of a genomic region.

type TRegionKeyRev struct {
    Chrom    string `json:"chrom"`
    StartPos uint32 `json:"startpos"`
    EndPos   uint32 `json:"endpos"`
    Strand   int8   `json:"strand"`
}

func ReverseRegionKey

func ReverseRegionKey(rk uint64) TRegionKeyRev

ReverseRegionKey parses a 64 bit RegionKey code and returns the components.

type TVKRange

TVKRange contains the min and max VariantKey values for range searches.

type TVKRange struct {
    Min uint64 `json:"min"`
    Max uint64 `json:"max"`
}

func Range

func Range(chrom uint8, posMin, posMax uint32) TVKRange

Range returns the minimum and maximum variant keys for range searches.

type TVariantKey

TVariantKey contains a representation of a genetic variant key.

type TVariantKey struct {
    Chrom  uint8  `json:"chrom"`
    Pos    uint32 `json:"pos"`
    RefAlt uint32 `json:"refalt"`
}

func DecodeVariantKey

func DecodeVariantKey(v uint64) TVariantKey

DecodeVariantKey parses a 64 bit VariantKey code and returns the components as a TVariantKey structure.

type TVariantKeyRev

TVariantKeyRev contains the components of a genetic variant.

type TVariantKeyRev struct {
    Chrom   string `json:"chrom"`
    Pos     uint32 `json:"pos"`
    Ref     string `json:"ref"`
    Alt     string `json:"alt"`
    SizeRef uint8  `json:"size_ref"`
    SizeAlt uint8  `json:"size_alt"`
}