diff --git a/.gitignore b/.gitignore
index b9e4fea..a18ec95 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,4 @@ docs/_build
.DS_Store
vcf/cparse.c
vcf/cparse.so
+.coverage
diff --git a/.travis.yml b/.travis.yml
index cdbf63a..ad315b2 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,12 +1,18 @@
# Validate this file using http://lint.travis-ci.org/
language: python
+sudo: false
+cache:
+ directories:
+ - $HOME/.cache/pip
python:
- - "2.6"
- "2.7"
- - "3.2"
+ - "3.4"
+ - "3.5"
+ - "3.6"
+ - "nightly"
- "pypy"
+ - "pypy3"
install:
- - "if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install --use-mirrors pysam argparse ordereddict; fi"
- - "if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then pip install --use-mirrors pysam; fi"
+ - if [[ "$TRAVIS_PYTHON_VERSION" =~ ^pypy ]]; then pip install -r requirements/pypy-requirements.txt; else pip install -r requirements/common-requirements.txt; fi
- python setup.py install
script: python setup.py test
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..44f678a
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+recursive-include vcf *.pyx
diff --git a/README.rst b/README.rst
index 83792ce..67b5d1b 100644
--- a/README.rst
+++ b/README.rst
@@ -14,7 +14,7 @@ There main interface is the class: ``Reader``. It takes a file-like
object and acts as a reader::
>>> import vcf
- >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'rb'))
+ >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'r'))
>>> for record in vcf_reader:
... print record
Record(CHROM=20, POS=14370, REF=G, ALT=[A])
@@ -49,8 +49,8 @@ one-entry Python lists (see, e.g., ``Record.ALT``). Semicolon-delimited lists
of key=value pairs are converted to Python dictionaries, with flags being given
a ``True`` value. Integers and floats are handled exactly as you'd expect::
- >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'rb'))
- >>> record = vcf_reader.next()
+ >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'r'))
+ >>> record = next(vcf_reader)
>>> print record.POS
14370
>>> print record.ALT
@@ -58,17 +58,17 @@ a ``True`` value. Integers and floats are handled exactly as you'd expect::
>>> print record.INFO['AF']
[0.5]
-There are a number of convienience methods and properties for each ``Record`` allowing you to
+There are a number of convenience methods and properties for each ``Record`` allowing you to
examine properties of interest::
>>> print record.num_called, record.call_rate, record.num_unknown
3 1.0 0
>>> print record.num_hom_ref, record.num_het, record.num_hom_alt
1 1 1
- >>> print record.nucl_diversity, record.aaf
- 0.6 0.5
+ >>> print record.nucl_diversity, record.aaf, record.heterozygosity
+ 0.6 [0.5] 0.5
>>> print record.get_hets()
- [Call(sample=NA00002, GT=1|0, HQ=[51, 51], DP=8, GQ=48)]
+ [Call(sample=NA00002, CallData(GT=1|0, GQ=48, DP=8, HQ=[51, 51]))]
>>> print record.is_snp, record.is_indel, record.is_transition, record.is_deletion
True False True False
>>> print record.var_type, record.var_subtype
@@ -82,7 +82,7 @@ fields. In case the FORMAT column does not exist, ``record.FORMAT`` is
parsed sample column and ``record.genotype`` is a way of looking up genotypes
by sample name::
- >>> record = vcf_reader.next()
+ >>> record = next(vcf_reader)
>>> for sample in record.samples:
... print sample['GT']
0|0
@@ -101,7 +101,7 @@ call data in ``data``::
>>> print call.sample
NA00001
>>> print call.data
- {'GT': '0|0', 'HQ': [58, 50], 'DP': 3, 'GQ': 49}
+ CallData(GT=0|0, GQ=49, DP=3, HQ=[58, 50])
Please note that as of release 0.4.0, attributes known to have single values (such as
``DP`` and ``GQ`` above) are returned as values. Other attributes are returned
@@ -134,39 +134,48 @@ For example::
ALT records are actually classes, so that you can interrogate them::
- >>> reader = vcf.Reader(file('vcf/test/example-4.1-bnd.vcf'))
- >>> _ = reader.next(); row = reader.next()
+ >>> reader = vcf.Reader(open('vcf/test/example-4.1-bnd.vcf'))
+ >>> _ = next(reader); row = next(reader)
>>> print row
Record(CHROM=1, POS=2, REF=T, ALT=[T[2:3[])
>>> bnd = row.ALT[0]
>>> print bnd.withinMainAssembly, bnd.orientation, bnd.remoteOrientation, bnd.connectingSequence
True False True T
-Random access is supported for files with tabix indexes. Simply call fetch for the
-region you are interested in::
+The Reader supports retrieval of records within designated regions for files
+with tabix indexes via the fetch method. This requires the pysam module as a
+dependency. Pass in a chromosome, and, optionally, start and end coordinates,
+for the regions of interest::
>>> vcf_reader = vcf.Reader(filename='vcf/test/tb.vcf.gz')
- >>> for record in vcf_reader.fetch('20', 1110696, 1230237):
+ >>> # fetch all records on chromosome 20 from base 1110696 through 1230237
+ >>> for record in vcf_reader.fetch('20', 1110695, 1230237): # doctest: +SKIP
... print record
Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T])
Record(CHROM=20, POS=1230237, REF=T, ALT=[None])
-Or extract a single row::
+Note that the start and end coordinates are in the zero-based, half-open
+coordinate system, similar to ``_Record.start`` and ``_Record.end``. The very
+first base of a chromosome is index 0, and the the region includes bases up
+to, but not including the base at the end coordinate. For example::
- >>> print vcf_reader.fetch('20', 1110696)
- Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T])
+ >>> # fetch all records on chromosome 4 from base 11 through 20
+ >>> vcf_reader.fetch('4', 10, 20) # doctest: +SKIP
+would include all records overlapping a 10 base pair region from the 11th base
+of through the 20th base (which is at index 19) of chromosome 4. It would not
+include the 21st base (at index 20). (See
+http://genomewiki.ucsc.edu/index.php/Coordinate_Transforms for more
+information on the zero-based, half-open coordinate system.)
The ``Writer`` class provides a way of writing a VCF file. Currently, you must specify a
template ``Reader`` which provides the metadata::
>>> vcf_reader = vcf.Reader(filename='vcf/test/tb.vcf.gz')
- >>> vcf_writer = vcf.Writer(file('/dev/null', 'w'), vcf_reader)
+ >>> vcf_writer = vcf.Writer(open('/dev/null', 'w'), vcf_reader)
>>> for record in vcf_reader:
... vcf_writer.write_record(record)
-
An extensible script is available to filter vcf files in vcf_filter.py. VCF filters
declared by other packages will be available for use in this script. Please
see :doc:`FILTERS` for full description.
-
diff --git a/docs/API.rst b/docs/API.rst
index 7ffc21a..d688893 100644
--- a/docs/API.rst
+++ b/docs/API.rst
@@ -14,43 +14,43 @@ vcf.Writer
:members:
vcf.model._Record
------------
+-----------------
.. autoclass:: vcf.model._Record
:members:
vcf.model._Call
----------
+---------------
.. autoclass:: vcf.model._Call
:members:
vcf.model._AltRecord
------------
+--------------------
.. autoclass:: vcf.model._AltRecord
:members:
vcf.model._Substitution
------------
+-----------------------
.. autoclass:: vcf.model._Substitution
:members:
vcf.model._SV
------------
+-------------
.. autoclass:: vcf.model._SV
:members:
vcf.model._SingleBreakend
------------
+-------------------------
.. autoclass:: vcf.model._SingleBreakend
:members:
vcf.model._Breakend
------------
+-------------------
.. autoclass:: vcf.parser._Breakend
:members:
diff --git a/docs/HISTORY.rst b/docs/HISTORY.rst
index 396ffa7..8a97d8d 100644
--- a/docs/HISTORY.rst
+++ b/docs/HISTORY.rst
@@ -2,7 +2,7 @@ Development
===========
Please use the `PyVCF repository `_.
-Pull requests gladly accepted.
+Pull requests gladly accepted.
Issues should be reported at the github issue tracker.
Running tests
@@ -10,24 +10,90 @@ Running tests
Please check the tests by running them with::
- python setup.py test
+ python setup.py test
New features should have test code sent with them.
Changes
=======
+0.6.7 Release
+-------------
+
+* Include missing .pyx files
+
+0.6.6 Release
+-------------
+
+* better walk together record ordering (Thanks @datagram, #141)
+
+0.6.5 Release
+-------------
+
+* Better contig handling (#115, #116, #119 thanks Martijn)
+* INFO lines with type character (#120, #121 thanks @AndrewUzilov, Martijn)
+* Single breakends fix (#126 thanks @pkrushe)
+* Speedup by losing ordering of INFO (#128 thanks Martijn)
+* HOMSEQ and other missing fields in INFO (#130 thanks Martijn)
+* Add aaf property, (thanks @mgymrek #131)
+* Custom equality for walk_together, thanks bow #132
+* Change default line encoding to '\n'
+* Improved __eq__ (#134, thanks bow)
+
+
+0.6.4 Release
+-------------
+
+* Handle INFO fields with multiple values, thanks
+* Support writing records without GT data #88, thanks @bow
+* Pickleable call data #112, thanks @superbobry
+* Write files without FORMAT #95 thanks Martijn
+* Strict whitespace mode, thanks Martijn, Lee Lichtenstein and Manawsi Gupta
+* Add support for contigs in header, thanks @gcnh and Martijn
+* Fix GATK header parsing, thanks @alimanfoo
+
+0.6.3 Release
+-------------
+
+* cython port of #79
+* correct writing of meta lines #84
+
+0.6.2 Release
+-------------
+
+* issues #78, #79 (thanks Sean, Brad)
+
+0.6.1 Release
+-------------
+
+* Add strict whitespace mode for well formed VCFs with spaces
+ in sample names (thanks Marco)
+* Ignore blank lines in files (thanks Martijn)
+* Tweaks for handling missing data (thanks Sean)
+* bcftools tests (thanks Martijn)
+* record.FILTER is always a list
+
+0.6.0 Release
+-------------
+
+* Backwards incompatible change: _Call.data is now a
+ namedtuple (previously it was a dict)
+* Optional cython version, much improved performance.
+* Improvements to writer (thanks @cmclean)
+* Improvements to inheritance of classes (thanks @lennax)
+
+
0.5.0 Release
-------------
-VCF 4.1 support:
- * support missing genotype #28 (thanks @martijnvermaat)
- * parseALT for svs #42, #48 (thanks @dzerbino)
+* VCF 4.1 support:
+ - support missing genotype #28 (thanks @martijnvermaat)
+ - parseALT for svs #42, #48 (thanks @dzerbino)
* `trim_common_suffix` method #22 (thanks @martijnvermaat)
* Multiple metadata with the same key is stored (#52)
-Writer improvements
- * A/G in Number INFO fields #53 (thanks @lennax)
- * Better output #55 (thanks @cmclean)
+* Writer improvements:
+ - A/G in Number INFO fields #53 (thanks @lennax)
+ - Better output #55 (thanks @cmclean)
* Allow malformed INFO fields #49 (thanks @ilyaminkin)
* Added bayes factor error bias VCF filter
* Added docs on vcf_melt
@@ -37,14 +103,14 @@ Writer improvements
0.4.6 Release
-------------
-* Performance improvements (#47)
+* Performance improvements (#47)
* Preserve order of INFO column (#46)
0.4.5 Release
-------------
-* Support exponent syntax qual values (#43, #44) (thanks @martijnvermaat)
-* Preserve order of header lines (#45)
+* Support exponent syntax qual values (#43, #44) (thanks @martijnvermaat)
+* Preserve order of header lines (#45)
0.4.4 Release
-------------
@@ -73,15 +139,15 @@ Writer improvements
0.4.0 Release
-------------
-* Package structure
+* Package structure
* add ``vcf.utils`` module with ``walk_together`` method
-* samtools tests
+* samtools tests
* support Freebayes' non standard '.' for no call
-* fix vcf_melt
+* fix vcf_melt
* support monomorphic sites, add ``is_monomorphic`` method, handle null QUALs
-* filter support for files with monomorphic calls
+* filter support for files with monomorphic calls
* Values declared as single are no-longer returned in lists
-* several performance improvements
+* several performance improvements
0.3.0 Release
@@ -104,14 +170,14 @@ Documentation release
* Add shebang to vcf_filter.py
-0.2 Release
+0.2 Release
-----------
* Replace genotype dictionary with a ``Call`` object
* Methods on ``Record`` and ``Call`` (thanks @arq5x)
* Shortcut parse_sample when genotype is None
-0.1 Release
+0.1 Release
-----------
* Added test code
@@ -122,7 +188,7 @@ Documentation release
* Allow opening by filename as well as filesocket
* Support fetching rows for tabixed indexed files
* Performance improvements (see ``test/prof.py``)
-* Added extensible filter script (see FILTERS.md), vcf_filter.py
+* Added extensible filter script (see FILTERS.md), vcf_filter.py
Contributions
=============
@@ -131,4 +197,3 @@ Project started by @jdoughertyii and taken over by @jamescasbon on 12th January
Contributions from @arq5x, @brentp, @martijnvermaat, @ian1roberts, @marcelm.
This project was supported by `Population Genetics `_.
-
diff --git a/docs/INTRO.rst b/docs/INTRO.rst
index b61e9a9..2b9a587 100644
--- a/docs/INTRO.rst
+++ b/docs/INTRO.rst
@@ -1,5 +1,4 @@
Introduction
============
-.. automodule:: vcf
-
+.. include:: ../README.rst
diff --git a/requirements/common-requirements.txt b/requirements/common-requirements.txt
new file mode 100644
index 0000000..876b75e
--- /dev/null
+++ b/requirements/common-requirements.txt
@@ -0,0 +1,3 @@
+cython
+pysam!=0.8.0
+setuptools
diff --git a/requirements/pypy-requirements.txt b/requirements/pypy-requirements.txt
new file mode 100644
index 0000000..49fe098
--- /dev/null
+++ b/requirements/pypy-requirements.txt
@@ -0,0 +1 @@
+setuptools
diff --git a/scripts/vcf_filter.py b/scripts/vcf_filter.py
index 9a08629..fd32b39 100644
--- a/scripts/vcf_filter.py
+++ b/scripts/vcf_filter.py
@@ -162,7 +162,7 @@ def addfilt(filt):
if output_record:
# use PASS only if other filter names appear in the FILTER column
#FIXME: is this good idea?
- if record.FILTER == '.' and not drop_filtered: record.FILTER = 'PASS'
+ if record.FILTER is None and not drop_filtered: record.FILTER = 'PASS'
output.write_record(record)
if __name__ == '__main__': main()
diff --git a/scripts/vcf_sample_filter.py b/scripts/vcf_sample_filter.py
new file mode 100644
index 0000000..d71e6a3
--- /dev/null
+++ b/scripts/vcf_sample_filter.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+
+# Author: Lenna X. Peterson
+# github.com/lennax
+# arklenna at gmail dot com
+
+import argparse
+import logging
+
+from vcf import SampleFilter
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("file", help="VCF file to filter")
+ parser.add_argument("-o", metavar="outfile",
+ help="File to write out filtered samples")
+ parser.add_argument("-f", metavar="filters",
+ help="Comma-separated list of sample indices or names \
+ to filter")
+ parser.add_argument("-i", "--invert", action="store_true",
+ help="Keep rather than discard the filtered samples")
+ parser.add_argument("-q", "--quiet", action="store_true",
+ help="Less output")
+
+ args = parser.parse_args()
+
+ if args.quiet:
+ log_level = logging.WARNING
+ else:
+ log_level = logging.INFO
+ logging.basicConfig(format='%(message)s', level=log_level)
+
+ sf = SampleFilter(infile=args.file, outfile=args.o,
+ filters=args.f, invert=args.invert)
+ if args.f is None:
+ print "Samples:"
+ for idx, val in enumerate(sf.samples):
+ print "{0}: {1}".format(idx, val)
diff --git a/setup.py b/setup.py
index bca3a0d..b865b8d 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,6 @@
from setuptools import setup
-from distutils.core import setup
from distutils.extension import Extension
+import sys
try:
from Cython.Distutils import build_ext
@@ -8,20 +8,7 @@
except:
CYTHON = False
-requires = []
-
-# python 2.6 does not have argparse
-try:
- import argparse
-except ImportError:
- requires.append('argparse')
-
-
-try:
- import collections
- collections.OrderedDict
-except AttributeError:
- requires.append('ordereddict')
+DEPENDENCIES = ['setuptools']
# get the version without an import
VERSION = "Undefined"
@@ -44,14 +31,14 @@
setup(
name='PyVCF',
packages=['vcf', 'vcf.test'],
- scripts=['scripts/vcf_melt', 'scripts/vcf_filter.py'],
+ scripts=['scripts/vcf_melt', 'scripts/vcf_filter.py',
+ 'scripts/vcf_sample_filter.py'],
author='James Casbon and @jdoughertyii',
author_email='casbon@gmail.com',
description='Variant Call Format (VCF) parser for Python',
long_description=DOC,
test_suite='vcf.test.test_vcf.suite',
- install_requires=['distribute'],
- requires=requires,
+ install_requires=DEPENDENCIES,
entry_points = {
'vcf.filters': [
'site_quality = vcf.filters:SiteQuality',
@@ -68,10 +55,20 @@
'Development Status :: 4 - Beta',
'Intended Audience :: Developers',
'Intended Audience :: Science/Research',
+ 'License :: OSI Approved :: BSD License',
+ 'License :: OSI Approved :: MIT License',
'Operating System :: OS Independent',
+ 'Programming Language :: Cython',
'Programming Language :: Python',
+ 'Programming Language :: Python :: 2',
+ 'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
- 'Topic :: Scientific/Engineering',
+ 'Programming Language :: Python :: 3.4',
+ 'Programming Language :: Python :: 3.5',
+ 'Programming Language :: Python :: 3.6',
+ 'Programming Language :: Python :: Implementation :: CPython',
+ 'Programming Language :: Python :: Implementation :: PyPy',
+ 'Topic :: Scientific/Engineering :: Bio-Informatics',
],
keywords='bioinformatics',
use_2to3=True,
diff --git a/tox.ini b/tox.ini
index 16847bb..af7049e 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,25 +4,18 @@
# and then run "tox" from this directory.
[tox]
-envlist = py26, py27, py32
+envlist = py27, py34, py35, py36, pypy, pypy3
[testenv]
+deps =
+ -rrequirements/common-requirements.txt
commands =
- rm -rf {toxinidir}/build
- python setup.py test
+ python setup.py clean --all test
-[testenv:py26]
+[testenv:pypy]
deps =
- argparse
- ordereddict
- pysam
+ -rrequirements/pypy-requirements.txt
-[testenv:py27]
+[testenv:pypy3]
deps =
- pysam
- cython
-
-[testenv:py32]
-deps =
- cython
-
+ -rrequirements/pypy-requirements.txt
diff --git a/vcf/__init__.py b/vcf/__init__.py
index 2935c73..e1aae58 100644
--- a/vcf/__init__.py
+++ b/vcf/__init__.py
@@ -1,180 +1,15 @@
#!/usr/bin/env python
-'''A VCFv4.0 and 4.1 parser for Python.
+"""
+A VCFv4.0 and 4.1 parser for Python.
Online version of PyVCF documentation is available at http://pyvcf.rtfd.org/
+"""
-The intent of this module is to mimic the ``csv`` module in the Python stdlib,
-as opposed to more flexible serialization formats like JSON or YAML. ``vcf``
-will attempt to parse the content of each record based on the data types
-specified in the meta-information lines -- specifically the ##INFO and
-##FORMAT lines. If these lines are missing or incomplete, it will check
-against the reserved types mentioned in the spec. Failing that, it will just
-return strings.
-There main interface is the class: ``Reader``. It takes a file-like
-object and acts as a reader::
-
- >>> import vcf
- >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'r'))
- >>> for record in vcf_reader:
- ... print record
- Record(CHROM=20, POS=14370, REF=G, ALT=[A])
- Record(CHROM=20, POS=17330, REF=T, ALT=[A])
- Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T])
- Record(CHROM=20, POS=1230237, REF=T, ALT=[None])
- Record(CHROM=20, POS=1234567, REF=GTCT, ALT=[G, GTACT])
-
-
-This produces a great deal of information, but it is conveniently accessed.
-The attributes of a Record are the 8 fixed fields from the VCF spec::
-
- * ``Record.CHROM``
- * ``Record.POS``
- * ``Record.ID``
- * ``Record.REF``
- * ``Record.ALT``
- * ``Record.QUAL``
- * ``Record.FILTER``
- * ``Record.INFO``
-
-plus attributes to handle genotype information:
-
- * ``Record.FORMAT``
- * ``Record.samples``
- * ``Record.genotype``
-
-``samples`` and ``genotype``, not being the title of any column, are left lowercase. The format
-of the fixed fields is from the spec. Comma-separated lists in the VCF are
-converted to lists. In particular, one-entry VCF lists are converted to
-one-entry Python lists (see, e.g., ``Record.ALT``). Semicolon-delimited lists
-of key=value pairs are converted to Python dictionaries, with flags being given
-a ``True`` value. Integers and floats are handled exactly as you'd expect::
-
- >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'r'))
- >>> record = vcf_reader.next()
- >>> print record.POS
- 14370
- >>> print record.ALT
- [A]
- >>> print record.INFO['AF']
- [0.5]
-
-There are a number of convienience methods and properties for each ``Record`` allowing you to
-examine properties of interest::
-
- >>> print record.num_called, record.call_rate, record.num_unknown
- 3 1.0 0
- >>> print record.num_hom_ref, record.num_het, record.num_hom_alt
- 1 1 1
- >>> print record.nucl_diversity, record.aaf
- 0.6 0.5
- >>> print record.get_hets()
- [Call(sample=NA00002, CallData(GT=1|0, GQ=48, DP=8, HQ=[51, 51]))]
- >>> print record.is_snp, record.is_indel, record.is_transition, record.is_deletion
- True False True False
- >>> print record.var_type, record.var_subtype
- snp ts
- >>> print record.is_monomorphic
- False
-
-``record.FORMAT`` will be a string specifying the format of the genotype
-fields. In case the FORMAT column does not exist, ``record.FORMAT`` is
-``None``. Finally, ``record.samples`` is a list of dictionaries containing the
-parsed sample column and ``record.genotype`` is a way of looking up genotypes
-by sample name::
-
- >>> record = vcf_reader.next()
- >>> for sample in record.samples:
- ... print sample['GT']
- 0|0
- 0|1
- 0/0
- >>> print record.genotype('NA00001')['GT']
- 0|0
-
-The genotypes are represented by ``Call`` objects, which have three attributes: the
-corresponding Record ``site``, the sample name in ``sample`` and a dictionary of
-call data in ``data``::
-
- >>> call = record.genotype('NA00001')
- >>> print call.site
- Record(CHROM=20, POS=17330, REF=T, ALT=[A])
- >>> print call.sample
- NA00001
- >>> print call.data
- CallData(GT=0|0, GQ=49, DP=3, HQ=[58, 50])
-
-Please note that as of release 0.4.0, attributes known to have single values (such as
-``DP`` and ``GQ`` above) are returned as values. Other attributes are returned
-as lists (such as ``HQ`` above).
-
-There are also a number of methods::
-
- >>> print call.called, call.gt_type, call.gt_bases, call.phased
- True 0 T|T True
-
-Metadata regarding the VCF file itself can be investigated through the
-following attributes:
-
- * ``Reader.metadata``
- * ``Reader.infos``
- * ``Reader.filters``
- * ``Reader.formats``
- * ``Reader.samples``
-
-For example::
-
- >>> vcf_reader.metadata['fileDate']
- '20090805'
- >>> vcf_reader.samples
- ['NA00001', 'NA00002', 'NA00003']
- >>> vcf_reader.filters
- OrderedDict([('q10', Filter(id='q10', desc='Quality below 10')), ('s50', Filter(id='s50', desc='Less than 50% of samples have data'))])
- >>> vcf_reader.infos['AA'].desc
- 'Ancestral Allele'
-
-ALT records are actually classes, so that you can interrogate them::
-
- >>> reader = vcf.Reader(open('vcf/test/example-4.1-bnd.vcf'))
- >>> _ = reader.next(); row = reader.next()
- >>> print row
- Record(CHROM=1, POS=2, REF=T, ALT=[T[2:3[])
- >>> bnd = row.ALT[0]
- >>> print bnd.withinMainAssembly, bnd.orientation, bnd.remoteOrientation, bnd.connectingSequence
- True False True T
-
-Random access is supported for files with tabix indexes. Simply call fetch for the
-region you are interested in::
-
- >>> vcf_reader = vcf.Reader(filename='vcf/test/tb.vcf.gz')
- >>> for record in vcf_reader.fetch('20', 1110696, 1230237): # doctest: +SKIP
- ... print record
- Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T])
- Record(CHROM=20, POS=1230237, REF=T, ALT=[None])
-
-Or extract a single row::
-
- >>> print vcf_reader.fetch('20', 1110696) # doctest: +SKIP
- Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T])
-
-
-The ``Writer`` class provides a way of writing a VCF file. Currently, you must specify a
-template ``Reader`` which provides the metadata::
-
- >>> vcf_reader = vcf.Reader(filename='vcf/test/tb.vcf.gz')
- >>> vcf_writer = vcf.Writer(open('/dev/null', 'w'), vcf_reader)
- >>> for record in vcf_reader:
- ... vcf_writer.write_record(record)
-
-
-An extensible script is available to filter vcf files in vcf_filter.py. VCF filters
-declared by other packages will be available for use in this script. Please
-see :doc:`FILTERS` for full description.
-
-'''
from vcf.parser import Reader, Writer
from vcf.parser import VCFReader, VCFWriter
from vcf.filters import Base as Filter
from vcf.parser import RESERVED_INFO, RESERVED_FORMAT
+from vcf.sample_filter import SampleFilter
-VERSION = '0.5.0'
+VERSION = '0.6.8'
diff --git a/vcf/cparse.pyx b/vcf/cparse.pyx
index 4a473d7..334542a 100644
--- a/vcf/cparse.pyx
+++ b/vcf/cparse.pyx
@@ -1,14 +1,27 @@
from model import _Call
-cdef _map(func, iterable, bad='.'):
+cdef _map(func, iterable, bad=['.', '']):
'''``map``, but make bad values None.'''
- return [func(x) if x != bad else None
+ return [func(x) if x not in bad else None
for x in iterable]
INTEGER = 'Integer'
FLOAT = 'Float'
NUMERIC = 'Numeric'
+def _parse_filter(filt_str):
+ '''Parse the FILTER field of a VCF entry into a Python list
+
+ NOTE: this method has a python equivalent and care must be taken
+ to keep the two methods equivalent
+ '''
+ if filt_str == '.':
+ return None
+ elif filt_str == 'PASS':
+ return []
+ else:
+ return filt_str.split(';')
+
def parse_samples(
list names, list samples, samp_fmt,
list samp_fmt_types, list samp_fmt_nums, site):
@@ -36,7 +49,14 @@ def parse_samples(
vals = sampvals[j]
# short circuit the most common
- if vals == '.' or vals == './.':
+ if samp_fmt._fields[j] == 'GT':
+ sampdat[j] = vals
+ continue
+ # genotype filters are a special case
+ elif samp_fmt._fields[j] == 'FT':
+ sampdat[j] = _parse_filter(vals)
+ continue
+ elif not vals or vals == '.':
sampdat[j] = None
continue
@@ -45,24 +65,24 @@ def parse_samples(
entry_num = samp_fmt_nums[j]
# we don't need to split single entries
- if entry_num == 1 or ',' not in vals:
-
+ if entry_num == 1:
if entry_type == INTEGER:
- sampdat[j] = int(vals)
+ try:
+ sampdat[j] = int(vals)
+ except ValueError:
+ sampdat[j] = float(vals)
elif entry_type == FLOAT or entry_type == NUMERIC:
sampdat[j] = float(vals)
else:
sampdat[j] = vals
-
- if entry_num != 1:
- sampdat[j] = (sampdat[j])
-
continue
vals = vals.split(',')
-
if entry_type == INTEGER:
- sampdat[j] = _map(int, vals)
+ try:
+ sampdat[j] = _map(int, vals)
+ except ValueError:
+ sampdat[j] = map(float, vals)
elif entry_type == FLOAT or entry_type == NUMERIC:
sampdat[j] = _map(float, vals)
else:
diff --git a/vcf/model.py b/vcf/model.py
index 9a27f87..34a4d17 100644
--- a/vcf/model.py
+++ b/vcf/model.py
@@ -1,27 +1,39 @@
from abc import ABCMeta, abstractmethod
import collections
import sys
+import re
+
+try:
+ from collections import Counter
+except ImportError:
+ from counter import Counter
+
+allele_delimiter = re.compile(r'''[|/]''') # to split a genotype into alleles
class _Call(object):
""" A genotype call, a cell entry in a VCF file"""
- __slots__ = ['site', 'sample', 'data', 'gt_nums', 'called']
+ __slots__ = ['site', 'sample', 'data', 'gt_nums', 'gt_alleles', 'called', 'ploidity']
def __init__(self, site, sample, data):
#: The ``_Record`` for this ``_Call``
self.site = site
#: The sample name
self.sample = sample
- #: Dictionary of data from the VCF file
+ #: Namedtuple of data from the VCF file
self.data = data
- try:
- self.gt_nums = self.data.GT
- #: True if the GT is not ./.
- self.called = self.gt_nums is not None
- except AttributeError:
- self.gt_nums = None
- # FIXME how do we know if a non GT call is called?
+
+ if getattr(self.data, 'GT', None) is not None:
+ self.gt_alleles = [(al if al != '.' else None) for al in allele_delimiter.split(self.data.GT)]
+ self.ploidity = len(self.gt_alleles)
+ self.called = any(al is not None for al in self.gt_alleles)
+ self.gt_nums = self.data.GT if self.called else None
+ else:
+ #62 a call without a genotype is not defined as called or not
+ self.gt_alleles = None
+ self.ploidity = None
self.called = None
+ self.gt_nums = None
def __repr__(self):
return "Call(sample=%s, %s)" % (self.sample, str(self.data))
@@ -30,19 +42,20 @@ def __eq__(self, other):
""" Two _Calls are equal if their _Records are equal
and the samples and ``gt_type``s are the same
"""
- return (self.site == other.site
- and self.sample == other.sample
- and self.gt_type == other.gt_type)
+ return (self.site == getattr(other, "site", None)
+ and self.sample == getattr(other, "sample", None)
+ and self.gt_type == getattr(other, "gt_type", None))
+
+ def __getstate__(self):
+ return dict((attr, getattr(self, attr)) for attr in self.__slots__)
+
+ def __setstate__(self, state):
+ for attr in self.__slots__:
+ setattr(self, attr, state.get(attr))
def gt_phase_char(self):
return "/" if not self.phased else "|"
- @property
- def gt_alleles(self):
- '''The numbers of the alleles called at a given sample'''
- # grab the numeric alleles of the gt string; tokenize by phasing
- return self.gt_nums.split(self.gt_phase_char())
-
@property
def gt_bases(self):
'''The actual genotype alleles.
@@ -52,7 +65,7 @@ def gt_bases(self):
if self.called:
# lookup and return the actual DNA alleles
try:
- return self.gt_phase_char().join(str(self.site.alleles[int(X)]) for X in self.gt_alleles)
+ return self.gt_phase_char().join(str(self.site.alleles[int(X)] if X is not None else '.') for X in self.gt_alleles)
except:
sys.stderr.write("Allele number not found in list of alleles\n")
else:
@@ -70,10 +83,14 @@ def gt_type(self):
if self.called:
alleles = self.gt_alleles
if all(X == alleles[0] for X in alleles[1:]):
- if alleles[0] == "0": return 0
- else: return 2
- else: return 1
- else: return None
+ if alleles[0] == "0":
+ return 0
+ else:
+ return 2
+ else:
+ return 1
+ else:
+ return None
@property
def phased(self):
@@ -100,6 +117,18 @@ def is_het(self):
return None
return self.gt_type == 1
+ @property
+ def is_filtered(self):
+ """ Return True for filtered calls """
+ try: # no FT annotation present for this variant
+ filt = self.data.FT
+ except AttributeError:
+ return False
+ if filt is None or len(filt) == 0: # FT is not set or set to PASS
+ return False
+ else:
+ return True
+
class _Record(object):
""" A set of calls at a site. Equivalent to a row in a VCF file.
@@ -108,10 +137,45 @@ class _Record(object):
INFO and FORMAT are available as properties.
The list of genotype calls is in the ``samples`` property.
+
+ Regarding the coordinates associated with each instance:
+
+ - ``POS``, per VCF specification, is the one-based index
+ (the first base of the contig has an index of 1) of the first
+ base of the ``REF`` sequence.
+ - The ``start`` and ``end`` denote the coordinates of the entire
+ ``REF`` sequence in the zero-based, half-open coordinate
+ system (see
+ http://genomewiki.ucsc.edu/index.php/Coordinate_Transforms),
+ where the first base of the contig has an index of 0, and the
+ interval runs up to, but does not include, the base at the
+ ``end`` index. This indexing scheme is analagous to Python
+ slice notation.
+ - The ``affected_start`` and ``affected_end`` coordinates are
+ also in the zero-based, half-open coordinate system. These
+ coordinates indicate the precise region of the reference
+ genome actually affected by the events denoted in ``ALT``
+ (i.e., the minimum ``affected_start`` and maximum
+ ``affected_end``).
+
+ - For SNPs and structural variants, the affected region
+ includes all bases of ``REF``, including the first base
+ (i.e., ``affected_start = start = POS - 1``).
+ - For deletions, the region includes all bases of ``REF``
+ except the first base, which flanks upstream the actual
+ deletion event, per VCF specification.
+ - For insertions, the ``affected_start`` and ``affected_end``
+ coordinates represent a 0 bp-length region between the two
+ flanking bases (i.e., ``affected_start`` =
+ ``affected_end``). This is analagous to Python slice
+ notation (see http://stackoverflow.com/a/2947881/38140).
+ Neither the upstream nor downstream flanking bases are
+ included in the region.
"""
def __init__(self, CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT,
sample_indexes, samples=None):
self.CHROM = CHROM
+ #: the one-based coordinate of the first nucleotide in ``REF``
self.POS = POS
self.ID = ID
self.REF = REF
@@ -120,23 +184,87 @@ def __init__(self, CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT,
self.FILTER = FILTER
self.INFO = INFO
self.FORMAT = FORMAT
- #: 0-based start coordinate
+ #: zero-based, half-open start coordinate of ``REF``
self.start = self.POS - 1
- #: 1-based end coordinate
+ #: zero-based, half-open end coordinate of ``REF``
self.end = self.start + len(self.REF)
#: list of alleles. [0] = REF, [1:] = ALTS
self.alleles = [self.REF]
self.alleles.extend(self.ALT)
#: list of ``_Calls`` for each sample ordered as in source VCF
- self.samples = samples
+ self.samples = samples or []
self._sample_indexes = sample_indexes
+ # Setting affected_start and affected_end here for Sphinx
+ # autodoc purposes...
+ #: zero-based, half-open start coordinate of affected region of reference genome
+ self.affected_start = None
+ #: zero-based, half-open end coordinate of affected region of reference genome (not included in the region)
+ self.affected_end = None
+ self._set_start_and_end()
+
+
+ def _set_start_and_end(self):
+ self.affected_start = self.affected_end = self.POS
+ for alt in self.ALT:
+ if alt is None:
+ start, end = self._compute_coordinates_for_none_alt()
+ elif alt.type == 'SNV':
+ start, end = self._compute_coordinates_for_snp()
+ elif alt.type == 'MNV':
+ start, end = self._compute_coordinates_for_indel()
+ else:
+ start, end = self._compute_coordinates_for_sv()
+ self.affected_start = min(self.affected_start, start)
+ self.affected_end = max(self.affected_end, end)
+
+
+ def _compute_coordinates_for_none_alt(self):
+ start = self.POS - 1
+ end = start + len(self.REF)
+ return (start, end)
+
+
+ def _compute_coordinates_for_snp(self):
+ if len(self.REF) > 1:
+ start = self.POS
+ end = start + (len(self.REF) - 1)
+ else:
+ start = self.POS - 1
+ end = self.POS
+ return (start, end)
+
+
+ def _compute_coordinates_for_indel(self):
+ if len(self.REF) > 1:
+ start = self.POS
+ end = start + (len(self.REF) - 1)
+ else:
+ start = end = self.POS
+ return (start, end)
+
+
+ def _compute_coordinates_for_sv(self):
+ start = self.POS - 1
+ end = start + len(self.REF)
+ return (start, end)
+
+
+ # For Python 2
+ def __cmp__(self, other):
+ return cmp((self.CHROM, self.POS), (getattr(other, "CHROM", None), getattr(other, "POS", None)))
+
+ # For Python 3
def __eq__(self, other):
""" _Records are equal if they describe the same variant (same position, alleles) """
- return (self.CHROM == other.CHROM and
- self.POS == other.POS and
- self.REF == other.REF and
- self.ALT == other.ALT)
+ return (self.CHROM == getattr(other, "CHROM", None) and
+ self.POS == getattr(other, "POS", None) and
+ self.REF == getattr(other, "REF", None) and
+ self.ALT == getattr(other, "ALT", None))
+
+ # For Python 3
+ def __lt__(self, other):
+ return (self.CHROM, self.POS) < (getattr(other, "CHROM", None), getattr(other, "POS", None))
def __iter__(self):
return iter(self.samples)
@@ -144,14 +272,14 @@ def __iter__(self):
def __str__(self):
return "Record(CHROM=%(CHROM)s, POS=%(POS)s, REF=%(REF)s, ALT=%(ALT)s)" % self.__dict__
- def __cmp__(self, other):
- return cmp( (self.CHROM, self.POS), (other.CHROM, other.POS))
-
def add_format(self, fmt):
self.FORMAT = self.FORMAT + ':' + fmt
def add_filter(self, flt):
- self.FILTER.append(flt)
+ if self.FILTER is None:
+ self.FILTER = [flt]
+ else:
+ self.FILTER.append(flt)
def add_info(self, info, value=True):
self.INFO[info] = value
@@ -163,7 +291,7 @@ def genotype(self, name):
@property
def num_called(self):
""" The number of called samples"""
- return sum(s.called for s in self.samples)
+ return sum(1 for s in self.samples if s.called)
@property
def call_rate(self):
@@ -192,18 +320,17 @@ def num_unknown(self):
@property
def aaf(self):
- """ The allele frequency of the alternate allele.
- NOTE 1: Punt if more than one alternate allele.
- NOTE 2: Denominator calc'ed from _called_ genotypes.
+ """ A list of allele frequencies of alternate alleles.
+ NOTE: Denominator calc'ed from _called_ genotypes.
"""
- # skip if more than one alternate allele. assumes bi-allelic
- if len(self.ALT) > 1:
- return None
- hom_ref = self.num_hom_ref
- het = self.num_het
- hom_alt = self.num_hom_alt
- num_chroms = float(2.0 * self.num_called)
- return float(het + 2 * hom_alt) / float(num_chroms)
+ num_chroms = 0.0
+ allele_counts = Counter()
+ for s in self.samples:
+ if s.gt_type is not None:
+ for a in s.gt_alleles:
+ allele_counts.update([a])
+ num_chroms += 1
+ return [allele_counts[str(i)]/num_chroms for i in range(1, len(self.ALT)+1)]
@property
def nucl_diversity(self):
@@ -215,16 +342,28 @@ def nucl_diversity(self):
Derived from:
\"Population Genetics: A Concise Guide, 2nd ed., p.45\"
- John Gillespie.
+ John Gillespie.
"""
# skip if more than one alternate allele. assumes bi-allelic
if len(self.ALT) > 1:
return None
- p = self.aaf
+ p = self.aaf[0]
q = 1.0 - p
num_chroms = float(2.0 * self.num_called)
return float(num_chroms / (num_chroms - 1.0)) * (2.0 * p * q)
+ @property
+ def heterozygosity(self):
+ """
+ Heterozygosity of a site. Heterozygosity gives the probability that
+ two randomly chosen chromosomes from the population have different
+ alleles, giving a measure of the degree of polymorphism in a population.
+
+ If there are i alleles with frequency p_i, H=1-sum_i(p_i^2)
+ """
+ allele_freqs = [1-sum(self.aaf)] + self.aaf
+ return 1 - sum(map(lambda x: x**2, allele_freqs))
+
def get_hom_refs(self):
""" The list of hom ref genotypes"""
return [s for s in self.samples if s.gt_type == 0]
@@ -244,11 +383,12 @@ def get_unknowns(self):
@property
def is_snp(self):
""" Return whether or not the variant is a SNP """
- if len(self.REF) > 1: return False
+ if len(self.REF) > 1:
+ return False
for alt in self.ALT:
if alt is None or alt.type != "SNV":
return False
- if alt not in ['A', 'C', 'G', 'T']:
+ if alt not in ['A', 'C', 'G', 'T', 'N', '*']:
return False
return True
@@ -257,10 +397,11 @@ def is_indel(self):
""" Return whether or not the variant is an INDEL """
is_sv = self.is_sv
- if len(self.REF) > 1 and not is_sv: return True
+ if len(self.REF) > 1 and not is_sv:
+ return True
for alt in self.ALT:
if alt is None:
- return True
+ return False
if alt.type != "SNV" and alt.type != "MNV":
return False
elif len(alt) != len(self.REF):
@@ -284,7 +425,8 @@ def is_sv(self):
def is_transition(self):
""" Return whether or not the SNP is a transition """
# if multiple alts, it is unclear if we have a transition
- if len(self.ALT) > 1: return False
+ if len(self.ALT) > 1:
+ return False
if self.is_snp:
# just one alt allele
@@ -294,24 +436,29 @@ def is_transition(self):
(self.REF == "C" and alt_allele == "T") or
(self.REF == "T" and alt_allele == "C")):
return True
- else: return False
- else: return False
+ else:
+ return False
+ else:
+ return False
@property
def is_deletion(self):
""" Return whether or not the INDEL is a deletion """
# if multiple alts, it is unclear if we have a transition
- if len(self.ALT) > 1: return False
+ if len(self.ALT) > 1:
+ return False
if self.is_indel:
# just one alt allele
alt_allele = self.ALT[0]
if alt_allele is None:
- return True
+ return False
if len(self.REF) > len(alt_allele):
return True
- else: return False
- else: return False
+ else:
+ return False
+ else:
+ return False
@property
def var_type(self):
@@ -332,13 +479,14 @@ def var_type(self):
def var_subtype(self):
"""
Return the subtype of variant.
+
- For SNPs and INDELs, yeild one of: [ts, tv, ins, del]
- - For SVs yield either "complex" or the SV type defined
- in the ALT fields (removing the brackets).
- E.g.:
- -> DEL
- -> INS:ME:L1
- -> DUP
+ - For SVs yield either "complex" or the SV type defined in the ALT
+ fields (removing the brackets). E.g.::
+
+ -> DEL
+ -> INS:ME:L1
+ -> DUP
The logic is meant to follow the rules outlined in the following
paragraph at:
@@ -400,6 +548,15 @@ def is_monomorphic(self):
""" Return True for reference calls """
return len(self.ALT) == 1 and self.ALT[0] is None
+ @property
+ def is_filtered(self):
+ """ Return True if a variant has been filtered """
+ filt = self.FILTER
+ if filt is None or len(filt) == 0: # FILTER is not set or set to PASS
+ return False
+ else:
+ return True
+
class _AltRecord(object):
'''An alternative allele record: either replacement string, SV placeholder, or breakend'''
@@ -415,7 +572,7 @@ def __str__(self):
raise NotImplementedError
def __eq__(self, other):
- return self.type == other.type
+ return self.type == getattr(other, 'type', None)
class _Substitution(_AltRecord):
@@ -441,8 +598,9 @@ def __len__(self):
def __eq__(self, other):
if isinstance(other, basestring):
return self.sequence == other
- else:
- return super(_Substitution, self).__eq__(other) and self.sequence == other.sequence
+ elif not isinstance(other, self.__class__):
+ return False
+ return super(_Substitution, self).__eq__(other) and self.sequence == other.sequence
class _Breakend(_AltRecord):
@@ -451,9 +609,15 @@ class _Breakend(_AltRecord):
def __init__(self, chr, pos, orientation, remoteOrientation, connectingSequence, withinMainAssembly, **kwargs):
super(_Breakend, self).__init__(type="BND", **kwargs)
#: The chromosome of breakend's mate.
- self.chr = str(chr)
+ if chr is not None:
+ self.chr = str(chr)
+ else:
+ self.chr = None # Single breakend
#: The coordinate of breakend's mate.
- self.pos = int(pos)
+ if pos is not None:
+ self.pos = int(pos)
+ else:
+ self.pos = None
#: The orientation of breakend's mate. If the sequence 3' of the breakend's mate is connected, True, else if the sequence 5' of the breakend's mate is connected, False.
self.remoteOrientation = remoteOrientation
#: If the breakend mate is within the assembly, True, else False if the breakend mate is on a contig in an ancillary assembly file.
@@ -485,13 +649,15 @@ def __str__(self):
return self.connectingSequence + remoteTag
def __eq__(self, other):
+ if not isinstance(other, self.__class__):
+ return False
return super(_Breakend, self).__eq__(other) \
- and self.chr == other.chr \
- and self.pos == other.pos \
- and self.remoteOrientation == other.remoteOrientation \
- and self.withinMainAssembly == other.withinMainAssembly \
- and self.orientation == other.orientation \
- and self.connectingSequence == other.connectingSequence
+ and self.chr == getattr(other, "chr", None) \
+ and self.pos == getattr(other, "pos", None) \
+ and self.remoteOrientation == getattr(other, "remoteOrientation", None) \
+ and self.withinMainAssembly == getattr(other, "withinMainAssembly", None) \
+ and self.orientation == getattr(other, "orientation", None) \
+ and self.connectingSequence == getattr(other, "connectingSequence", None)
class _SingleBreakend(_Breakend):
@@ -528,4 +694,8 @@ def __str__(self):
for (x, y) in zip(self._fields, self)])
return "CallData(" + dat + ')'
+ def __reduce__(self):
+ args = super(CallData, self).__reduce__()
+ return make_calldata_tuple, (fields, )
+
return CallData
diff --git a/vcf/parser.py b/vcf/parser.py
index f274e9c..c3c3d08 100644
--- a/vcf/parser.py
+++ b/vcf/parser.py
@@ -1,10 +1,11 @@
+import codecs
import collections
-import re
import csv
import gzip
-import sys
import itertools
-import codecs
+import os
+import re
+import sys
try:
from collections import OrderedDict
@@ -29,25 +30,30 @@
RESERVED_INFO = {
'AA': 'String', 'AC': 'Integer', 'AF': 'Float', 'AN': 'Integer',
'BQ': 'Float', 'CIGAR': 'String', 'DB': 'Flag', 'DP': 'Integer',
- 'END': 'Integer', 'H2': 'Flag', 'MQ': 'Float', 'MQ0': 'Integer',
- 'NS': 'Integer', 'SB': 'String', 'SOMATIC': 'Flag', 'VALIDATED': 'Flag',
-
- # VCF 4.1 Additions
- 'IMPRECISE':'Flag', 'NOVEL':'Flag', 'END':'Integer', 'SVTYPE':'String',
- 'CIPOS':'Integer','CIEND':'Integer','HOMLEN':'Integer','HOMSEQ':'Integer',
- 'BKPTID':'String','MEINFO':'String','METRANS':'String','DGVID':'String',
- 'DBVARID':'String','MATEID':'String','PARID':'String','EVENT':'String',
- 'CILEN':'Integer','CN':'Integer','CNADJ':'Integer','CICN':'Integer',
- 'CICNADJ':'Integer'
+ 'END': 'Integer', 'H2': 'Flag', 'H3': 'Flag', 'MQ': 'Float',
+ 'MQ0': 'Integer', 'NS': 'Integer', 'SB': 'String', 'SOMATIC': 'Flag',
+ 'VALIDATED': 'Flag', '1000G': 'Flag',
+
+ # Keys used for structural variants
+ 'IMPRECISE': 'Flag', 'NOVEL': 'Flag', 'SVTYPE': 'String',
+ 'SVLEN': 'Integer', 'CIPOS': 'Integer', 'CIEND': 'Integer',
+ 'HOMLEN': 'Integer', 'HOMSEQ': 'String', 'BKPTID': 'String',
+ 'MEINFO': 'String', 'METRANS': 'String', 'DGVID': 'String',
+ 'DBVARID': 'String', 'DBRIPID': 'String', 'MATEID': 'String',
+ 'PARID': 'String', 'EVENT': 'String', 'CILEN': 'Integer',
+ 'DPADJ': 'Integer', 'CN': 'Integer', 'CNADJ': 'Integer',
+ 'CICN': 'Integer', 'CICNADJ': 'Integer'
}
RESERVED_FORMAT = {
'GT': 'String', 'DP': 'Integer', 'FT': 'String', 'GL': 'Float',
- 'GQ': 'Float', 'HQ': 'Float',
+ 'GLE': 'String', 'PL': 'Integer', 'GP': 'Float', 'GQ': 'Integer',
+ 'HQ': 'Integer', 'PS': 'Integer', 'PQ': 'Integer', 'EC': 'Integer',
+ 'MQ': 'Integer',
- # VCF 4.1 Additions
- 'CN':'Integer','CNQ':'Float','CNL':'Float','NQ':'Integer','HAP':'Integer',
- 'AHAP':'Integer'
+ # Keys used for structural variants
+ 'CN': 'Integer', 'CNQ': 'Float', 'CNL': 'Float', 'NQ': 'Integer',
+ 'HAP': 'Integer', 'AHAP': 'Integer'
}
# Spec is a bit weak on which metadata lines are singular, like fileformat
@@ -57,47 +63,58 @@
# Conversion between value in file and Python value
field_counts = {
'.': None, # Unknown number of values
- 'A': -1, # Equal to the number of alleles in a given record
+ 'A': -1, # Equal to the number of alternate alleles in a given record
'G': -2, # Equal to the number of genotypes in a given record
+ 'R': -3, # Equal to the number of alleles including reference in a given record
}
-_Info = collections.namedtuple('Info', ['id', 'num', 'type', 'desc'])
+_Info = collections.namedtuple('Info', ['id', 'num', 'type', 'desc', 'source', 'version'])
_Filter = collections.namedtuple('Filter', ['id', 'desc'])
_Alt = collections.namedtuple('Alt', ['id', 'desc'])
_Format = collections.namedtuple('Format', ['id', 'num', 'type', 'desc'])
_SampleInfo = collections.namedtuple('SampleInfo', ['samples', 'gt_bases', 'gt_types', 'gt_phases'])
+_Contig = collections.namedtuple('Contig', ['id', 'length'])
class _vcf_metadata_parser(object):
- '''Parse the metadat in the header of a VCF file.'''
+ '''Parse the metadata in the header of a VCF file.'''
def __init__(self):
super(_vcf_metadata_parser, self).__init__()
self.info_pattern = re.compile(r'''\#\#INFO=<
- ID=(?P[^,]+),
- Number=(?P-?\d+|\.|[AG]),
- Type=(?PInteger|Float|Flag|Character|String),
+ ID=(?P[^,]+),\s*
+ Number=(?P-?\d+|\.|[AGR])?,\s*
+ Type=(?PInteger|Float|Flag|Character|String),\s*
Description="(?P[^"]*)"
+ (?:,\s*Source="(?P[^"]*)")?
+ (?:,\s*Version="?(?P[^"]*)"?)?
>''', re.VERBOSE)
self.filter_pattern = re.compile(r'''\#\#FILTER=<
- ID=(?P[^,]+),
+ ID=(?P[^,]+),\s*
Description="(?P[^"]*)"
>''', re.VERBOSE)
self.alt_pattern = re.compile(r'''\#\#ALT=<
- ID=(?P[^,]+),
+ ID=(?P[^,]+),\s*
Description="(?P[^"]*)"
>''', re.VERBOSE)
self.format_pattern = re.compile(r'''\#\#FORMAT=<
- ID=(?P.+),
- Number=(?P-?\d+|\.|[AG]),
- Type=(?P.+),
+ ID=(?P.+),\s*
+ Number=(?P-?\d+|\.|[AGR]),\s*
+ Type=(?P.+),\s*
Description="(?P.*)"
>''', re.VERBOSE)
+ self.contig_pattern = re.compile(r'''\#\#contig=<
+ ID=(?P[^>,]+)
+ (,.*length=(?P-?\d+))?
+ .*
+ >''', re.VERBOSE)
self.meta_pattern = re.compile(r'''##(?P.+?)=(?P.+)''')
def vcf_field_count(self, num_str):
"""Cast vcf header numbers to integer or None"""
- if num_str not in field_counts:
+ if num_str is None:
+ return None
+ elif num_str not in field_counts:
# Fixed, specified number
return int(num_str)
else:
@@ -113,7 +130,8 @@ def read_info(self, info_string):
num = self.vcf_field_count(match.group('number'))
info = _Info(match.group('id'), num,
- match.group('type'), match.group('desc'))
+ match.group('type'), match.group('desc'),
+ match.group('source'), match.group('version'))
return (match.group('id'), info)
@@ -133,7 +151,7 @@ def read_alt(self, alt_string):
match = self.alt_pattern.match(alt_string)
if not match:
raise SyntaxError(
- "One of the FILTER lines is malformed: %s" % alt_string)
+ "One of the ALT lines is malformed: %s" % alt_string)
alt = _Alt(match.group('id'), match.group('desc'))
@@ -153,31 +171,82 @@ def read_format(self, format_string):
return (match.group('id'), form)
+ def read_contig(self, contig_string):
+ '''Read a meta-contigrmation INFO line.'''
+ match = self.contig_pattern.match(contig_string)
+ if not match:
+ raise SyntaxError(
+ "One of the contig lines is malformed: %s" % contig_string)
+ length = self.vcf_field_count(match.group('length'))
+ contig = _Contig(match.group('id'), length)
+ return (match.group('id'), contig)
+
def read_meta_hash(self, meta_string):
- items = re.split("[<>]", meta_string)
- # Removing initial hash marks and final equal sign
- key = items[0][2:-1]
- hashItems = items[1].split(',')
- val = dict(item.split("=") for item in hashItems)
+ # assert re.match("##.+=<", meta_string)
+ items = meta_string.split('=', 1)
+ # Removing initial hash marks
+ key = items[0].lstrip('#')
+ # N.B., items can have quoted values, so cannot just split on comma
+ val = OrderedDict()
+ state = 0
+ k = ''
+ v = ''
+ for c in items[1].strip('[<>]'):
+
+ if state == 0: # reading item key
+ if c == '=':
+ state = 1 # end of key, start reading value
+ else:
+ k += c # extend key
+ elif state == 1: # reading item value
+ if v == '' and c == '"':
+ v += c # include quote mark in value
+ state = 2 # start reading quoted value
+ elif c == ',':
+ val[k] = v # store parsed item
+ state = 0 # read next key
+ k = ''
+ v = ''
+ else:
+ v += c
+ elif state == 2: # reading quoted item value
+ if c == '"':
+ v += c # include quote mark in value
+ state = 1 # end quoting
+ else:
+ v += c
+ if k != '':
+ val[k] = v
return key, val
def read_meta(self, meta_string):
if re.match("##.+=<", meta_string):
return self.read_meta_hash(meta_string)
- else:
- match = self.meta_pattern.match(meta_string)
- return match.group('key'), match.group('val')
+ match = self.meta_pattern.match(meta_string)
+ if not match:
+ # Spec only allows key=value, but we try to be liberal and
+ # interpret anything else as key=none (and all values are parsed
+ # as strings).
+ return meta_string.lstrip('#'), 'none'
+ return match.group('key'), match.group('val')
class Reader(object):
""" Reader for a VCF v 4.0 file, an iterator returning ``_Record objects`` """
- def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=False):
+ def __init__(self, fsock=None, filename=None, compressed=None, prepend_chr=False,
+ strict_whitespace=False, encoding='ascii'):
""" Create a new Reader for a VCF file.
You must specify either fsock (stream) or filename. Gzipped streams
or files are attempted to be recogized by the file extension, or gzipped
can be forced with ``compressed=True``
+
+ 'prepend_chr=True' will put 'chr' before all the CHROM values, useful
+ for different sources.
+
+ 'strict_whitespace=True' will split records on tabs only (as with VCF
+ spec) which allows you to parse files with spaces in the sample names.
"""
super(Reader, self).__init__()
@@ -188,15 +257,25 @@ def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=Fals
self._reader = fsock
if filename is None and hasattr(fsock, 'name'):
filename = fsock.name
- compressed = compressed or filename.endswith('.gz')
+ if compressed is None:
+ compressed = filename.endswith('.gz')
elif filename:
- compressed = compressed or filename.endswith('.gz')
+ if compressed is None:
+ compressed = filename.endswith('.gz')
self._reader = open(filename, 'rb' if compressed else 'rt')
self.filename = filename
if compressed:
self._reader = gzip.GzipFile(fileobj=self._reader)
if sys.version > '3':
- self._reader = codecs.getreader('ascii')(self._reader)
+ self._reader = codecs.getreader(encoding)(self._reader)
+
+ if strict_whitespace:
+ self._separator = '\t'
+ else:
+ self._separator = '\t| +'
+
+ self._row_pattern = re.compile(self._separator)
+ self._alt_pattern = re.compile('[\[\]]')
self.reader = (line.strip() for line in self._reader if line.strip())
@@ -210,13 +289,17 @@ def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=Fals
self.alts = None
#: FORMAT fields from header
self.formats = None
+ #: contig fields from header
+ self.contigs = None
self.samples = None
self._sample_indexes = None
self._header_lines = []
+ self._column_headers = []
self._tabix = None
self._prepend_chr = prepend_chr
self._parse_metainfo()
self._format_cache = {}
+ self.encoding = encoding
def __iter__(self):
return self
@@ -226,12 +309,12 @@ def _parse_metainfo(self):
The end user shouldn't have to use this. She can access the metainfo
directly with ``self.metadata``.'''
- for attr in ('metadata', 'infos', 'filters', 'alts', 'formats'):
+ for attr in ('metadata', 'infos', 'filters', 'alts', 'contigs', 'formats'):
setattr(self, attr, OrderedDict())
parser = _vcf_metadata_parser()
- line = self.reader.next()
+ line = next(self.reader)
while line.startswith('##'):
self._header_lines.append(line)
@@ -251,6 +334,10 @@ def _parse_metainfo(self):
key, val = parser.read_format(line)
self.formats[key] = val
+ elif line.startswith('##contig'):
+ key, val = parser.read_contig(line)
+ self.contigs[key] = val
+
else:
key, val = parser.read_meta(line)
if key in SINGULAR_METADATA:
@@ -260,17 +347,31 @@ def _parse_metainfo(self):
self.metadata[key] = []
self.metadata[key].append(val)
- line = self.reader.next()
+ line = next(self.reader)
- fields = re.split('\t| +', line)
+ fields = self._row_pattern.split(line[1:])
+ self._column_headers = fields[:9]
self.samples = fields[9:]
self._sample_indexes = dict([(x,i) for (i,x) in enumerate(self.samples)])
- def _map(self, func, iterable, bad='.'):
+ def _map(self, func, iterable, bad=['.', '']):
'''``map``, but make bad values None.'''
- return [func(x) if x != bad else None
+ return [func(x) if x not in bad else None
for x in iterable]
+ def _parse_filter(self, filt_str):
+ '''Parse the FILTER field of a VCF entry into a Python list
+
+ NOTE: this method has a cython equivalent and care must be taken
+ to keep the two methods equivalent
+ '''
+ if filt_str == '.':
+ return None
+ elif filt_str == 'PASS':
+ return []
+ else:
+ return filt_str.split(';')
+
def _parse_info(self, info_str):
'''Parse the INFO field of a VCF entry into a dictionary of Python
types.
@@ -280,10 +381,10 @@ def _parse_info(self, info_str):
return {}
entries = info_str.split(';')
- retdict = OrderedDict()
+ retdict = {}
for entry in entries:
- entry = entry.split('=')
+ entry = entry.split('=', 1)
ID = entry[0]
try:
entry_type = self.infos[ID].type
@@ -298,20 +399,27 @@ def _parse_info(self, info_str):
if entry_type == 'Integer':
vals = entry[1].split(',')
- val = self._map(int, vals)
+ try:
+ val = self._map(int, vals)
+ # Allow specified integers to be flexibly parsed as floats.
+ # Handles cases with incorrectly specified header types.
+ except ValueError:
+ val = self._map(float, vals)
elif entry_type == 'Float':
vals = entry[1].split(',')
val = self._map(float, vals)
elif entry_type == 'Flag':
val = True
- elif entry_type == 'String':
+ elif entry_type in ('String', 'Character'):
try:
- val = entry[1]
+ vals = entry[1].split(',') # commas are reserved characters indicating multiple values
+ val = self._map(str, vals)
except IndexError:
+ entry_type = 'Flag'
val = True
try:
- if self.infos[ID].num == 1 and entry_type != 'String':
+ if self.infos[ID].num == 1 and entry_type not in ( 'Flag', ):
val = val[0]
except KeyError:
pass
@@ -349,7 +457,6 @@ def _parse_samples(self, samples, samp_fmt, site):
# check whether we already know how to parse this format
if samp_fmt not in self._format_cache:
self._format_cache[samp_fmt] = self._parse_sample_format(samp_fmt)
-
samp_fmt = self._format_cache[samp_fmt]
if cparse:
@@ -369,7 +476,14 @@ def _parse_samples(self, samples, samp_fmt, site):
for i, vals in enumerate(sample.split(':')):
# short circuit the most common
- if vals == '.' or vals == './.':
+ if samp_fmt._fields[i] == 'GT':
+ sampdat[i] = vals
+ continue
+ # genotype filters are a special case
+ elif samp_fmt._fields[i] == 'FT':
+ sampdat[i] = self._parse_filter(vals)
+ continue
+ elif not vals or vals == ".":
sampdat[i] = None
continue
@@ -377,24 +491,24 @@ def _parse_samples(self, samples, samp_fmt, site):
entry_type = samp_fmt._types[i]
# we don't need to split single entries
- if entry_num == 1 or ',' not in vals:
-
+ if entry_num == 1:
if entry_type == 'Integer':
- sampdat[i] = int(vals)
- elif entry_type == 'Float':
+ try:
+ sampdat[i] = int(vals)
+ except ValueError:
+ sampdat[i] = float(vals)
+ elif entry_type == 'Float' or entry_type == 'Numeric':
sampdat[i] = float(vals)
else:
sampdat[i] = vals
-
- if entry_num != 1:
- sampdat[i] = (sampdat[i])
-
continue
vals = vals.split(',')
-
if entry_type == 'Integer':
- sampdat[i] = _map(int, vals)
+ try:
+ sampdat[i] = _map(int, vals)
+ except ValueError:
+ sampdat[i] = _map(float, vals)
elif entry_type == 'Float' or entry_type == 'Numeric':
sampdat[i] = _map(float, vals)
else:
@@ -407,9 +521,9 @@ def _parse_samples(self, samples, samp_fmt, site):
return samp_data
def _parse_alt(self, str):
- if re.search('[\[\]]', str) is not None:
+ if self._alt_pattern.search(str) is not None:
# Paired breakend
- items = re.split('[\[\]]', str)
+ items = self._alt_pattern.split(str)
remoteCoords = items[1].split(':')
chr = remoteCoords[0]
if chr[0] == '<':
@@ -436,8 +550,8 @@ def _parse_alt(self, str):
def next(self):
'''Return the next record in the file.'''
- line = self.reader.next()
- row = re.split('\t| +', line)
+ line = next(self.reader)
+ row = self._row_pattern.split(line.rstrip())
chrom = row[0]
if self._prepend_chr:
chrom = 'chr' + chrom
@@ -459,17 +573,16 @@ def next(self):
except ValueError:
qual = None
- filt = row[6]
- if filt == 'PASS' or filt == '.':
- filt = []
- else:
- filt = filt.split(';')
+ filt = self._parse_filter(row[6])
info = self._parse_info(row[7])
try:
fmt = row[8]
except IndexError:
fmt = None
+ else:
+ if fmt == '.':
+ fmt = None
record = _Record(chrom, pos, ID, ref, alt, qual, filt,
info, fmt, self._sample_indexes)
@@ -480,50 +593,69 @@ def next(self):
return record
- def fetch(self, chrom, start, end=None):
- """ fetch records from a Tabix indexed VCF, requires pysam
- if start and end are specified, return iterator over positions
- if end not specified, return individual ``_Call`` at start or None
+ def fetch(self, chrom, start=None, end=None):
+ """ Fetches records from a tabix-indexed VCF file and returns an
+ iterable of ``_Record`` instances
+
+ chrom must be specified.
+
+ The start and end coordinates are in the zero-based,
+ half-open coordinate system, similar to ``_Record.start`` and
+ ``_Record.end``. The very first base of a chromosome is
+ index 0, and the the region includes bases up to, but not
+ including the base at the end coordinate. For example
+ ``fetch('4', 10, 20)`` would include all variants
+ overlapping a 10 base pair region from the 11th base of
+ through the 20th base (which is at index 19) of chromosome
+ 4. It would not include the 21st base (at index 20). See
+ http://genomewiki.ucsc.edu/index.php/Coordinate_Transforms
+ for more information on the zero-based, half-open coordinate
+ system.
+
+ If end is omitted, all variants from start until the end of
+ the chromosome chrom will be included.
+
+ If start and end are omitted, all variants on chrom will be
+ returned.
+
+ requires pysam
+
"""
if not pysam:
raise Exception('pysam not available, try "pip install pysam"?')
-
if not self.filename:
raise Exception('Please provide a filename (or a "normal" fsock)')
if not self._tabix:
- self._tabix = pysam.Tabixfile(self.filename)
+ self._tabix = pysam.Tabixfile(self.filename,
+ encoding=self.encoding)
if self._prepend_chr and chrom[:3] == 'chr':
chrom = chrom[3:]
- # not sure why tabix needs position -1
- start = start - 1
-
- if end is None:
- self.reader = self._tabix.fetch(chrom, start, start + 1)
- try:
- return self.next()
- except StopIteration:
- return None
-
self.reader = self._tabix.fetch(chrom, start, end)
return self
class Writer(object):
- """ VCF Writer """
-
- fixed_fields = "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT".split()
+ """VCF Writer. On Windows Python 2, open stream with 'wb'."""
# Reverse keys and values in header field count dictionary
counts = dict((v,k) for k,v in field_counts.iteritems())
- def __init__(self, stream, template, lineterminator="\r\n"):
- self.writer = csv.writer(stream, delimiter="\t", lineterminator=lineterminator)
+ def __init__(self, stream, template, lineterminator="\n"):
+ self.writer = csv.writer(stream, delimiter="\t",
+ lineterminator=lineterminator,
+ quotechar='', quoting=csv.QUOTE_NONE)
self.template = template
self.stream = stream
+ # Order keys for INFO fields defined in the header (undefined fields
+ # get a maximum key).
+ self.info_order = collections.defaultdict(
+ lambda: len(template.infos),
+ dict(zip(template.infos.iterkeys(), itertools.count())))
+
two = '##{key}=\n'
four = '##{key}=\n'
_num = self._fix_field_count
@@ -531,7 +663,12 @@ def __init__(self, stream, template, lineterminator="\r\n"):
if key in SINGULAR_METADATA:
vals = [vals]
for val in vals:
- stream.write('##{0}={1}\n'.format(key, val))
+ if isinstance(val, dict):
+ values = ','.join('{0}={1}'.format(key, value)
+ for key, value in val.items())
+ stream.write('##{0}=<{1}>\n'.format(key, values))
+ else:
+ stream.write('##{0}={1}\n'.format(key, val))
for line in template.infos.itervalues():
stream.write(four.format(key="INFO", *line, num=_num(line.num)))
for line in template.formats.itervalues():
@@ -540,18 +677,26 @@ def __init__(self, stream, template, lineterminator="\r\n"):
stream.write(two.format(key="FILTER", *line))
for line in template.alts.itervalues():
stream.write(two.format(key="ALT", *line))
+ for line in template.contigs.itervalues():
+ if line.length:
+ stream.write('##contig=\n'.format(*line))
+ else:
+ stream.write('##contig=\n'.format(*line))
self._write_header()
def _write_header(self):
# TODO: write INFO, etc
- self.writer.writerow(self.fixed_fields + self.template.samples)
+ self.stream.write('#' + '\t'.join(self.template._column_headers
+ + self.template.samples) + '\n')
def write_record(self, record):
""" write a record to the file """
ffs = self._map(str, [record.CHROM, record.POS, record.ID, record.REF]) \
+ [self._format_alt(record.ALT), record.QUAL or '.', self._format_filter(record.FILTER),
- self._format_info(record.INFO), record.FORMAT]
+ self._format_info(record.INFO)]
+ if record.FORMAT:
+ ffs.append(record.FORMAT)
samples = [self._format_sample(record.FORMAT, sample)
for sample in record.samples]
@@ -589,12 +734,29 @@ def _format_filter(self, flt):
def _format_info(self, info):
if not info:
return '.'
- return ';'.join([self._stringify_pair(x,y) for x, y in info.iteritems()])
+ def order_key(field):
+ # Order by header definition first, alphabetically second.
+ return self.info_order[field], field
+ return ';'.join(self._stringify_pair(f, info[f]) for f in
+ sorted(info, key=order_key))
def _format_sample(self, fmt, sample):
- if getattr(sample.data, 'GT', None) is None:
- return "./."
- return ':'.join([self._stringify(x) for x in sample.data])
+ if hasattr(sample.data, 'GT'):
+ gt = sample.data.GT
+ else:
+ gt = './.' if 'GT' in fmt else ''
+
+ result = [gt] if gt else []
+ # Following the VCF spec, GT is always the first item whenever it is present.
+ for field in sample.data._fields:
+ value = getattr(sample.data,field)
+ if field == 'GT':
+ continue
+ if field == 'FT':
+ result.append(self._format_filter(value))
+ else:
+ result.append(self._stringify(value))
+ return ':'.join(result)
def _stringify(self, x, none='.', delim=','):
if type(x) == type([]):
diff --git a/vcf/sample_filter.py b/vcf/sample_filter.py
new file mode 100644
index 0000000..b156b45
--- /dev/null
+++ b/vcf/sample_filter.py
@@ -0,0 +1,115 @@
+# Author: Lenna X. Peterson
+# github.com/lennax
+# arklenna at gmail dot com
+
+import logging
+import sys
+import warnings
+
+
+from parser import Reader, Writer
+
+
+class SampleFilter(object):
+ """
+ Modifies the vcf Reader to filter each row by sample as it is parsed.
+
+ """
+
+ def __init__(self, infile, outfile=None, filters=None, invert=False):
+ # Methods to add to Reader
+ def get_filter(self):
+ return self._samp_filter
+
+ def set_filter(self, filt):
+ self._samp_filter = filt
+ if filt:
+ self.samples = [val for idx, val in enumerate(self.samples)
+ if idx not in set(filt)]
+
+ def filter_samples(fn):
+ """Decorator function to filter sample parameter"""
+ def filt(self, samples, *args):
+ samples = [val for idx, val in enumerate(samples)
+ if idx not in set(self.sample_filter)]
+ return fn(self, samples, *args)
+ return filt
+
+ # Add property to Reader for filter list
+ Reader.sample_filter = property(get_filter, set_filter)
+ Reader._samp_filter = []
+ # Modify Reader._parse_samples to filter samples
+ self._orig_parse_samples = Reader._parse_samples
+ Reader._parse_samples = filter_samples(Reader._parse_samples)
+ self.parser = Reader(filename=infile)
+ # Store initial samples and indices
+ self.samples = self.parser.samples
+ self.smp_idx = dict([(v, k) for k, v in enumerate(self.samples)])
+ # Properties for filter/writer
+ self.outfile = outfile
+ self.invert = invert
+ self.filters = filters
+ if filters is not None:
+ self.set_filters()
+ self.write()
+
+ def __del__(self):
+ try:
+ self._undo_monkey_patch()
+ except AttributeError:
+ pass
+
+ def set_filters(self, filters=None, invert=False):
+ """Convert filters from string to list of indices, set on Reader"""
+ if filters is not None:
+ self.filters = filters
+ if invert:
+ self.invert = invert
+ filt_l = self.filters.split(",")
+ filt_s = set(filt_l)
+ if len(filt_s) < len(filt_l):
+ warnings.warn("Non-unique filters, ignoring", RuntimeWarning)
+
+ def filt2idx(item):
+ """Convert filter to valid sample index"""
+ try:
+ item = int(item)
+ except ValueError:
+ # not an idx, check if it's a value
+ return self.smp_idx.get(item)
+ else:
+ # is int, check if it's an idx
+ if item < len(self.samples):
+ return item
+ filters = set(filter(lambda x: x is not None, map(filt2idx, filt_s)))
+ if len(filters) < len(filt_s):
+ # TODO print the filters that were ignored
+ warnings.warn("Invalid filters, ignoring", RuntimeWarning)
+
+ if self.invert:
+ filters = set(xrange(len(self.samples))).difference(filters)
+
+ # `sample_filter` setter updates `samples`
+ self.parser.sample_filter = filters
+ if len(self.parser.samples) == 0:
+ warnings.warn("Number of samples to keep is zero", RuntimeWarning)
+ logging.info("Keeping these samples: {0}\n".format(self.parser.samples))
+ return self.parser.samples
+
+ def write(self, outfile=None):
+ if outfile is not None:
+ self.outfile = outfile
+ if self.outfile is None:
+ _out = sys.stdout
+ elif hasattr(self.outfile, 'write'):
+ _out = self.outfile
+ else:
+ _out = open(self.outfile, "wb")
+ logging.info("Writing to '{0}'\n".format(self.outfile))
+ writer = Writer(_out, self.parser)
+ for row in self.parser:
+ writer.write_record(row)
+
+ def _undo_monkey_patch(self):
+ Reader._parse_samples = self._orig_parse_samples
+ delattr(Reader, 'sample_filter')
diff --git a/vcf/test/1kg.sites.vcf b/vcf/test/1kg.sites.vcf
new file mode 100644
index 0000000..857a944
--- /dev/null
+++ b/vcf/test/1kg.sites.vcf
@@ -0,0 +1,200 @@
+##fileformat=VCFv4.1
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##ALT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##reference=GRCh37
+#CHROM POS ID REF ALT QUAL FILTER INFO
+1 10583 rs58108140 G A 100 PASS AVGPOST=0.7707;RSQ=0.4319;LDAF=0.2327;ERATE=0.0161;AN=2184;VT=SNP;AA=.;THETA=0.0046;AC=314;SNPSOURCE=LOWCOV;AF=0.14;ASN_AF=0.13;AMR_AF=0.17;AFR_AF=0.04;EUR_AF=0.21
+1 10611 rs189107123 C G 100 PASS AN=2184;THETA=0.0077;VT=SNP;AA=.;AC=41;ERATE=0.0048;SNPSOURCE=LOWCOV;AVGPOST=0.9330;LDAF=0.0479;RSQ=0.3475;AF=0.02;ASN_AF=0.01;AMR_AF=0.03;AFR_AF=0.01;EUR_AF=0.02
+1 13302 rs180734498 C T 100 PASS THETA=0.0048;AN=2184;AC=249;VT=SNP;AA=.;RSQ=0.6281;LDAF=0.1573;SNPSOURCE=LOWCOV;AVGPOST=0.8895;ERATE=0.0058;AF=0.11;ASN_AF=0.02;AMR_AF=0.08;AFR_AF=0.21;EUR_AF=0.14
+1 13327 rs144762171 G C 100 PASS AVGPOST=0.9698;AN=2184;VT=SNP;AA=.;RSQ=0.6482;AC=59;SNPSOURCE=LOWCOV;ERATE=0.0012;LDAF=0.0359;THETA=0.0204;AF=0.03;ASN_AF=0.02;AMR_AF=0.03;AFR_AF=0.02;EUR_AF=0.04
+1 13957 rs201747181 TC T 28 PASS AA=TC;AC=35;AF=0.02;AFR_AF=0.02;AMR_AF=0.02;AN=2184;ASN_AF=0.01;AVGPOST=0.8711;ERATE=0.0065;EUR_AF=0.02;LDAF=0.0788;RSQ=0.2501;THETA=0.0100;VT=INDEL
+1 13980 rs151276478 T C 100 PASS AN=2184;AC=45;ERATE=0.0034;THETA=0.0139;RSQ=0.3603;LDAF=0.0525;VT=SNP;AA=.;AVGPOST=0.9221;SNPSOURCE=LOWCOV;AF=0.02;ASN_AF=0.02;AMR_AF=0.02;AFR_AF=0.01;EUR_AF=0.02
+1 30923 rs140337953 G T 100 PASS AC=1584;AA=T;AN=2184;RSQ=0.5481;VT=SNP;THETA=0.0162;SNPSOURCE=LOWCOV;ERATE=0.0183;LDAF=0.6576;AVGPOST=0.7335;AF=0.73;ASN_AF=0.89;AMR_AF=0.80;AFR_AF=0.48;EUR_AF=0.73
+1 46402 rs199681827 C CTGT 31 PASS AA=.;AC=8;AF=0.0037;AFR_AF=0.01;AN=2184;ASN_AF=0.0017;AVGPOST=0.8325;ERATE=0.0072;LDAF=0.0903;RSQ=0.0960;THETA=0.0121;VT=INDEL
+1 47190 rs200430748 G GA 192 PASS AA=G;AC=29;AF=0.01;AFR_AF=0.06;AMR_AF=0.0028;AN=2184;AVGPOST=0.9041;ERATE=0.0041;LDAF=0.0628;RSQ=0.2883;THETA=0.0153;VT=INDEL
+1 51476 rs187298206 T C 100 PASS ERATE=0.0021;AA=C;AC=18;AN=2184;VT=SNP;THETA=0.0103;LDAF=0.0157;SNPSOURCE=LOWCOV;AVGPOST=0.9819;RSQ=0.5258;AF=0.01;ASN_AF=0.01;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01
+1 51479 rs116400033 T A 100 PASS RSQ=0.7414;AVGPOST=0.9085;AA=T;AN=2184;THETA=0.0131;AC=235;VT=SNP;LDAF=0.1404;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.11;ASN_AF=0.0035;AMR_AF=0.16;AFR_AF=0.03;EUR_AF=0.22
+1 51914 rs190452223 T G 100 PASS ERATE=0.0004;AVGPOST=0.9985;THETA=0.0159;AA=T;AN=2184;VT=SNP;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4089;LDAF=0.0012;AF=0.0005;ASN_AF=0.0017
+1 51935 rs181754315 C T 100 PASS THETA=0.0126;AA=C;AN=2184;RSQ=0.1888;AVGPOST=0.9972;LDAF=0.0015;VT=SNP;AC=0;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0
+1 51954 rs185832753 G C 100 PASS LDAF=0.0021;AA=G;AN=2184;RSQ=0.4692;AVGPOST=0.9975;VT=SNP;SNPSOURCE=LOWCOV;THETA=0.0029;ERATE=0.0006;AC=2;AF=0.0009;AMR_AF=0.01
+1 52058 rs62637813 G C 100 PASS AA=C;ERATE=0.0057;AN=2184;AVGPOST=0.9264;VT=SNP;RSQ=0.4882;AC=64;SNPSOURCE=LOWCOV;LDAF=0.0620;THETA=0.0069;AF=0.03;ASN_AF=0.0017;AMR_AF=0.04;AFR_AF=0.02;EUR_AF=0.05
+1 52144 rs190291950 T A 100 PASS THETA=0.0093;ERATE=0.0013;LDAF=0.0156;AA=T;AN=2184;VT=SNP;RSQ=0.5220;AVGPOST=0.9811;SNPSOURCE=LOWCOV;AC=21;AF=0.01;ASN_AF=0.0035;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01
+1 52185 rs201374420 TTAA T 244 PASS AA=.;AC=10;AF=0.0046;AFR_AF=0.0020;AMR_AF=0.02;AN=2184;ASN_AF=0.0035;AVGPOST=0.9840;ERATE=0.0037;LDAF=0.0124;RSQ=0.4271;THETA=0.0232;VT=INDEL
+1 52238 rs150021059 T G 100 PASS THETA=0.0132;AA=G;AN=2184;RSQ=0.6256;VT=SNP;ERATE=0.0026;AVGPOST=0.8617;SNPSOURCE=LOWCOV;AC=1941;LDAF=0.8423;AF=0.89;ASN_AF=0.99;AMR_AF=0.93;AFR_AF=0.64;EUR_AF=0.95
+1 53234 rs199502715 CAT C 227 PASS AA=CAT;AC=10;AF=0.0046;AFR_AF=0.02;AMR_AF=0.0028;AN=2184;AVGPOST=0.9936;ERATE=0.0007;LDAF=0.0074;RSQ=0.6237;THETA=0.0119;VT=INDEL
+1 54353 rs140052487 C A 100 PASS THETA=0.0026;AA=C;AN=2184;AC=16;VT=SNP;RSQ=0.5074;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0146;ERATE=0.0058;AF=0.01;ASN_AF=0.01;AMR_AF=0.0028;AFR_AF=0.02;EUR_AF=0.0013
+1 54421 rs146477069 A G 100 PASS ERATE=0.0013;AN=2184;AC=220;VT=SNP;RSQ=0.7869;AVGPOST=0.9461;AA=A;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.1190;AF=0.10;ASN_AF=0.25;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.02
+1 54490 rs141149254 G A 100 PASS ERATE=0.0004;THETA=0.0074;AA=G;AN=2184;VT=SNP;RSQ=0.8366;AVGPOST=0.9646;AC=175;SNPSOURCE=LOWCOV;LDAF=0.0929;AF=0.08;ASN_AF=0.0035;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.15
+1 54676 rs2462492 C T 100 PASS LDAF=0.1528;RSQ=0.6989;AA=T;AN=2184;AC=267;VT=SNP;AVGPOST=0.8998;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0037;AF=0.12;ASN_AF=0.02;AMR_AF=0.20;AFR_AF=0.09;EUR_AF=0.18
+1 54753 rs143174675 T G 100 PASS AA=T;AN=2184;RSQ=0.6820;AC=65;VT=SNP;THETA=0.0080;ERATE=0.0016;SNPSOURCE=LOWCOV;AVGPOST=0.9697;LDAF=0.0399;AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.03
+1 55164 rs3091274 C A 100 PASS AN=2184;VT=SNP;ERATE=0.0045;AA=A;THETA=0.0162;SNPSOURCE=LOWCOV;AC=1955;RSQ=0.6373;AVGPOST=0.8686;LDAF=0.8489;AF=0.90;ASN_AF=0.99;AMR_AF=0.94;AFR_AF=0.65;EUR_AF=0.96
+1 55249 rs200769871 C CTATGG 443 PASS AA=C;AC=151;AF=0.07;AFR_AF=0.03;AMR_AF=0.08;AN=2184;ASN_AF=0.16;AVGPOST=0.9073;ERATE=0.0063;EUR_AF=0.02;LDAF=0.0968;RSQ=0.5891;THETA=0.0038;VT=INDEL
+1 55299 rs10399749 C T 100 PASS RSQ=0.7602;LDAF=0.2954;AN=2184;VT=SNP;ERATE=0.0051;AA=c;AC=554;SNPSOURCE=LOWCOV;AVGPOST=0.8845;THETA=0.0070;AF=0.25;ASN_AF=0.33;AMR_AF=0.21;AFR_AF=0.39;EUR_AF=0.13
+1 55313 rs182462964 A T 100 PASS ERATE=0.0004;RSQ=0.6112;AVGPOST=0.9994;AN=2184;VT=SNP;THETA=0.0057;AA=A;SNPSOURCE=LOWCOV;AC=1;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020
+1 55326 rs3107975 T C 100 PASS AA=C;ERATE=0.0074;AN=2184;THETA=0.0085;VT=SNP;SNPSOURCE=LOWCOV;AVGPOST=0.9622;AC=90;RSQ=0.6901;LDAF=0.0562;AF=0.04;ASN_AF=0.07;AMR_AF=0.02;AFR_AF=0.07;EUR_AF=0.01
+1 55330 rs185215913 G A 100 PASS ERATE=0.0005;AA=G;AN=2184;VT=SNP;THETA=0.0086;AVGPOST=0.9988;LDAF=0.0011;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4701;AF=0.0005;AFR_AF=0.0020
+1 55367 rs190850374 G A 100 PASS ERATE=0.0004;THETA=0.0044;AA=G;AN=2184;VT=SNP;LDAF=0.0029;RSQ=0.3860;SNPSOURCE=LOWCOV;AVGPOST=0.9961;AC=2;AF=0.0009;AMR_AF=0.01
+1 55388 rs182711216 C T 100 PASS THETA=0.0102;ERATE=0.0005;AA=C;AVGPOST=0.9983;AN=2184;LDAF=0.0010;VT=SNP;RSQ=0.2348;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017
+1 55394 rs2949420 T A 100 PASS AC=18;AN=2184;VT=SNP;AA=A;RSQ=0.4995;AVGPOST=0.9784;LDAF=0.0171;SNPSOURCE=LOWCOV;ERATE=0.0012;THETA=0.0063;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02
+1 55416 rs193242050 G A 100 PASS AA=G;AN=2184;AVGPOST=0.9944;VT=SNP;LDAF=0.0064;AC=9;THETA=0.0019;RSQ=0.6553;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0041;AFR_AF=0.02
+1 55427 rs183189405 T C 100 PASS THETA=0.0054;AA=T;AN=2184;VT=SNP;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;RSQ=0.2759;ERATE=0.0007;AF=0.0005;AFR_AF=0.0020
+1 55816 rs187434873 G A 100 PASS AN=2184;THETA=0.0119;VT=SNP;AC=10;RSQ=0.4578;AA=A;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0108;ERATE=0.0007;AF=0.0046;AMR_AF=0.01;EUR_AF=0.01
+1 55850 rs191890754 C G 100 PASS AVGPOST=0.9921;AA=G;AN=2184;VT=SNP;RSQ=0.4083;THETA=0.0045;LDAF=0.0056;AC=5;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0023;EUR_AF=0.01
+1 55852 rs184233019 G C 100 PASS THETA=0.0137;AA=G;AN=2184;RSQ=0.5433;ERATE=0.0009;LDAF=0.0046;VT=SNP;AVGPOST=0.9953;AC=5;SNPSOURCE=LOWCOV;AF=0.0023;AMR_AF=0.01;EUR_AF=0.0013
+1 56644 rs143342222 A C 100 PASS AN=2184;AVGPOST=0.9962;LDAF=0.0040;ERATE=0.0024;VT=SNP;AA=A;RSQ=0.5700;AC=5;SNPSOURCE=LOWCOV;THETA=0.0117;AF=0.0023;AFR_AF=0.01
+1 57952 rs189727433 A C 100 PASS AA=C;ERATE=0.0085;AN=2184;LDAF=0.7878;VT=SNP;THETA=0.0076;RSQ=0.4712;AC=1902;SNPSOURCE=LOWCOV;AVGPOST=0.7578;AF=0.87;ASN_AF=0.98;AMR_AF=0.91;AFR_AF=0.64;EUR_AF=0.91
+1 58814 rs114420996 G A 100 PASS AC=223;THETA=0.0032;AA=G;AN=2184;RSQ=0.9087;LDAF=0.1074;VT=SNP;SNPSOURCE=LOWCOV;ERATE=0.0006;AVGPOST=0.9777;AF=0.10;ASN_AF=0.03;AMR_AF=0.17;AFR_AF=0.20;EUR_AF=0.06
+1 59040 rs149755937 T C 100 PASS AVGPOST=0.9710;AC=115;AA=T;AN=2184;RSQ=0.8248;VT=SNP;ERATE=0.0017;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.0613;AF=0.05;ASN_AF=0.03;AMR_AF=0.15;AFR_AF=0.0041;EUR_AF=0.06
+1 60726 rs192328835 C A 100 PASS AVGPOST=0.9092;AN=2184;RSQ=0.5988;ERATE=0.0081;AC=144;VT=SNP;THETA=0.0045;AA=A;SNPSOURCE=LOWCOV;LDAF=0.0959;AF=0.07;ASN_AF=0.05;AMR_AF=0.10;AFR_AF=0.11;EUR_AF=0.03
+1 61442 rs74970982 A G 100 PASS LDAF=0.9152;AA=G;AN=2184;VT=SNP;ERATE=0.0026;RSQ=0.4867;AVGPOST=0.9004;SNPSOURCE=LOWCOV;THETA=0.0013;AC=2084;AF=0.95;ASN_AF=1.00;AMR_AF=0.97;AFR_AF=0.84;EUR_AF=0.99
+1 61462 rs56992750 T A 100 PASS THETA=0.0023;LDAF=0.0378;RSQ=0.7396;AA=T;AN=2184;AVGPOST=0.9773;VT=SNP;AC=68;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.03;AMR_AF=0.02;AFR_AF=0.13
+1 61743 rs184286948 G C 100 PASS AVGPOST=0.9939;LDAF=0.0047;AA=G;AN=2184;VT=SNP;ERATE=0.0011;SNPSOURCE=LOWCOV;AC=4;THETA=0.0016;RSQ=0.4838;AF=0.0018;AMR_AF=0.01;EUR_AF=0.0026
+1 61987 rs76735897 A G 100 PASS THETA=0.0015;AN=2184;AC=569;VT=SNP;AA=A;RSQ=0.7192;AVGPOST=0.8533;LDAF=0.2944;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.26;ASN_AF=0.07;AMR_AF=0.31;AFR_AF=0.25;EUR_AF=0.39
+1 61989 rs77573425 G C 100 PASS RSQ=0.7254;AVGPOST=0.8584;AA=G;AN=2184;LDAF=0.2849;VT=SNP;AC=555;THETA=0.0019;SNPSOURCE=LOWCOV;ERATE=0.0007;AF=0.25;ASN_AF=0.07;AMR_AF=0.31;AFR_AF=0.22;EUR_AF=0.39
+1 61993 rs190553843 C T 100 PASS AC=7;RSQ=0.6106;AA=C;THETA=0.0143;AN=2184;ERATE=0.0009;VT=SNP;AVGPOST=0.9953;SNPSOURCE=LOWCOV;LDAF=0.0050;AF=0.0032;AFR_AF=0.01
+1 62156 rs181864839 C T 100 PASS ERATE=0.0005;AA=C;AN=2184;AVGPOST=0.9979;LDAF=0.0015;VT=SNP;THETA=0.0094;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4561;AF=0.0005;AFR_AF=0.0020
+1 62157 rs10399597 G A 100 PASS AVGPOST=0.9945;AA=G;AN=2184;ERATE=0.0025;VT=SNP;RSQ=0.5217;AC=5;THETA=0.0066;SNPSOURCE=LOWCOV;LDAF=0.0050;AF=0.0023;AFR_AF=0.01
+1 62162 rs140556834 G A 100 PASS AA=G;AN=2184;AC=8;LDAF=0.0057;VT=SNP;THETA=0.0018;ERATE=0.0017;RSQ=0.6089;AVGPOST=0.9948;SNPSOURCE=LOWCOV;AF=0.0037;AMR_AF=0.0028;AFR_AF=0.01;EUR_AF=0.0013
+1 63276 rs185977555 G A 100 PASS RSQ=0.2744;AA=G;AN=2184;AVGPOST=0.9947;VT=SNP;ERATE=0.0010;SNPSOURCE=LOWCOV;AC=1;THETA=0.0010;LDAF=0.0031;AF=0.0005;AFR_AF=0.0020
+1 63297 rs188886746 G A 100 PASS ERATE=0.0005;AVGPOST=0.9986;AA=G;AN=2184;VT=SNP;AC=0;SNPSOURCE=LOWCOV;RSQ=0.2459;THETA=0.0024;LDAF=0.0008;AF=0
+1 63671 rs116440577 G A 100 PASS AA=G;AN=2184;ERATE=0.0047;LDAF=0.1773;VT=SNP;THETA=0.0072;AC=369;SNPSOURCE=LOWCOV;RSQ=0.8980;AVGPOST=0.9652;AF=0.17;ASN_AF=0.05;AMR_AF=0.22;AFR_AF=0.35;EUR_AF=0.11
+1 63735 rs201888535 CCTA C 455 PASS AA=CCTA;AC=829;AF=0.38;AFR_AF=0.13;AMR_AF=0.33;AN=2184;ASN_AF=0.69;AVGPOST=0.7654;ERATE=0.0047;EUR_AF=0.34;LDAF=0.4128;RSQ=0.6424;THETA=0.0062;VT=INDEL
+1 64649 rs181431124 A C 100 PASS RSQ=0.6975;AN=2184;VT=SNP;AA=.;ERATE=0.0008;AVGPOST=0.9918;SNPSOURCE=LOWCOV;AC=21;THETA=0.0024;LDAF=0.0114;AF=0.01;AMR_AF=0.01;EUR_AF=0.03
+1 66162 rs62639105 A T 100 PASS THETA=0.0026;ERATE=0.0166;LDAF=0.3089;AN=2184;VT=SNP;AA=.;AC=544;SNPSOURCE=LOWCOV;RSQ=0.5681;AVGPOST=0.7777;AF=0.25;ASN_AF=0.07;AMR_AF=0.30;AFR_AF=0.23;EUR_AF=0.38
+1 66176 rs28552463 T A 100 PASS AN=2184;RSQ=0.4451;VT=SNP;AA=.;THETA=0.0095;LDAF=0.0631;AC=70;SNPSOURCE=LOWCOV;ERATE=0.0061;AVGPOST=0.9210;AF=0.03;ASN_AF=0.0017;AMR_AF=0.01;AFR_AF=0.13;EUR_AF=0.0013
+1 66219 rs181028663 A T 100 PASS LDAF=0.1137;ERATE=0.0074;AN=2184;VT=SNP;AA=.;AC=68;THETA=0.0059;RSQ=0.2946;AVGPOST=0.8268;SNPSOURCE=LOWCOV;AF=0.03;ASN_AF=0.08;AMR_AF=0.04;AFR_AF=0.01;EUR_AF=0.01
+1 66331 rs186063952 A C 100 PASS THETA=0.0126;AVGPOST=0.7656;RSQ=0.1616;AN=2184;LDAF=0.1387;ERATE=0.0093;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=42;AF=0.02;ASN_AF=0.0035;AMR_AF=0.01;AFR_AF=0.07
+1 66442 rs192044252 T A 100 PASS RSQ=0.1763;AVGPOST=0.7894;AN=2184;THETA=0.0031;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=36;ERATE=0.0107;LDAF=0.1241;AF=0.02;ASN_AF=0.0035;AMR_AF=0.03;AFR_AF=0.02;EUR_AF=0.01
+1 66457 rs13328655 T A 100 PASS ERATE=0.0085;AN=2184;VT=SNP;AA=.;AC=31;AVGPOST=0.8340;LDAF=0.0957;RSQ=0.1836;SNPSOURCE=LOWCOV;THETA=0.0024;AF=0.01;ASN_AF=0.01;AMR_AF=0.01;AFR_AF=0.03;EUR_AF=0.01
+1 66507 rs12401368 T A 100 PASS ERATE=0.0197;AN=2184;VT=SNP;AA=.;THETA=0.0122;SNPSOURCE=LOWCOV;AC=170;RSQ=0.2110;LDAF=0.2457;AVGPOST=0.6536;AF=0.08;ASN_AF=0.07;AMR_AF=0.09;AFR_AF=0.05;EUR_AF=0.09
+1 67179 rs149952626 C G 100 PASS AVGPOST=0.9946;AN=2184;VT=SNP;AA=.;THETA=0.0046;SNPSOURCE=LOWCOV;ERATE=0.0012;AC=11;RSQ=0.6333;LDAF=0.0069;AF=0.01;ASN_AF=0.02
+1 67181 rs77662731 A G 100 PASS AVGPOST=0.9817;THETA=0.0096;ERATE=0.0013;AN=2184;RSQ=0.8542;AC=104;LDAF=0.0529;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AF=0.05;AMR_AF=0.02;AFR_AF=0.20
+1 69511 rs75062661 A G 100 PASS LDAF=0.6051;AC=1424;ERATE=0.0237;AN=2184;RSQ=0.5669;VT=SNP;AA=.;AVGPOST=0.7173;SNPSOURCE=LOWCOV;THETA=0.0052;AF=0.65;ASN_AF=0.87;AMR_AF=0.65;AFR_AF=0.33;EUR_AF=0.70
+1 69534 rs190717287 T C 100 PASS AVGPOST=0.9986;LDAF=0.0013;AN=2184;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4002;THETA=0.0016;ERATE=0.0006;AF=0.0005;ASN_AF=0.0017
+1 69536 rs200013390 C T 100 PASS AA=.;AC=0;AF=0;AN=2184;AVGPOST=0.9986;ERATE=0.0006;LDAF=0.0008;RSQ=0.0677;SNPSOURCE=EXOME;THETA=0.0087;VT=SNP
+1 72119 rs199639004 G GTA 158 PASS AA=.;AC=8;AF=0.0037;AMR_AF=0.0028;AN=2184;ASN_AF=0.01;AVGPOST=0.9589;ERATE=0.0026;EUR_AF=0.0013;LDAF=0.0243;RSQ=0.2268;THETA=0.0016;VT=INDEL
+1 72148 rs182862337 C T 100 PASS AN=2184;RSQ=0.2794;THETA=0.0130;VT=SNP;AA=.;LDAF=0.0019;AVGPOST=0.9971;SNPSOURCE=LOWCOV;AC=1;ERATE=0.0007;AF=0.0005;AMR_AF=0.0028
+1 72297 rs200651397 G GTAT 160 PASS AA=G;AC=19;AF=0.01;AMR_AF=0.02;AN=2184;ASN_AF=0.01;AVGPOST=0.9383;ERATE=0.0055;EUR_AF=0.01;LDAF=0.0399;RSQ=0.3194;THETA=0.0064;VT=INDEL
+1 73841 rs143773730 C T 100 PASS ERATE=0.0303;THETA=0.0044;AN=2184;AVGPOST=0.8178;RSQ=0.5832;VT=SNP;AA=.;SNPSOURCE=LOWCOV;LDAF=0.2588;AC=425;AF=0.19;ASN_AF=0.15;AMR_AF=0.22;AFR_AF=0.17;EUR_AF=0.23
+1 77462 rs188023513 G A 100 PASS LDAF=0.1685;AN=2184;AVGPOST=0.8149;VT=SNP;AA=.;RSQ=0.4624;AC=198;THETA=0.0100;SNPSOURCE=LOWCOV;ERATE=0.0222;AF=0.09;ASN_AF=0.11;AMR_AF=0.12;AFR_AF=0.08;EUR_AF=0.07
+1 77470 rs192898053 T C 100 PASS LDAF=0.0047;AN=2184;VT=SNP;AA=.;ERATE=0.0011;AVGPOST=0.9918;THETA=0.0025;RSQ=0.1818;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;AFR_AF=0.0020
+1 77874 rs184538873 G A 100 PASS THETA=0.0068;LDAF=0.0516;AN=2184;VT=SNP;AA=.;AVGPOST=0.9594;ERATE=0.0011;AC=87;SNPSOURCE=LOWCOV;RSQ=0.6970;AF=0.04;ASN_AF=0.01;AMR_AF=0.12;AFR_AF=0.0041;EUR_AF=0.04
+1 77961 rs78385339 G A 100 PASS AVGPOST=0.9114;AN=2184;VT=SNP;AA=.;THETA=0.0072;RSQ=0.6667;ERATE=0.0011;SNPSOURCE=LOWCOV;AC=192;LDAF=0.1180;AF=0.09;ASN_AF=0.20;AMR_AF=0.14;AFR_AF=0.01;EUR_AF=0.03
+1 79033 rs62641298 A G 100 PASS AVGPOST=0.7371;THETA=0.0022;LDAF=0.7962;AN=2184;ERATE=0.0054;VT=SNP;AA=.;AC=1961;SNPSOURCE=LOWCOV;RSQ=0.3963;AF=0.90;ASN_AF=0.98;AMR_AF=0.95;AFR_AF=0.65;EUR_AF=0.97
+1 79050 rs62641299 G T 100 PASS AC=1871;AN=2184;THETA=0.0031;RSQ=0.3928;VT=SNP;AA=.;AVGPOST=0.6803;SNPSOURCE=LOWCOV;LDAF=0.7318;ERATE=0.0107;AF=0.86;ASN_AF=0.98;AMR_AF=0.93;AFR_AF=0.54;EUR_AF=0.94
+1 79137 rs143777184 A T 100 PASS AN=2184;AC=55;ERATE=0.0009;AVGPOST=0.9773;LDAF=0.0324;VT=SNP;AA=.;THETA=0.0091;SNPSOURCE=LOWCOV;RSQ=0.7309;AF=0.03;AMR_AF=0.01;AFR_AF=0.10
+1 79417 rs184768190 C T 100 PASS ERATE=0.0005;THETA=0.0166;AN=2184;RSQ=0.5026;AVGPOST=0.9975;VT=SNP;AA=.;LDAF=0.0022;SNPSOURCE=LOWCOV;AC=2;AF=0.0009;ASN_AF=0.0035
+1 79772 rs147215883 C G 100 PASS LDAF=0.1066;AN=2184;THETA=0.0138;RSQ=0.7199;VT=SNP;AA=.;AVGPOST=0.9271;AC=176;ERATE=0.0011;SNPSOURCE=LOWCOV;AF=0.08;ASN_AF=0.07;AMR_AF=0.06;AFR_AF=0.10;EUR_AF=0.09
+1 79872 rs189224661 T G 100 PASS THETA=0.0054;AN=2184;LDAF=0.0057;VT=SNP;AA=.;ERATE=0.0017;AC=9;AVGPOST=0.9956;SNPSOURCE=LOWCOV;RSQ=0.6548;AF=0.0041;AFR_AF=0.02
+1 80454 rs144226842 G C 100 PASS RSQ=0.6549;LDAF=0.0035;AN=2184;AVGPOST=0.9975;VT=SNP;AA=.;AC=5;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0015;AF=0.0023;ASN_AF=0.01
+1 81949 rs181567186 T C 100 PASS AN=2184;ERATE=0.0009;VT=SNP;AA=.;AVGPOST=0.9948;LDAF=0.0030;SNPSOURCE=LOWCOV;AC=1;THETA=0.0052;RSQ=0.2129;AF=0.0005;ASN_AF=0.0017
+1 82163 rs139113303 G A 100 PASS AN=2184;LDAF=0.0375;ERATE=0.0009;VT=SNP;AA=.;RSQ=0.7842;AC=66;THETA=0.0053;SNPSOURCE=LOWCOV;AVGPOST=0.9761;AF=0.03;ASN_AF=0.0017;AMR_AF=0.01;AFR_AF=0.0020;EUR_AF=0.08
+1 82249 rs1851945 A G 100 PASS THETA=0.0137;LDAF=0.0712;AVGPOST=0.9150;AN=2184;VT=SNP;AA=.;RSQ=0.4689;AC=75;ERATE=0.0116;SNPSOURCE=LOWCOV;AF=0.03;ASN_AF=0.03;AMR_AF=0.04;AFR_AF=0.02;EUR_AF=0.04
+1 82609 rs149189449 C G 100 PASS ERATE=0.0005;AN=2184;LDAF=0.0364;VT=SNP;AA=.;AC=68;AVGPOST=0.9822;RSQ=0.8408;SNPSOURCE=LOWCOV;THETA=0.0024;AF=0.03;AMR_AF=0.02;AFR_AF=0.0020;EUR_AF=0.08
+1 82676 rs185237834 T G 100 PASS LDAF=0.1144;AN=2184;AVGPOST=0.9264;VT=SNP;AA=.;RSQ=0.7176;AC=198;THETA=0.0025;SNPSOURCE=LOWCOV;ERATE=0.0056;AF=0.09;ASN_AF=0.07;AMR_AF=0.08;AFR_AF=0.12;EUR_AF=0.10
+1 82734 rs4030331 T C 100 PASS AN=2184;THETA=0.0008;VT=SNP;AA=.;ERATE=0.0158;RSQ=0.6316;AVGPOST=0.8280;LDAF=0.2433;SNPSOURCE=LOWCOV;AC=435;AF=0.20;ASN_AF=0.15;AMR_AF=0.28;AFR_AF=0.24;EUR_AF=0.17
+1 82957 rs189774606 C T 100 PASS RSQ=0.5163;AN=2184;VT=SNP;AA=.;LDAF=0.0072;THETA=0.0028;AC=9;AVGPOST=0.9918;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.0041;AMR_AF=0.01;AFR_AF=0.01
+1 83084 rs181193408 T A 100 PASS AN=2184;AVGPOST=0.8261;VT=SNP;AA=.;RSQ=0.5750;AC=1914;LDAF=0.8278;SNPSOURCE=LOWCOV;ERATE=0.0061;THETA=0.0064;AF=0.88;ASN_AF=0.99;AMR_AF=0.92;AFR_AF=0.58;EUR_AF=0.96
+1 83088 rs186081601 G C 100 PASS ERATE=0.0013;AN=2184;LDAF=0.0043;VT=SNP;AA=.;AVGPOST=0.9922;RSQ=0.1618;THETA=0.0019;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;AFR_AF=0.0020
+1 83771 rs189906733 T G 100 PASS RSQ=0.6473;AN=2184;AVGPOST=0.9871;VT=SNP;AA=.;AC=24;ERATE=0.0011;SNPSOURCE=LOWCOV;THETA=0.0043;LDAF=0.0158;AF=0.01;AMR_AF=0.01;AFR_AF=0.04;EUR_AF=0.0013
+1 83977 rs180759811 A G 100 PASS AN=2184;ERATE=0.0009;VT=SNP;AA=.;THETA=0.0059;LDAF=0.0038;RSQ=0.2074;SNPSOURCE=LOWCOV;AC=1;AVGPOST=0.9932;AF=0.0005;AFR_AF=0.0020
+1 84002 rs28850140 G A 100 PASS THETA=0.0050;ERATE=0.0211;AN=2184;AC=236;VT=SNP;AA=.;AVGPOST=0.8144;LDAF=0.1921;SNPSOURCE=LOWCOV;RSQ=0.4810;AF=0.11;ASN_AF=0.12;AMR_AF=0.15;AFR_AF=0.07;EUR_AF=0.11
+1 84005 rs202079949 AG A 78 PASS AA=.;AC=52;AF=0.02;AFR_AF=0.02;AMR_AF=0.03;AN=2184;ASN_AF=0.01;AVGPOST=0.9360;ERATE=0.0049;EUR_AF=0.04;LDAF=0.0514;RSQ=0.4690;THETA=0.0005;VT=INDEL
+1 84010 rs186443818 G A 100 PASS AVGPOST=0.9169;AN=2184;VT=SNP;AA=.;AC=97;THETA=0.0087;LDAF=0.0789;SNPSOURCE=LOWCOV;ERATE=0.0061;RSQ=0.5318;AF=0.04;ASN_AF=0.03;AMR_AF=0.05;AFR_AF=0.03;EUR_AF=0.06
+1 84079 rs190867312 T C 100 PASS ERATE=0.0021;AN=2184;AC=6;VT=SNP;AA=.;LDAF=0.0049;AVGPOST=0.9956;RSQ=0.5906;SNPSOURCE=LOWCOV;THETA=0.0016;AF=0.0027;AMR_AF=0.0028;AFR_AF=0.01
+1 84139 rs183605470 A T 100 PASS THETA=0.0023;AC=28;AN=2184;RSQ=0.6469;VT=SNP;AA=.;LDAF=0.0180;SNPSOURCE=LOWCOV;AVGPOST=0.9835;ERATE=0.0006;AF=0.01;ASN_AF=0.0017;AMR_AF=0.07;AFR_AF=0.0041
+1 84156 rs188652299 A C 100 PASS THETA=0.0009;AVGPOST=0.9936;ERATE=0.0014;RSQ=0.3359;AN=2184;VT=SNP;AA=.;LDAF=0.0044;SNPSOURCE=LOWCOV;AC=3;AF=0.0014;AMR_AF=0.0028;AFR_AF=0.0041
+1 84244 rs191297051 A C 100 PASS LDAF=0.1204;AN=2184;VT=SNP;AA=.;AVGPOST=0.9398;RSQ=0.7828;THETA=0.0025;ERATE=0.0018;SNPSOURCE=LOWCOV;AC=222;AF=0.10;ASN_AF=0.08;AMR_AF=0.08;AFR_AF=0.14;EUR_AF=0.11
+1 84295 rs183209871 G A 100 PASS LDAF=0.0067;AVGPOST=0.9946;AN=2184;THETA=0.0038;VT=SNP;AA=.;AC=9;SNPSOURCE=LOWCOV;ERATE=0.0007;RSQ=0.6599;AF=0.0041;AMR_AF=0.01;EUR_AF=0.01
+1 84346 rs187855973 T C 100 PASS THETA=0.0044;AN=2184;AVGPOST=0.9981;VT=SNP;AA=.;LDAF=0.0014;SNPSOURCE=LOWCOV;AC=1;ERATE=0.0007;RSQ=0.3659;AF=0.0005;EUR_AF=0.0013
+1 84453 rs191379015 C G 100 PASS LDAF=0.0021;RSQ=0.2866;AN=2184;VT=SNP;AA=.;THETA=0.0018;ERATE=0.0008;SNPSOURCE=LOWCOV;AC=1;AVGPOST=0.9968;AF=0.0005;AMR_AF=0.0028
+1 84705 rs183470350 T G 100 PASS LDAF=0.0033;AVGPOST=0.9943;AN=2184;VT=SNP;AA=.;THETA=0.0030;RSQ=0.2658;SNPSOURCE=LOWCOV;AC=2;ERATE=0.0007;AF=0.0009;AMR_AF=0.0028;EUR_AF=0.0013
+1 85063 rs187802690 T C 100 PASS THETA=0.0093;AN=2184;VT=SNP;AA=.;ERATE=0.0051;LDAF=0.0255;RSQ=0.6868;AVGPOST=0.9806;AC=38;SNPSOURCE=LOWCOV;AF=0.02;ASN_AF=0.01;AMR_AF=0.02;AFR_AF=0.01;EUR_AF=0.02
+1 85597 rs192472955 A C 100 PASS AC=145;AVGPOST=0.9322;AN=2184;LDAF=0.0880;VT=SNP;AA=.;RSQ=0.6993;SNPSOURCE=LOWCOV;THETA=0.0020;ERATE=0.0022;AF=0.07;AMR_AF=0.07;AFR_AF=0.11;EUR_AF=0.09
+1 85622 rs185273034 A T 100 PASS ERATE=0.0005;AVGPOST=0.9963;AN=2184;RSQ=0.5194;VT=SNP;AA=.;THETA=0.0174;LDAF=0.0034;SNPSOURCE=LOWCOV;AC=4;AF=0.0018;AFR_AF=0.01
+1 85892 rs147185795 A G 100 PASS AVGPOST=0.9936;RSQ=0.7759;AN=2184;VT=SNP;AA=.;LDAF=0.0122;SNPSOURCE=LOWCOV;AC=21;THETA=0.0116;ERATE=0.0007;AF=0.01;AMR_AF=0.0028;AFR_AF=0.04
+1 86000 rs140628094 A C 100 PASS AN=2184;LDAF=0.0062;VT=SNP;AA=.;AC=10;THETA=0.0018;ERATE=0.0008;RSQ=0.7700;SNPSOURCE=LOWCOV;AVGPOST=0.9968;AF=0.0046;AFR_AF=0.02
+1 86018 rs142878000 C G 100 PASS ERATE=0.0036;RSQ=0.7867;AVGPOST=0.9429;AN=2184;AC=213;LDAF=0.1166;VT=SNP;AA=.;THETA=0.0030;SNPSOURCE=LOWCOV;AF=0.10;ASN_AF=0.08;AMR_AF=0.08;AFR_AF=0.12;EUR_AF=0.11
+1 86028 rs114608975 T C 100 PASS ERATE=0.0005;AC=73;AN=2184;RSQ=0.8713;VT=SNP;AA=.;THETA=0.0108;SNPSOURCE=LOWCOV;AVGPOST=0.9841;LDAF=0.0388;AF=0.03;AMR_AF=0.02;AFR_AF=0.0041;EUR_AF=0.08
+1 86064 rs190167736 G A 100 PASS ERATE=0.0004;AN=2184;VT=SNP;AA=.;THETA=0.0081;SNPSOURCE=LOWCOV;AC=1;RSQ=0.5628;AVGPOST=0.9992;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020
+1 86065 rs116504101 G C 100 PASS ERATE=0.0005;LDAF=0.0398;AN=2184;AVGPOST=0.9846;VT=SNP;AA=.;AC=76;THETA=0.0057;RSQ=0.8725;SNPSOURCE=LOWCOV;AF=0.03;AMR_AF=0.02;AFR_AF=0.01;EUR_AF=0.09
+1 86282 rs192830046 T G 100 PASS LDAF=0.0036;AN=2184;VT=SNP;AA=.;SNPSOURCE=LOWCOV;ERATE=0.0012;RSQ=0.2764;AVGPOST=0.9941;AC=2;THETA=0.0034;AF=0.0009;AMR_AF=0.0028;EUR_AF=0.0013
+1 86303 rs2949417 G T 100 PASS THETA=0.0021;RSQ=0.8008;LDAF=0.1194;AN=2184;AC=214;VT=SNP;AA=.;AVGPOST=0.9465;SNPSOURCE=LOWCOV;ERATE=0.0007;AF=0.10;ASN_AF=0.08;AMR_AF=0.08;AFR_AF=0.12;EUR_AF=0.11
+1 86331 rs115209712 A G 100 PASS THETA=0.0047;AN=2184;VT=SNP;AA=.;AC=216;LDAF=0.1195;RSQ=0.8119;ERATE=0.0008;AVGPOST=0.9495;SNPSOURCE=LOWCOV;AF=0.10;ASN_AF=0.08;AMR_AF=0.08;AFR_AF=0.12;EUR_AF=0.11
+1 86982 rs184970101 G A 100 PASS THETA=0.0050;AN=2184;AVGPOST=0.9979;LDAF=0.0015;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=1;ERATE=0.0006;RSQ=0.3541;AF=0.0005;AFR_AF=0.0020
+1 87021 rs188486692 T C 100 PASS AN=2184;RSQ=0.4348;VT=SNP;AA=.;THETA=0.0112;AVGPOST=0.9687;ERATE=0.0011;SNPSOURCE=LOWCOV;AC=19;LDAF=0.0221;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02
+1 87114 rs200095900 CT C 192 PASS AA=.;AC=8;AF=0.0037;AFR_AF=0.02;AN=2184;AVGPOST=0.9976;ERATE=0.0010;LDAF=0.0042;RSQ=0.7479;THETA=0.0149;VT=INDEL
+1 87190 rs1524602 G A 100 PASS AN=2184;LDAF=0.2822;VT=SNP;AA=.;RSQ=0.7549;THETA=0.0148;AC=540;SNPSOURCE=LOWCOV;ERATE=0.0096;AVGPOST=0.8739;AF=0.25;ASN_AF=0.29;AMR_AF=0.35;AFR_AF=0.38;EUR_AF=0.08
+1 87360 rs180907504 C T 100 PASS THETA=0.0014;AN=2184;ERATE=0.0025;RSQ=0.3869;VT=SNP;AA=.;AC=14;LDAF=0.0170;SNPSOURCE=LOWCOV;AVGPOST=0.9768;AF=0.01;ASN_AF=0.02;AMR_AF=0.0028;AFR_AF=0.01
+1 87409 rs139490478 C T 100 PASS AN=2184;AC=80;RSQ=0.8364;AVGPOST=0.9797;THETA=0.0075;VT=SNP;AA=.;ERATE=0.0011;SNPSOURCE=LOWCOV;LDAF=0.0438;AF=0.04;ASN_AF=0.0017;AMR_AF=0.02;AFR_AF=0.01;EUR_AF=0.09
+1 87590 rs185279164 G A 100 PASS THETA=0.0068;ERATE=0.0005;AN=2184;VT=SNP;AA=.;LDAF=0.0026;RSQ=0.6866;SNPSOURCE=LOWCOV;AC=4;AVGPOST=0.9982;AF=0.0018;AFR_AF=0.01
+1 87647 rs146836579 T C 100 PASS AN=2184;AC=111;THETA=0.0041;VT=SNP;AA=.;LDAF=0.0558;AVGPOST=0.9811;SNPSOURCE=LOWCOV;ERATE=0.0015;RSQ=0.8636;AF=0.05;AMR_AF=0.03;AFR_AF=0.20
+1 87755 rs140735660 G A 100 PASS ERATE=0.0027;RSQ=0.5060;AN=2184;AC=16;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AVGPOST=0.9847;LDAF=0.0138;THETA=0.0069;AF=0.01;AFR_AF=0.03
+1 87970 rs189643077 T C 100 PASS ERATE=0.0005;AN=2184;RSQ=0.5846;VT=SNP;AA=.;AVGPOST=0.9976;THETA=0.0053;LDAF=0.0025;SNPSOURCE=LOWCOV;AC=3;AF=0.0014;AFR_AF=0.01
+1 87978 rs182297743 G A 100 PASS AVGPOST=0.9963;THETA=0.0074;AN=2184;VT=SNP;AA=.;LDAF=0.0023;RSQ=0.2883;SNPSOURCE=LOWCOV;AC=1;ERATE=0.0006;AF=0.0005;AMR_AF=0.0028
+1 88136 rs59529791 G A 100 PASS RSQ=0.8406;AN=2184;VT=SNP;AA=.;ERATE=0.0010;THETA=0.0059;AVGPOST=0.9778;SNPSOURCE=LOWCOV;AC=106;LDAF=0.0548;AF=0.05;AMR_AF=0.03;AFR_AF=0.20
+1 88169 rs940550 C T 100 PASS RSQ=0.7811;AN=2184;VT=SNP;AA=.;THETA=0.0055;LDAF=0.2576;ERATE=0.0018;SNPSOURCE=LOWCOV;AC=506;AVGPOST=0.8932;AF=0.23;ASN_AF=0.29;AMR_AF=0.33;AFR_AF=0.33;EUR_AF=0.08
+1 88172 rs940551 G A 100 PASS RSQ=0.7703;AVGPOST=0.9669;LDAF=0.0483;AN=2184;ERATE=0.0009;VT=SNP;AA=.;THETA=0.0027;SNPSOURCE=LOWCOV;AC=86;AF=0.04;ASN_AF=0.01;AMR_AF=0.03;AFR_AF=0.01;EUR_AF=0.09
+1 88177 rs143215837 G C 100 PASS ERATE=0.0004;AN=2184;LDAF=0.0456;VT=SNP;AA=.;AVGPOST=0.9686;AC=82;THETA=0.0089;SNPSOURCE=LOWCOV;RSQ=0.7787;AF=0.04;AMR_AF=0.03;AFR_AF=0.01;EUR_AF=0.09
+1 88188 rs148331237 C A 100 PASS THETA=0.0039;AN=2184;VT=SNP;AA=.;AC=9;LDAF=0.0085;SNPSOURCE=LOWCOV;ERATE=0.0007;RSQ=0.5212;AVGPOST=0.9910;AF=0.0041;AMR_AF=0.0028;EUR_AF=0.01
+1 88236 rs186918018 C T 100 PASS AVGPOST=0.9904;AN=2184;ERATE=0.0031;RSQ=0.5511;VT=SNP;AA=.;THETA=0.0087;LDAF=0.0097;SNPSOURCE=LOWCOV;AC=11;AF=0.01;AMR_AF=0.02;AFR_AF=0.0041;EUR_AF=0.0026
+1 88250 rs191950833 T A 100 PASS LDAF=0.0013;AN=2184;RSQ=0.1387;VT=SNP;AA=.;AC=0;THETA=0.0019;SNPSOURCE=LOWCOV;ERATE=0.0007;AVGPOST=0.9974;AF=0
+1 88316 rs113759966 G A 100 PASS LDAF=0.0531;AN=2184;VT=SNP;AA=.;THETA=0.0071;RSQ=0.7791;AVGPOST=0.9644;ERATE=0.0008;AC=87;SNPSOURCE=LOWCOV;AF=0.04;ASN_AF=0.0017;AMR_AF=0.04;AFR_AF=0.01;EUR_AF=0.09
+1 88324 rs183326616 A G 100 PASS ERATE=0.0004;AN=2184;AVGPOST=0.9996;VT=SNP;AA=.;RSQ=0.7073;LDAF=0.0006;SNPSOURCE=LOWCOV;AC=1;THETA=0.0092;AF=0.0005;AFR_AF=0.0020
+1 88338 rs55700207 G A 100 PASS THETA=0.0035;RSQ=0.7967;AN=2184;ERATE=0.0034;LDAF=0.1019;VT=SNP;AA=.;AC=186;SNPSOURCE=LOWCOV;AVGPOST=0.9507;AF=0.09;ASN_AF=0.03;AMR_AF=0.15;AFR_AF=0.14;EUR_AF=0.05
+1 88370 rs185487977 G A 100 PASS AVGPOST=0.9957;LDAF=0.0035;AN=2184;ERATE=0.0009;RSQ=0.4507;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=4;THETA=0.0043;AF=0.0018;AMR_AF=0.0028;EUR_AF=0.0040
+1 88376 rs189954431 T G 100 PASS RSQ=0.6404;AVGPOST=0.9994;AN=2184;VT=SNP;AA=.;THETA=0.0057;SNPSOURCE=LOWCOV;AC=1;ERATE=0.0003;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020
+1 88388 rs182344336 C T 100 PASS THETA=0.0048;ERATE=0.0005;RSQ=0.3843;AVGPOST=0.9977;LDAF=0.0016;AN=2184;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017
+1 88429 rs146027550 T C 100 PASS LDAF=0.0083;AN=2184;AC=13;RSQ=0.6097;VT=SNP;AA=.;ERATE=0.0010;AVGPOST=0.9922;SNPSOURCE=LOWCOV;THETA=0.0069;AF=0.01;AMR_AF=0.01;AFR_AF=0.02;EUR_AF=0.0013
+1 88710 rs186575039 C G 100 PASS ERATE=0.0005;AC=73;THETA=0.0022;AN=2184;VT=SNP;AA=.;AVGPOST=0.9774;LDAF=0.0389;SNPSOURCE=LOWCOV;RSQ=0.8058;AF=0.03;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.08
+1 89165 rs192631277 A C 100 PASS RSQ=0.2647;AN=2184;ERATE=0.0009;VT=SNP;AA=.;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;THETA=0.0043;AF=0.0005;ASN_AF=0.0017
+1 89744 rs184101761 A G 100 PASS THETA=0.0068;ERATE=0.0004;AVGPOST=0.9985;LDAF=0.0016;AN=2184;RSQ=0.5853;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=2;AF=0.0009;AFR_AF=0.0041
+1 89794 rs188661839 T C 100 PASS AN=2184;THETA=0.0130;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=1;ERATE=0.0006;RSQ=0.3812;AVGPOST=0.9984;LDAF=0.0012;AF=0.0005;AFR_AF=0.0020
+1 89946 rs138808727 A T 100 PASS RSQ=0.7414;LDAF=0.1417;AN=2184;ERATE=0.0009;AC=236;VT=SNP;AA=.;THETA=0.0100;AVGPOST=0.9001;SNPSOURCE=LOWCOV;AF=0.11;ASN_AF=0.0035;AMR_AF=0.16;AFR_AF=0.02;EUR_AF=0.22
+1 91190 rs143856811 G A 100 PASS AN=2184;AC=77;ERATE=0.0009;VT=SNP;AA=.;LDAF=0.0447;SNPSOURCE=LOWCOV;RSQ=0.7517;THETA=0.0113;AVGPOST=0.9690;AF=0.04;ASN_AF=0.0017;AMR_AF=0.02;AFR_AF=0.02;EUR_AF=0.08
+1 91228 rs139873689 A G 100 PASS AN=2184;AVGPOST=0.9924;AC=8;RSQ=0.5097;VT=SNP;AA=.;ERATE=0.0010;THETA=0.0012;SNPSOURCE=LOWCOV;LDAF=0.0070;AF=0.0037;AFR_AF=0.02
+1 91536 rs77418980 G T 100 PASS AC=695;AN=2184;ERATE=0.0025;AVGPOST=0.7792;VT=SNP;AA=.;THETA=0.0018;LDAF=0.3255;RSQ=0.6634;SNPSOURCE=LOWCOV;AF=0.32;ASN_AF=0.34;AMR_AF=0.30;AFR_AF=0.04;EUR_AF=0.50
+1 91581 rs151118460 G A 100 PASS AVGPOST=0.7763;THETA=0.0078;AN=2184;VT=SNP;AA=.;AC=716;ERATE=0.0035;LDAF=0.3353;RSQ=0.6618;SNPSOURCE=LOWCOV;AF=0.33;ASN_AF=0.37;AMR_AF=0.30;AFR_AF=0.04;EUR_AF=0.50
+1 91605 rs141083882 C T 100 PASS AC=105;LDAF=0.0597;AN=2184;RSQ=0.7792;VT=SNP;AA=.;ERATE=0.0010;AVGPOST=0.9660;SNPSOURCE=LOWCOV;THETA=0.0070;AF=0.05;AMR_AF=0.02;AFR_AF=0.20
+1 92633 rs149776517 C T 100 PASS THETA=0.0054;AN=2184;VT=SNP;AA=.;AC=44;AVGPOST=0.9592;ERATE=0.0008;LDAF=0.0366;SNPSOURCE=LOWCOV;RSQ=0.5870;AF=0.02;AMR_AF=0.02;EUR_AF=0.05
+1 92858 rs147061536 G T 100 PASS AC=248;THETA=0.0212;RSQ=0.7567;AN=2184;ERATE=0.0046;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AVGPOST=0.9072;LDAF=0.1433;AF=0.11;ASN_AF=0.01;AMR_AF=0.15;AFR_AF=0.05;EUR_AF=0.22
+1 92875 rs193157612 T C 100 PASS THETA=0.0048;AVGPOST=0.9957;AN=2184;LDAF=0.0040;ERATE=0.0009;VT=SNP;AA=.;RSQ=0.4901;SNPSOURCE=LOWCOV;AC=4;AF=0.0018;EUR_AF=0.01
+1 94421 rs200856736 TC T 90 PASS AA=TC;AC=253;AF=0.12;AFR_AF=0.01;AMR_AF=0.20;AN=2184;ASN_AF=0.26;AVGPOST=0.7183;ERATE=0.0117;EUR_AF=0.03;LDAF=0.2244;RSQ=0.3175;THETA=0.0159;VT=INDEL
+1 94986 rs185004859 C T 100 PASS ERATE=0.0166;AN=2184;AC=100;THETA=0.0227;VT=SNP;AA=.;LDAF=0.0872;AVGPOST=0.9040;SNPSOURCE=LOWCOV;RSQ=0.4650;AF=0.05;ASN_AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.04
+1 94991 rs188832636 G A 100 PASS THETA=0.0048;LDAF=0.0041;AN=2184;AVGPOST=0.9944;ERATE=0.0009;VT=SNP;AA=.;SNPSOURCE=LOWCOV;RSQ=0.4157;AC=3;AF=0.0014;AMR_AF=0.0028;EUR_AF=0.0026
+1 98583 rs141344361 T A 100 PASS AVGPOST=0.9463;AC=248;THETA=0.0099;AN=2184;VT=SNP;AA=.;RSQ=0.8090;LDAF=0.1336;ERATE=0.0008;SNPSOURCE=LOWCOV;AF=0.11;ASN_AF=0.30;AMR_AF=0.16;AFR_AF=0.01;EUR_AF=0.02
+1 98929 rs12184306 A G 100 PASS RSQ=0.6226;AVGPOST=0.8784;AN=2184;VT=SNP;AA=.;ERATE=0.0045;LDAF=0.1723;SNPSOURCE=LOWCOV;AC=264;THETA=0.0070;AF=0.12;ASN_AF=0.16;AMR_AF=0.08;AFR_AF=0.16;EUR_AF=0.09
+1 98946 rs191775802 C G 100 PASS AVGPOST=0.9945;ERATE=0.0013;AN=2184;LDAF=0.0046;VT=SNP;AA=.;RSQ=0.4807;SNPSOURCE=LOWCOV;AC=4;THETA=0.0097;AF=0.0018;AFR_AF=0.01
+1 98974 rs12184307 A G 100 PASS AVGPOST=0.8921;AN=2184;AC=224;THETA=0.0130;VT=SNP;AA=.;LDAF=0.1405;RSQ=0.6149;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.10;ASN_AF=0.14;AMR_AF=0.07;AFR_AF=0.14;EUR_AF=0.06
+1 99671 rs146209971 A T 100 PASS THETA=0.0199;AN=2184;AC=13;RSQ=0.4401;VT=SNP;AA=.;ERATE=0.0010;AVGPOST=0.9802;SNPSOURCE=LOWCOV;LDAF=0.0158;AF=0.01;AMR_AF=0.02;AFR_AF=0.0020;EUR_AF=0.01
+1 99687 rs139153227 C T 100 PASS THETA=0.0211;LDAF=0.0470;AN=2184;VT=SNP;AA=.;ERATE=0.0010;RSQ=0.6276;AVGPOST=0.9548;AC=64;SNPSOURCE=LOWCOV;AF=0.03;AMR_AF=0.03;AFR_AF=0.01;EUR_AF=0.07
+1 99719 rs183898652 C T 100 PASS AN=2184;RSQ=0.5856;VT=SNP;AA=.;AC=10;ERATE=0.0018;SNPSOURCE=LOWCOV;LDAF=0.0076;AVGPOST=0.9925;THETA=0.0251;AF=0.0046;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.0013
+1 100676 rs188226172 A T 100 PASS THETA=0.0212;LDAF=0.0047;AN=2184;VT=SNP;AA=.;RSQ=0.2646;SNPSOURCE=LOWCOV;ERATE=0.0012;AC=2;AVGPOST=0.9925;AF=0.0009;AMR_AF=0.0028;AFR_AF=0.0020
+1 103905 rs142403309 A G 100 PASS AN=2184;THETA=0.0131;ERATE=0.0025;AVGPOST=0.8782;AC=220;VT=SNP;AA=.;LDAF=0.1434;SNPSOURCE=LOWCOV;RSQ=0.5994;AF=0.10;ASN_AF=0.10;AMR_AF=0.14;AFR_AF=0.15;EUR_AF=0.05
+1 106544 rs180741296 C G 100 PASS AC=205;AVGPOST=0.5776;AN=2184;VT=SNP;AA=.;LDAF=0.3120;SNPSOURCE=LOWCOV;ERATE=0.0061;THETA=0.0372;RSQ=0.1442;AF=0.09;ASN_AF=0.11;AMR_AF=0.13;AFR_AF=0.11;EUR_AF=0.05
+1 109107 rs201432136 G GT 67 PASS AA=G;AC=63;AF=0.03;AFR_AF=0.01;AMR_AF=0.04;AN=2184;ASN_AF=0.03;AVGPOST=0.8840;ERATE=0.0122;EUR_AF=0.04;LDAF=0.0890;RSQ=0.3660;THETA=0.0210;VT=INDEL
+1 111513 rs199911222 C CTA 249 PASS AA=.;AC=58;AF=0.03;AFR_AF=0.09;AMR_AF=0.03;AN=2184;ASN_AF=0.0017;AVGPOST=0.9145;ERATE=0.0024;EUR_AF=0.0013;LDAF=0.0665;RSQ=0.4694;THETA=0.0292;VT=INDEL
diff --git a/vcf/test/FT.vcf b/vcf/test/FT.vcf
new file mode 100644
index 0000000..e42436a
--- /dev/null
+++ b/vcf/test/FT.vcf
@@ -0,0 +1,50 @@
+##fileformat=VCFv4.2
+##ALT=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##GATKCommandLine.VariantFiltration=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##contig=
+##reference=file://../ref.fasta
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5
+ref 63393 . C A 29454.60 . AC=5;AF=1.00;AN=5;DP=719;FS=0.000;MLEAC=5;MLEAF=1.00;MQ=60.00;QD=29.67;SOR=0.965 GT:AD:DP:GQ:PL 1:0,166:166:99:6740,0 1:0,142:142:99:5824,0 1:0,134:134:99:5616,0 1:0,122:122:99:4930,0 1:0,155:155:99:6371,0
+ref 65903 . AATTGCGCTG A 7340.57 PASS AC=1;AF=0.200;AN=5;DP=524;FS=0.000;MLEAC=1;MLEAF=0.200;MQ=60.00;QD=34.04;SOR=1.091 GT:AD:DP:FT:GQ:PL 1:0,164:164:PASS:99:7383,0 0:95,0:95:DP125;DP130:99:0,1800 0:88,0:88:DP125;DP130:99:0,1800 0:87,0:87:DP125;DP130:99:0,1800 0:89,0:89:DP125;DP130:99:0,1800
+ref 70837 . C A 4711.61 Q4800;Q5000 AC=1;AF=0.200;AN=5;DP=512;FS=0.000;MLEAC=1;MLEAF=0.200;MQ=60.00;QD=27.64;SOR=0.726 GT:AD:DP:FT:GQ:PL 0:121,0:121:DP125;DP130:99:0,1800 0:95,0:95:DP125;DP130:99:0,1800 1:0,120:120:DP125;DP130:99:4745,0 0:87,0:87:DP125;DP130:99:0,1800 0:89,0:89:DP125;DP130:99:0,1800
+ref 71448 . C T 31134.60 PASS AC=5;AF=1.00;AN=5;BaseQRankSum=2.22;ClippingRankSum=0.00;DP=768;FS=0.000;MLEAC=5;MLEAF=1.00;MQ=60.00;MQRankSum=0.00;QD=29.43;ReadPosRankSum=2.03;SOR=0.295 GT:AD:DP:FT:GQ:PL 1:0,147:147:PASS:99:5996,0 1:1,183:184:PASS:99:7501,0 1:0,113:113:DP125;DP130:99:4559,0 1:0,161:161:PASS:99:6436,0 1:0,163:163:PASS:99:6669,0
+ref 104257 . C T 5521.61 PASS AC=1;AF=0.200;AN=5;DP=506;FS=0.000;MLEAC=1;MLEAF=0.200;MQ=60.00;QD=29.45;SOR=0.854 GT:AD:DP:FT:GQ:PL 0:101,0:101:DP125;DP130:99:0,1800 0:109,0:109:DP125;DP130:99:0,1800 1:0,132:132:PASS:99:5555,0 0:67,0:67:DP125;DP130:99:0,1800 0:97,0:97:DP125;DP130:99:0,1800
+ref 140658 . C A 32467.60 PASS AC=5;AF=1.00;AN=5;BaseQRankSum=2.24;ClippingRankSum=0.00;DP=801;FS=0.000;MLEAC=5;MLEAF=1.00;MQ=60.00;MQRankSum=0.00;QD=29.65;ReadPosRankSum=1.27;SOR=0.346 GT:AD:DP:GQ:PL 1:0,170:170:99:6854,0 1:0,198:198:99:8098,0 1:0,136:136:99:5554,0 1:0,141:141:99:5661,0 1:1,155:156:99:6327,0
+ref 147463 . C A 4885.61 Q5000 AC=1;AF=0.200;AN=5;BaseQRankSum=-7.720e-01;ClippingRankSum=0.00;DP=503;FS=0.000;MLEAC=1;MLEAF=0.200;MQ=60.00;MQRankSum=0.00;QD=35.03;ReadPosRankSum=-6.950e-01;SOR=0.278 GT:AD:DP:FT:GQ:PL 0:97,0:97:DP125;DP130:99:0,1800 0:104,0:104:DP125;DP130:99:0,1800 0:84,0:84:DP125;DP130:99:0,1800 1:1,128:129:DP130:99:4919,0 0:89,0:89:DP125;DP130:99:0,1800
+ref 154578 . A G 32015.60 PASS AC=5;AF=1.00;AN=5;DP=776;FS=0.000;MLEAC=5;MLEAF=1.00;MQ=60.00;QD=25.82;SOR=0.902 GT:AD:DP:GQ:PL 1:0,152:152:99:6300,0 1:0,183:183:99:7608,0 1:0,137:137:99:5713,0 1:0,148:148:99:6040,0 1:0,156:156:99:6381,0
+ref 203200 . C T 30880.60 PASS AC=5;AF=1.00;AN=5;DP=752;FS=0.000;MLEAC=5;MLEAF=1.00;MQ=60.00;QD=29.65;SOR=0.878 GT:AD:DP:FT:GQ:PL 1:0,161:161:PASS:99:6708,0 1:0,185:185:PASS:99:7602,0 1:0,136:136:PASS:99:5602,0 1:0,126:126:DP130:99:5080,0 1:0,144:144:PASS:99:5915,0
+ref 231665 . C T 30074.60 PASS AC=5;AF=1.00;AN=5;DP=735;FS=0.000;MLEAC=5;MLEAF=1.00;MQ=60.00;QD=33.23;SOR=0.938 GT:AD:DP:FT:GQ:PL 1:0,191:191:PASS:99:7867,0 1:0,159:159:PASS:99:6431,0 1:0,130:130:PASS:99:5299,0 1:0,129:129:DP130:99:5290,0 1:0,126:126:DP130:99:5214,0
diff --git a/vcf/test/bad-info-character.vcf b/vcf/test/bad-info-character.vcf
new file mode 100644
index 0000000..099470c
--- /dev/null
+++ b/vcf/test/bad-info-character.vcf
@@ -0,0 +1,14 @@
+##fileformat=VCFv4.1
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##FORMAT=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample
+chr1 100 id1 G A . . FLAG;EMPTY_1=;EMPTY_3=;EMPTY_N=;DOT_1=.;DOT_3=.,.,.;DOT_N=.;NOTEMPTY_1=1;NOTEMPTY_3=1,2,3;NOTEMPTY_N=1 GT 0/1
diff --git a/vcf/test/contig_idonly.vcf b/vcf/test/contig_idonly.vcf
new file mode 100644
index 0000000..5e5a6ad
--- /dev/null
+++ b/vcf/test/contig_idonly.vcf
@@ -0,0 +1,5 @@
+##fileformat=VCFv4.2
+##contig=
+##contig=
+##contig=
+#CHROM POS ID REF ALT QUAL FILTER INFO
diff --git a/vcf/test/example-4.0.vcf b/vcf/test/example-4.0.vcf
index 27803a1..97fb07e 100644
--- a/vcf/test/example-4.0.vcf
+++ b/vcf/test/example-4.0.vcf
@@ -20,4 +20,5 @@
20 17330 . T A 3.0 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3
20 1110696 rs6040355 A G,T 1e+03 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4
20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2
+20 1231234 . AT A 46 PASS NS=3;DP=15;AA=A GT:GQ:DP:HQ 1|1:23:7:26,30 0|0:27:9:56,60 0|0:31:10:65,71
20 1234567 microsat1 GTCT G,GTACT . PASS NS=3;DP=9;AA=G GT:GQ:DP ./.:35:4 0/2:17:2 1/1:40:3
diff --git a/vcf/test/example-4.1-info-multiple-values.vcf b/vcf/test/example-4.1-info-multiple-values.vcf
new file mode 100644
index 0000000..6faf95e
--- /dev/null
+++ b/vcf/test/example-4.1-info-multiple-values.vcf
@@ -0,0 +1,7 @@
+##fileformat=VCFv4.1
+##contig=
+##INFO=
+##INFO=
+##INFO=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT
+Pf3D7_01_v3 401 . C T 53.99 PASS RepeatCopies=19.3,47.4,14.0;RepeatSize=42,14,56;RepeatConsensus=TCTTATCTTCTTACTTTTCATTCCTTACTCTTACTTACTTAC,TTACTCTTACTTAC,TTACTCTTACTTACTTACTCTTACTTACTTACTCTTACTTACTTACTCTTATCTTC
diff --git a/vcf/test/example-4.1-ploidy.vcf b/vcf/test/example-4.1-ploidy.vcf
new file mode 100644
index 0000000..6704048
--- /dev/null
+++ b/vcf/test/example-4.1-ploidy.vcf
@@ -0,0 +1,21 @@
+##fileformat=VCFv4.1
+##fileDate=20090805
+##source=myImputationProgramV3.1
+##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta
+##contig=
+##phasing=partial
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##FILTER=
+##FILTER=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003
+X 60034 rs186434315 T A 100 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0:48:1:51,51 1|0:48:8:51,51 1/1/1:43:5:.,.
+X 60378 rs185512268 C A 100 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0:48:1:51,51 1:48:8:51,51 0:43:5:.,.
\ No newline at end of file
diff --git a/vcf/test/example-4.2.vcf b/vcf/test/example-4.2.vcf
new file mode 100644
index 0000000..d649fc3
--- /dev/null
+++ b/vcf/test/example-4.2.vcf
@@ -0,0 +1,56 @@
+##fileformat=VCFv4.2
+##FILTER=
+##samtoolsVersion=1.0-17-gfaf4dd6+htslib-1.0-11-g830ea73
+##samtoolsCommand=samtools mpileup -u -t DP,DPR,DV,DP4,INFO/DPR,SP -f /data/archive/reference/Anopheles-arabiensis-Dongola_SCAFFOLDS_AaraD1.fa -r KB704451:0004153102-0004172483 huge_list_of_bam_files_removed
+##reference=file:///data/archive/reference/Anopheles-arabiensis-Dongola_SCAFFOLDS_AaraD1.fa
+##contig=
+##ALT=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##bcftools_callVersion=1.0-55-gc661821+htslib-1.0-11-g830ea73
+##bcftools_callCommand=call -m -vM -f GQ,GP
+##SnpSiftVersion="SnpSift 3.6c (build 2014-05-20), by Pablo Cingolani"
+##SnpSiftCmd="SnpSift varType - "
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT LUPI059 MINE001 OKJ042 LUPI001 LUPI007 LUPI024 LUPI056 LUPI071 LUPI074 LUPI082 MINE040 MINE100 MINE101 MINE105 MINE111 OKJ017 OKJ045 OKJ070 SAGA066 SAGA107 SAGA131 SAGA133 SAGA134 SAGA141 2012L_LUPI_002 2012L_LUPI_015 2012L_LUPI_017 2012L_LUPI_018 2012L_LUPI_035 2012L_LUPI_062 2012L_LUPI_065 2012L_LUPI_077 2012L_LUPI_083 2012L_LUPI_116 2012L_LUPI_013 2012L_LUPI_041 2012L_LUPI_068 2012L_LUPI_096 2012L_LUPI_098 2012L_LUPI_101 2012L_LUPI_103 2012_LUPI_156 2012_LUPI_157 2012_LUPI_161 2012_LUPI_171 2012_LUPI_173 2012_LUPI_180 2012L_LUPI_010 2012L_LUPI_012 2012L_LUPI_021 2012L_LUPI_045 2012L_LUPI_047 2012L_LUPI_060 2012L_LUPI_061 2012L_LUPI_067 2012_LUPI_125 2012_LUPI_129 2012_LUPI_146 2012_LUPI_178 2012_LUPI_211 2012_LUPI_277 2012_LUPI_278 2012_LUPI_279 2012_LUPI_284
+KB704451 4157846 . N A,C 167.0 . DP=10;VDB=1.17174e-06;SGB=1.26353;MQ0F=0;DPR=0,6,4;AC=10,4;AN=14;DP4=0,0,10,0;MQ=60;SNP;VARTYPE=SNP,SNP GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 1/2:74,23,14,57,0,54:4:4:0:0,0,4,0:0,3,1:144,56,16,90,0,57:16 1/2:26,26,26,3,3,0:1:1:0:0,0,1,0:0,0,1:95,58,28,36,2,3:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 1/1:26,3,0,26,3,26:1:1:0:0,0,1,0:0,1,0:96,36,2,60,3,29:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 1/1:26,3,0,26,3,26:1:1:0:0,0,1,0:0,1,0:96,36,2,60,3,29:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 1/2:26,26,26,3,3,0:1:1:0:0,0,1,0:0,0,1:95,58,28,36,2,3:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 1/1:26,3,0,26,3,26:1:1:0:0,0,1,0:0,1,0:96,36,2,60,3,29:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 1/2:26,26,26,3,3,0:1:1:0:0,0,1,0:0,0,1:95,58,28,36,2,3:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0
+KB704451 4157870 . T C 275.0 . DP=243;VDB=0.00023935;SGB=29.4468;RPB=0.0368658;MQB=0.979612;MQSB=0.268441;BQB=0.99223;MQ0F=0;DPR=213,19;ICB=0.85092;HOB=0.0287274;AC=6;AN=118;DP4=201,12,19,1;MQ=53;SNP;VARTYPE=SNP GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 0/0:0,66,255:22:0:0:20,2,0,0:22,0:0,75,279:75 0/0:0,12,120:4:0:0:4,0,0,0:4,0:0,21,144:21 1/1:193,36,0:12:12:0:0,0,12,0:0,12:168,20,0:20 0/0:0,9,95:3:0:0:3,0,0,0:3,0:0,18,119:18 0/1:78,0,110:7:3:0:3,1,3,0:4,3:68,0,125:68 0/0:0,3,40:1:0:0:1,0,0,0:1,0:0,12,64:12 0/0:0,6,72:2:0:0:2,0,0,0:2,0:0,15,96:15 0/0:0,9,90:3:0:0:3,0,0,0:3,0:0,18,114:18 0/0:0,12,122:4:0:0:4,0,0,0:4,0:0,21,146:21 0/0:0,9,97:3:0:0:3,0,0,0:3,0:0,18,121:18 0/0:0,15,122:5:0:0:5,0,0,0:5,0:0,24,146:24 0/0:0,6,71:2:0:0:2,0,0,0:2,0:0,15,95:15 0/0:0,6,58:2:0:0:2,0,0,0:2,0:0,15,82:15 0/0:0,18,155:6:0:0:6,0,0,0:6,0:0,27,179:27 0/0:0,3,39:1:0:0:1,0,0,0:1,0:0,12,63:12 0/1:35,3,0:1:1:0:0,0,1,0:0,1:23,0,12:12 0/0:0,9,87:3:0:0:3,0,0,0:3,0:0,18,111:18 0/1:47,0,104:6:2:0:4,0,2,0:4,2:37,0,119:37 0/0:0,21,160:7:0:0:7,0,0,0:7,0:0,30,184:30 0/0:0,6,35:2:0:0:2,0,0,0:2,0:0,15,59:15 0/0:0,12,98:4:0:0:4,0,0,0:4,0:0,21,122:21 0/0:0,6,70:2:0:0:2,0,0,0:2,0:0,15,94:15 0/0:0,6,66:2:0:0:2,0,0,0:2,0:0,15,90:15 0/0:0,12,122:4:0:0:4,0,0,0:4,0:0,21,146:21 0/0:0,3,29:1:0:0:0,1,0,0:1,0:0,12,53:12 0/0:0,6,72:2:0:0:2,0,0,0:2,0:0,15,96:15 0/0:0,9,76:3:0:0:3,0,0,0:3,0:0,18,100:18 0/0:0,15,136:5:0:0:5,0,0,0:5,0:0,24,160:24 0/0:0,30,182:10:0:0:10,0,0,0:10,0:0,39,206:39 0/0:0,6,66:2:0:0:2,0,0,0:2,0:0,15,90:15 0/0:0,6,69:2:0:0:2,0,0,0:2,0:0,15,93:15 0/0:0,27,152:9:0:0:9,0,0,0:9,0:0,36,176:36 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,94:2:0:0:2,0,0,0:2,0:0,15,118:15 0/0:0,21,195:7:0:0:5,2,0,0:7,0:0,30,219:30 0/0:0,9,92:3:0:0:2,1,0,0:3,0:0,18,116:18 0/1:33,0,18:2:1:0:0,1,1,0:1,1:23,0,33:23 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,12,59:12 0/0:0,9,91:3:0:0:3,0,0,0:3,0:0,18,115:18 0/0:0,3,36:1:0:0:1,0,0,0:1,0:0,12,60:12 0/0:0,30,212:10:0:0:9,1,0,0:10,0:0,39,236:39 0/0:0,9,89:3:0:0:3,0,0,0:3,0:0,18,113:18 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,21,195:7:0:0:7,0,0,0:7,0:0,30,219:30 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,97:3:0:0:2,1,0,0:3,0:0,18,121:18 0/0:0,9,93:3:0:0:3,0,0,0:3,0:0,18,117:18 0/0:0,9,116:3:0:0:3,0,0,0:3,0:0,18,140:18 0/0:0,6,71:2:0:0:1,1,0,0:2,0:0,15,95:15 0/0:0,9,89:3:0:0:3,0,0,0:3,0:0,18,113:18 0/0:0,33,175:11:0:0:11,0,0,0:11,0:0,42,199:42 0/0:0,6,63:2:0:0:2,0,0,0:2,0:0,15,87:15 0/0:0,21,145:7:0:0:7,0,0,0:7,0:0,30,169:30 0/0:0,3,39:1:0:0:0,1,0,0:1,0:0,12,63:12 0/0:0,9,84:3:0:0:3,0,0,0:3,0:0,18,108:18 0/0:0,3,13:1:0:0:1,0,0,0:1,0:0,12,37:12 0/0:0,3,23:1:0:0:1,0,0,0:1,0:0,12,47:12 0/0:0,12,106:4:0:0:4,0,0,0:4,0:0,21,130:21 0/0:0,3,36:1:0:0:1,0,0,0:1,0:0,12,60:12 0/0:0,9,94:3:0:0:3,0,0,0:3,0:0,18,118:18 0/0:0,6,67:2:0:0:2,0,0,0:2,0:0,15,91:15 0/0:2,5,27:2:1:0:1,0,0,1:1,0:0,12,49:12
+KB704451 4157877 . G A 999.0 . DP=250;VDB=6.58963e-09;SGB=31.659;RPB=0.0227135;MQB=0.410318;MQSB=0.139343;BQB=0.0767891;MQ0F=0;DPR=188,48;ICB=0.990841;HOB=0.00761276;AC=17;AN=118;DP4=176,12,45,3;MQ=55;SNP;VARTYPE=SNP GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 0/1:159,0,202:22:9:0:12,1,8,1:13,9:154,0,212:127 0/0:0,12,120:4:0:0:4,0,0,0:4,0:0,16,134:16 0/0:0,51,207:17:0:0:17,0,0,0:17,0:0,55,221:55 0/0:0,9,98:3:0:0:3,0,0,0:3,0:0,13,112:13 0/1:123,0,61:8:5:0:3,0,4,1:3,5:118,0,71:71 0/0:0,3,38:1:0:0:0,1,0,0:1,0:0,7,52:7 0/1:68,0,29:3:2:0:1,0,1,1:1,2:63,0,39:39 0/0:0,6,69:2:0:0:2,0,0,0:2,0:0,10,83:10 0/0:0,12,119:4:0:0:4,0,0,0:4,0:0,16,133:16 0/1:34,0,34:2:1:0:1,0,1,0:1,1:29,0,44:29 0/1:24,0,99:5:1:0:4,0,1,0:4,1:19,0,109:19 0/1:34,0,28:2:1:0:1,0,1,0:1,1:29,0,38:29 0/0:0,6,58:2:0:0:2,0,0,0:2,0:0,10,72:10 0/1:122,0,57:7:4:0:3,0,4,0:3,4:117,0,67:67 0/0:0,3,41:1:0:0:1,0,0,0:1,0:0,7,55:7 0/0:0,3,29:1:0:0:1,0,0,0:1,0:0,7,43:7 0/0:0,12,105:4:0:0:4,0,0,0:4,0:0,16,119:16 0/0:0,18,144:6:0:0:6,0,0,0:6,0:0,22,158:22 0/1:118,0,63:8:5:0:3,0,5,0:3,5:113,0,73:73 0/0:0,6,34:2:0:0:2,0,0,0:2,0:0,10,48:10 0/0:0,15,131:5:0:0:5,0,0,0:5,0:0,19,145:19 0/0:0,6,72:2:0:0:2,0,0,0:2,0:0,10,86:10 0/0:0,6,89:2:0:0:2,0,0,0:2,0:0,10,103:10 1/1:124,12,0:4:4:0:0,0,4,0:0,4:112,4,2:4 0/0:0,3,34:1:0:0:0,1,0,0:1,0:0,7,48:7 0/0:0,6,73:2:0:0:2,0,0,0:2,0:0,10,87:10 0/0:0,9,91:3:0:0:3,0,0,0:3,0:0,13,105:13 0/0:0,15,138:5:0:0:5,0,0,0:5,0:0,19,152:19 0/0:0,30,179:10:0:0:10,0,0,0:10,0:0,34,193:34 0/0:0,6,65:2:0:0:2,0,0,0:2,0:0,10,79:10 0/0:0,6,70:2:0:0:2,0,0,0:2,0:0,10,84:10 0/0:0,27,155:9:0:0:9,0,0,0:9,0:0,31,169:31 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,94:2:0:0:2,0,0,0:2,0:0,10,108:10 0/0:0,15,161:5:0:0:3,2,0,0:5,0:0,19,175:19 0/0:0,6,72:2:0:0:1,1,0,0:2,0:0,10,86:10 0/0:0,6,65:2:0:0:1,1,0,0:2,0:0,10,79:10 0/1:36,3,0:1:1:0:0,0,1,0:0,1:29,0,7:7 0/0:0,9,93:3:0:0:3,0,0,0:3,0:0,13,107:13 0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,7,48:7 0/1:87,0,137:10:4:0:5,1,4,0:6,4:82,0,147:82 0/1:57,0,26:3:2:0:1,0,2,0:1,2:52,0,36:35 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:139,0,73:7:4:0:3,0,4,0:3,4:134,0,83:83 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,75:2:0:0:1,1,0,0:2,0:0,10,89:10 0/1:90,9,0:3:3:0:0,0,3,0:0,3:79,2,3:3 0/0:0,9,98:3:0:0:3,0,0,0:3,0:0,13,112:13 0/0:0,6,72:2:0:0:1,1,0,0:2,0:0,10,86:10 0/0:0,9,88:3:0:0:3,0,0,0:3,0:0,13,102:13 0/0:0,33,173:11:0:0:11,0,0,0:11,0:0,37,187:37 0/0:0,6,57:2:0:0:2,0,0,0:2,0:0,10,71:10 0/0:0,15,125:5:0:0:5,0,0,0:5,0:0,19,139:19 0/0:0,6,61:2:0:0:1,1,0,0:2,0:0,10,75:10 0/1:24,0,51:3:1:0:2,0,1,0:2,1:19,0,61:19 0/0:0,3,30:1:0:0:1,0,0,0:1,0:0,7,44:7 0/0:0,3,23:1:0:0:1,0,0,0:1,0:0,7,37:7 0/0:0,12,105:4:0:0:4,0,0,0:4,0:0,16,119:16 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,7,49:7 0/1:25,0,61:3:1:0:2,0,1,0:2,1:20,0,71:20 0/0:0,6,67:2:0:0:2,0,0,0:2,0:0,10,81:10 0/0:0,3,8:1:0:0:0,1,0,0:1,0:0,7,22:7
+KB704451 4157907 . A C 278.0 . DP=295;VDB=0.241276;SGB=26.7514;RPB=0.676983;MQB=0.997838;MQSB=0.136536;BQB=0.45683;MQ0F=0;DPR=264,15;ICB=0.00518819;HOB=0.00237812;AC=4;AN=116;DP4=233,31,14,1;MQ=59;SNP;VARTYPE=SNP GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 0/0:0,90,255:30:0:0:25,5,0,0:30,0:0,101,283:101 0/0:0,30,201:10:0:0:9,1,0,0:10,0:0,41,229:41 0/1:157,0,188:18:8:0:10,0,7,1:10,8:145,0,205:127 0/1:75,0,90:5:2:0:2,1,2,0:3,2:63,0,107:63 0/0:0,30,201:10:0:0:9,1,0,0:10,0:0,41,229:41 0/0:0,6,80:2:0:0:1,1,0,0:2,0:0,17,108:17 0/0:0,12,134:4:0:0:3,1,0,0:4,0:0,23,162:23 0/0:0,3,33:1:0:0:1,0,0,0:1,0:0,14,61:14 0/0:0,21,160:7:0:0:7,0,0,0:7,0:0,32,188:32 0/0:0,12,135:4:0:0:2,2,0,0:4,0:0,23,163:23 0/0:0,15,148:5:0:0:5,0,0,0:5,0:0,26,176:26 0/0:0,9,82:3:0:0:3,0,0,0:3,0:0,20,110:20 0/1:70,0,19:4:3:0:1,0,3,0:1,3:58,0,36:36 0/0:0,24,246:8:0:0:7,1,0,0:8,0:0,35,274:35 0/0:0,18,147:6:0:0:6,0,0,0:6,0:0,29,175:29 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,82:3:0:0:3,0,0,0:3,0:0,20,110:20 0/1:59,0,62:5:2:0:3,0,2,0:3,2:47,0,79:47 0/0:0,33,192:11:0:0:11,0,0,0:11,0:0,44,220:44 0/0:0,9,94:3:0:0:3,0,0,0:3,0:0,20,122:20 0/0:0,24,198:8:0:0:7,1,0,0:8,0:0,35,226:35 0/0:0,12,120:4:0:0:4,0,0,0:4,0:0,23,148:23 0/0:0,15,165:5:0:0:3,2,0,0:5,0:0,26,193:26 0/0:0,24,172:8:0:0:8,0,0,0:8,0:0,35,200:35 0/0:0,3,31:1:0:0:0,1,0,0:1,0:0,14,59:14 0/0:0,6,64:2:0:0:2,0,0,0:2,0:0,17,92:17 0/0:0,6,66:2:0:0:2,0,0,0:2,0:0,17,94:17 0/0:0,15,118:5:0:0:5,0,0,0:5,0:0,26,146:26 0/0:0,33,178:11:0:0:11,0,0,0:11,0:0,44,206:44 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,14,63:14 0/0:0,9,74:3:0:0:2,1,0,0:3,0:0,20,102:20 0/0:0,21,168:7:0:0:5,2,0,0:7,0:0,32,196:32 0/0:0,3,40:1:0:0:1,0,0,0:1,0:0,14,68:14 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,89:2:0:0:2,0,0,0:2,0:0,17,117:17 0/0:0,12,130:4:0:0:2,2,0,0:4,0:0,23,158:23 0/0:0,9,78:3:0:0:1,2,0,0:3,0:0,20,106:20 0/0:0,6,65:2:0:0:1,1,0,0:2,0:0,17,93:17 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,14,63:14 0/0:0,6,55:2:0:0:2,0,0,0:2,0:0,17,83:17 0/0:0,3,29:1:0:0:1,0,0,0:1,0:0,14,57:14 0/0:0,36,194:12:0:0:11,1,0,0:12,0:0,47,222:47 0/0:0,12,110:4:0:0:4,0,0,0:4,0:0,23,138:23 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,18,182:6:0:0:6,0,0,0:6,0:0,29,210:29 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,3,34:1:0:0:0,1,0,0:1,0:0,14,62:14 0/0:0,6,68:2:0:0:2,0,0,0:2,0:0,17,96:17 0/0:0,9,107:3:0:0:3,0,0,0:3,0:0,20,135:20 0/0:0,6,67:2:0:0:1,1,0,0:2,0:0,17,95:17 0/0:0,6,68:2:0:0:2,0,0,0:2,0:0,17,96:17 0/0:0,27,184:9:0:0:9,0,0,0:9,0:0,38,212:38 0/0:0,9,85:3:0:0:3,0,0,0:3,0:0,20,113:20 0/0:0,12,111:4:0:0:4,0,0,0:4,0:0,23,139:23 0/0:0,6,77:2:0:0:1,1,0,0:2,0:0,17,105:17 0/0:0,12,108:4:0:0:3,1,0,0:4,0:0,23,136:23 0/0:0,3,27:1:0:0:1,0,0,0:1,0:0,14,55:14 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,64:2:0:0:2,0,0,0:2,0:0,17,92:17 0/0:0,3,33:1:0:0:1,0,0,0:1,0:0,14,61:14 0/0:0,12,125:4:0:0:4,0,0,0:4,0:0,23,153:23 0/0:0,9,98:3:0:0:2,1,0,0:3,0:0,20,126:20 0/0:0,6,46:2:0:0:2,0,0,0:2,0:0,17,74:17
+KB704451 4157909 . T G 278.0 . DP=295;VDB=0.184881;SGB=22.7413;RPB=0.646301;MQB=0.998034;MQSB=0.200514;BQB=0.321842;MQ0F=0;DPR=247,15;ICB=0.00558284;HOB=0.00255102;AC=4;AN=112;DP4=218,29,15,1;MQ=59;SNP;VARTYPE=SNP GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 0/0:0,87,255:29:0:0:24,5,0,0:29,0:0,97,282:97 0/0:0,27,183:9:0:0:9,0,0,0:9,0:0,37,210:37 0/1:156,0,167:19:8:0:11,0,7,1:11,8:145,0,183:127 0/1:75,0,107:5:2:0:2,1,2,0:3,2:64,0,123:64 0/0:0,27,191:9:0:0:8,1,0,0:9,0:0,37,218:37 0/0:0,6,80:2:0:0:1,1,0,0:2,0:0,16,107:16 0/0:0,12,119:4:0:0:3,1,0,0:4,0:0,22,146:22 0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,13,61:13 0/0:0,15,126:5:0:0:5,0,0,0:5,0:0,25,153:25 0/0:0,12,132:4:0:0:2,2,0,0:4,0:0,22,159:22 0/0:0,12,133:4:0:0:4,0,0,0:4,0:0,22,160:22 0/0:0,6,67:2:0:0:2,0,0,0:2,0:0,16,94:16 0/1:79,9,0:3:3:0:0,0,3,0:0,3:60,0,8:8 0/0:0,21,230:7:0:0:6,1,0,0:7,0:0,31,257:31 0/0:0,18,144:6:0:0:6,0,0,0:6,0:0,28,171:28 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,53:2:0:0:2,0,0,0:2,0:0,16,80:16 0/1:59,0,64:5:2:0:3,0,2,0:3,2:48,0,80:48 0/0:0,33,180:11:0:0:11,0,0,0:11,0:0,43,207:43 0/0:0,12,110:4:0:0:4,0,0,0:4,0:0,22,137:22 0/0:0,24,190:8:0:0:7,1,0,0:8,0:0,34,217:34 0/0:0,12,110:4:0:0:4,0,0,0:4,0:0,22,137:22 0/0:0,15,164:5:0:0:3,2,0,0:5,0:0,25,191:25 0/0:0,24,161:8:0:0:8,0,0,0:8,0:0,34,188:34 0/0:0,3,32:1:0:0:0,1,0,0:1,0:0,13,59:13 0/0:0,6,63:2:0:0:2,0,0,0:2,0:0,16,90:16 0/0:0,6,65:2:0:0:2,0,0,0:2,0:0,16,92:16 0/0:0,15,121:5:0:0:5,0,0,0:5,0:0,25,148:25 0/0:0,30,174:10:0:0:10,0,0,0:10,0:0,40,201:40 0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,13,61:13 0/0:0,6,63:2:0:0:2,0,0,0:2,0:0,16,90:16 0/0:0,21,164:7:0:0:5,2,0,0:7,0:0,31,191:31 0/0:0,3,37:1:0:0:1,0,0,0:1,0:0,13,64:13 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,89:2:0:0:2,0,0,0:2,0:0,16,116:16 0/0:0,12,128:4:0:0:2,2,0,0:4,0:0,22,155:22 0/0:0,9,94:3:0:0:1,2,0,0:3,0:0,19,121:19 0/0:0,6,63:2:0:0:1,1,0,0:2,0:0,16,90:16 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,56:2:0:0:2,0,0,0:2,0:0,16,83:16 0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,13,61:13 0/0:0,36,193:12:0:0:11,1,0,0:12,0:0,46,220:46 0/0:0,12,108:4:0:0:4,0,0,0:4,0:0,22,135:22 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,18,168:6:0:0:6,0,0,0:6,0:0,28,195:28 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,3,31:1:0:0:0,1,0,0:1,0:0,13,58:13 0/0:0,6,47:2:0:0:2,0,0,0:2,0:0,16,74:16 0/0:8,11,65:2:1:0:1,0,1,0:1,0:0,13,84:13 0/0:0,6,64:2:0:0:1,1,0,0:2,0:0,16,91:16 0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,13,61:13 0/0:0,27,177:9:0:0:9,0,0,0:9,0:0,37,204:37 0/0:0,6,50:2:0:0:2,0,0,0:2,0:0,16,77:16 0/0:0,12,101:4:0:0:4,0,0,0:4,0:0,22,128:22 0/0:0,6,65:2:0:0:1,1,0,0:2,0:0,16,92:16 0/0:0,12,100:4:0:0:3,1,0,0:4,0:0,22,127:22 0/0:0,3,31:1:0:0:1,0,0,0:1,0:0,13,58:13 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,84:3:0:0:3,0,0,0:3,0:0,19,111:19 0/0:0,3,32:1:0:0:1,0,0,0:1,0:0,13,59:13 0/0:0,12,104:4:0:0:4,0,0,0:4,0:0,22,131:22 0/0:0,6,66:2:0:0:1,1,0,0:2,0:0,16,93:16 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0
+KB704451 4157927 . G A 4.88727 . DP=334;VDB=0.38;SGB=3.29913;RPB=0.454248;MQB=0.970588;MQSB=0.546099;BQB=0.215686;MQ0F=0;DPR=306,2;ICB=0.000310486;HOB=0.000153894;AC=1;AN=114;DP4=265,41,2,0;MQ=59;SNP;VARTYPE=SNP GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 0/0:0,105,255:35:0:0:31,4,0,0:35,0:0,122,295:122 0/0:0,45,255:15:0:0:14,1,0,0:15,0:0,62,295:62 0/0:0,60,255:20:0:0:17,3,0,0:20,0:0,77,295:77 0/0:0,21,242:7:0:0:6,1,0,0:7,0:0,38,282:38 0/0:0,24,207:8:0:0:6,2,0,0:8,0:0,41,247:41 0/0:0,6,71:2:0:0:2,0,0,0:2,0:0,23,112:23 0/0:0,24,215:8:0:0:6,2,0,0:8,0:0,41,255:41 0/0:0,6,70:2:0:0:1,1,0,0:2,0:0,23,111:23 0/0:0,30,191:10:0:0:10,0,0,0:10,0:0,47,231:47 0/0:0,15,163:5:0:0:3,2,0,0:5,0:0,32,203:32 0/0:0,15,151:5:0:0:5,0,0,0:5,0:0,32,191:32 0/0:0,6,70:2:0:0:2,0,0,0:2,0:0,23,111:23 0/0:0,12,102:4:0:0:4,0,0,0:4,0:0,29,142:29 0/0:0,24,255:8:0:0:6,2,0,0:8,0:0,41,295:41 0/0:0,21,189:7:0:0:7,0,0,0:7,0:0,38,229:38 0/0:0,3,35:1:0:0:0,1,0,0:1,0:0,20,76:20 0/0:0,3,40:1:0:0:1,0,0,0:1,0:0,20,81:20 0/0:0,12,126:4:0:0:3,1,0,0:4,0:0,29,166:29 0/0:0,39,255:13:0:0:12,1,0,0:13,0:0,56,295:56 0/0:0,21,206:7:0:0:6,1,0,0:7,0:0,38,246:38 0/0:0,30,238:10:0:0:8,2,0,0:10,0:0,47,278:47 0/0:0,18,145:6:0:0:6,0,0,0:6,0:0,35,185:35 0/0:0,24,244:8:0:0:6,2,0,0:8,0:0,41,284:41 0/0:0,24,195:8:0:0:7,1,0,0:8,0:0,41,235:41 0/0:0,3,27:1:0:0:0,1,0,0:1,0:0,20,68:20 0/0:0,6,62:2:0:0:2,0,0,0:2,0:0,23,103:23 0/0:0,6,64:2:0:0:2,0,0,0:2,0:0,23,105:23 0/0:0,15,123:5:0:0:5,0,0,0:5,0:0,32,163:32 0/0:0,33,184:11:0:0:11,0,0,0:11,0:0,50,224:50 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,20,76:20 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,20,76:20 0/0:0,18,165:6:0:0:4,2,0,0:6,0:0,35,205:35 0/0:0,3,38:1:0:0:1,0,0,0:1,0:0,20,79:20 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,117:3:0:0:3,0,0,0:3,0:0,26,157:26 0/0:0,12,121:4:0:0:2,2,0,0:4,0:0,29,161:29 0/0:0,9,95:3:0:0:1,2,0,0:3,0:0,26,135:26 0/0:0,3,41:1:0:0:1,0,0,0:1,0:0,20,82:20 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,60:2:0:0:2,0,0,0:2,0:0,23,101:23 0/0:0,3,25:1:0:0:1,0,0,0:1,0:0,20,66:20 0/0:0,36,213:12:0:0:11,1,0,0:12,0:0,53,253:53 0/0:0,15,152:5:0:0:4,1,0,0:5,0:0,32,192:32 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,21,215:7:0:0:6,1,0,0:7,0:0,38,255:38 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,3,31:1:0:0:0,1,0,0:1,0:0,20,72:20 0/0:0,6,60:2:0:0:2,0,0,0:2,0:0,23,101:23 0/0:0,9,101:3:0:0:3,0,0,0:3,0:0,26,141:26 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,60:2:0:0:2,0,0,0:2,0:0,23,101:23 0/0:0,27,179:9:0:0:9,0,0,0:9,0:0,44,219:44 0/0:0,9,92:3:0:0:3,0,0,0:3,0:0,26,132:26 0/0:0,12,112:4:0:0:4,0,0,0:4,0:0,29,152:29 0/0:0,6,58:2:0:0:1,1,0,0:2,0:0,23,99:23 0/0:0,15,123:5:0:0:4,1,0,0:5,0:0,32,163:32 0/0:0,3,25:1:0:0:1,0,0,0:1,0:0,20,66:20 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,78:3:0:0:3,0,0,0:3,0:0,26,118:26 0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,20,75:20 0/1:47,0,51:3:2:0:1,0,2,0:1,2:29,0,74:29 0/0:0,9,80:3:0:0:2,1,0,0:3,0:0,26,120:26 0/0:0,6,53:2:0:0:2,0,0,0:2,0:0,23,94:23
+KB704451 4157938 . ATTT ATTTT 650.0 . INDEL;IDV=18;IMF=0.428571;DP=361;VDB=0.773794;SGB=32.6744;MQSB=0.993251;MQ0F=0.00831025;DPR=115,60;ICB=0.929833;HOB=0.0258;AC=23;AN=100;DP4=98,17,48,12;MQ=59;INS;VARTYPE=INS GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 0/1:124,0,7:19:16:0:3,0,13,3:3,16:123,0,13:13 0/1:14,3,0:1:1:0:0,0,1,0:0,1:12,1,5:4 0/0:0,9,55:3:0:0:3,0,0,0:3,0:0,9,62:9 0/0:0,21,146:7:0:0:5,2,0,0:7,0:0,21,153:21 0/0:0,18,126:6:0:0:4,2,0,0:6,0:0,18,133:18 0/0:0,6,56:2:0:0:2,0,0,0:2,0:0,7,63:7 0/0:0,9,87:3:0:0:2,1,0,0:3,0:0,9,94:9 1/1:46,9,0:3:3:0:0,0,1,2:0,3:40,4,1:4 0/0:0,27,155:9:0:0:8,1,0,0:9,0:0,27,162:27 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,57:2:0:0:2,0,0,0:2,0:0,7,64:7 0/1:24,3,0:1:1:0:0,0,1,0:0,1:22,1,5:5 0/0:0,3,32:1:0:0:1,0,0,0:1,0:1,5,40:5 0/0:0,9,84:3:0:0:2,1,0,0:3,0:0,9,91:9 0/0:0,15,111:5:0:0:5,0,0,0:5,0:0,15,118:15 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:32,6,0:2:2:0:0,0,1,1:0,2:28,2,3:3 0/0:0,6,55:2:0:0:2,0,0,0:2,0:0,7,62:7 0/1:63,0,61:7:4:0:3,0,3,1:3,4:62,0,67:61 0/0:0,24,154:8:0:0:7,1,0,0:8,0:0,24,161:24 0/1:16,0,94:7:2:0:5,0,2,0:5,2:15,0,100:15 0/1:61,0,101:9:4:0:4,1,3,1:5,4:60,0,107:60 0/0:0,3,31:1:0:0:1,0,0,0:1,0:1,5,39:5 0/1:13,3,0:1:1:0:0,0,0,1:0,1:11,1,5:4 0/1:16,0,47:3:1:0:2,0,1,0:2,1:15,0,53:15 0/0:0,3,32:1:0:0:1,0,0,0:1,0:1,5,40:5 0/1:46,0,57:5:2:0:3,0,2,0:3,2:45,0,63:45 0/1:50,0,12:6:5:0:1,0,5,0:1,5:49,0,18:18 0/0:0,3,30:1:0:0:1,0,0,0:1,0:1,5,38:5 0/0:0,3,4:1:0:0:1,0,0,0:1,0:1,5,12:4 0/1:18,0,98:5:1:0:2,2,1,0:4,1:17,0,104:17 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:15,0,26:2:1:0:1,0,1,0:1,1:14,0,32:14 0/1:20,0,26:2:1:0:1,0,0,1:1,1:19,0,32:19 0/0:0,6,60:2:0:0:1,1,0,0:2,0:0,7,67:7 0/0:0,3,32:1:0:0:1,0,0,0:1,0:1,5,40:5 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:33,3,0:1:1:0:0,0,1,0:0,1:31,1,5:5 0/1:82,6,0:7:6:0:1,0,5,1:1,6:78,2,3:3 0/0:0,15,126:5:0:0:4,1,0,0:5,0:0,15,133:15 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:34,6,0:2:2:0:0,0,2,0:0,2:30,2,3:3 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,50:2:0:0:1,1,0,0:2,0:0,7,57:7 0/0:0,3,28:1:0:0:1,0,0,0:1,0:1,5,36:5 0/0:0,6,57:2:0:0:2,0,0,0:2,0:0,7,64:7 0/0:0,18,124:6:0:0:5,1,0,0:6,0:0,18,131:18 0/1:53,6,0:2:2:0:0,0,2,0:0,2:49,2,3:3 0/0:0,12,96:4:0:0:4,0,0,0:4,0:0,12,103:12 0/1:25,3,0:1:1:0:0,0,1,0:0,1:23,1,5:5 1/1:61,9,0:3:3:0:0,0,2,1:0,3:55,4,1:4 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,3,31:1:0:0:0,1,0,0:1,0:1,5,39:5 0/0:0,6,56:2:0:0:2,0,0,0:2,0:0,7,63:7 0/0:0,3,29:1:0:0:1,0,0,0:1,0:1,5,37:5 0/0:0,3,32:1:0:0:1,0,0,0:1,0:1,5,40:5 0/0:0,9,87:3:0:0:2,1,0,0:3,0:0,9,94:9 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0
+KB704451 4157940 . TTGTGTGTGTGTGT TTGTGTGTGTGTGTGTGT,TTTCTGTGTGTGTGTGT 999.0 . INDEL;IDV=7;IMF=0.5;DP=366;VDB=0.0431342;SGB=14.7456;MQSB=0.996953;MQ0F=0.010929;DPR=86,41,8;ICB=0.963728;HOB=0.02;AC=21,6;AN=90;DP4=70,16,39,10;MQ=58;INS;VARTYPE=INS,INS GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 0/2:60,60,60,3,3,0:1:1:0:0,0,1,0:0,0,1:54,55,61,2,5,9:4 1/1:255,18,0,255,18,255:6:6:0:0,0,3,3:0,6,0:248,11,0,252,19,263:11 0/0:0,6,62,6,62,62:2:0:0:2,0,0,0:2,0,0:1,7,70,11,71,78:6 0/1:9,0,238,27,241,255:7:1:0:4,2,1,0:6,1,0:8,0,245,31,249,270:8 0/0:0,12,185,12,185,185:4:0:0:3,1,0,0:4,0,0:0,12,192,16,193,200:11 0/0:0,6,110,6,110,110:2:0:0:2,0,0,0:2,0,0:1,7,118,11,119,126:6 0/1:1,0,158,10,161,165:4:1:0:2,1,1,0:3,1,0:3,2,167,16,171,182:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/0:0,24,255,24,255,255:8:0:0:7,1,0,0:8,0,0:0,24,262,28,263,270:23 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/1:37,0,54,40,57,94:2:1:0:1,0,1,0:1,1,0:36,0,60,44,64,108:35 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/1:117,6,0,117,6,117:2:2:0:0,0,1,1:0,2,0:113,3,3,118,10,128:2 0/0:0,18,237,18,237,237:6:0:0:5,1,0,0:6,0,0:0,18,244,22,245,252:17 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/2:120,120,120,6,6,0:2:2:0:0,0,1,1:0,0,2:111,112,119,2,6,7:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 1/1:255,21,0,255,21,255:7:7:0:0,0,5,2:0,7,0:247,14,0,252,22,263:13 0/2:19,31,181,0,157,154:5:1:0:3,1,1,0:4,0,1:14,27,183,0,160,164:14 0/1:120,0,255,141,255,255:10:3:0:6,1,2,1:7,3,0:119,0,261,145,262,269:119 0/0:0,15,212,15,212,212:5:0:0:5,0,0,0:5,0,0:0,15,219,19,220,227:14 0/0:0,15,243,15,243,243:5:0:0:4,1,0,0:5,0,0:0,15,250,19,251,258:14 1/1:40,9,0,40,9,40:3:3:0:0,0,3,0:0,3,0:34,4,2,39,12,50:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/0:0,6,110,6,110,110:2:0:0:2,0,0,0:2,0,0:1,7,118,11,119,126:6 0/1:54,0,54,57,57,111:2:1:0:1,0,1,0:1,1,0:53,0,60,61,64,125:51 0/0:0,9,139,9,139,139:3:0:0:3,0,0,0:3,0,0:0,10,146,14,147,154:8 1/1:243,15,0,243,15,243:5:5:0:0,0,4,1:0,5,0:236,9,0,241,16,251:8 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/1:55,1,0,58,3,57:2:1:0:1,0,1,0:1,1,0:54,1,7,62,11,72:5 0/0:0,9,170,9,170,170:3:0:0:1,2,0,0:3,0,0:0,10,177,14,178,185:8 0/1:60,3,0,60,3,60:1:1:0:0,0,1,0:0,1,0:58,2,5,63,9,73:4 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/1:60,3,0,60,3,60:1:1:0:0,0,0,1:0,1,0:58,2,5,63,9,73:4 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/1:60,3,0,60,3,60:1:1:0:0,0,1,0:0,1,0:58,2,5,63,9,73:4 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/0:0,15,206,15,206,206:5:0:0:4,1,0,0:5,0,0:0,15,213,19,214,221:14 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 1/2:134,105,96,40,0,34:5:5:0:0,0,5,0:0,3,2:125,97,95,36,0,41:35 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/1:58,6,0,58,6,58:2:2:0:0,0,2,0:0,2,0:54,3,3,59,10,69:2 0/0:0,6,102,6,102,102:2:0:0:1,1,0,0:2,0,0:1,7,110,11,111,118:6 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/1:98,0,88,104,94,191:4:2:0:1,1,2,0:2,2,0:97,0,94,108,101,205:92 0/2:35,35,35,3,3,0:1:1:0:0,0,1,0:0,0,1:29,30,36,2,5,9:4 0/0:0,6,110,6,110,110:2:0:0:2,0,0,0:2,0,0:1,7,118,11,119,126:6 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/2:45,45,45,3,3,0:1:1:0:0,0,1,0:0,0,1:39,40,46,2,5,9:4 0/0:0,3,60,3,60,60:1:0:0:0,1,0,0:1,0,0:2,5,69,9,70,77:4 0/0:0,6,110,6,110,110:2:0:0:2,0,0,0:2,0,0:1,7,118,11,119,126:6 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/0:0,6,120,6,120,120:2:0:0:1,1,0,0:2,0,0:1,7,128,11,129,136:6 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0
diff --git a/vcf/test/gatk_26_meta.vcf b/vcf/test/gatk_26_meta.vcf
new file mode 100644
index 0000000..1dd2e56
--- /dev/null
+++ b/vcf/test/gatk_26_meta.vcf
@@ -0,0 +1,4 @@
+##fileformat=VCFv4.1
+##GATKCommandLine=
+##GATKCommandLine=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT
diff --git a/vcf/test/gonl.chr20.release4.gtc.vcf b/vcf/test/gonl.chr20.release4.gtc.vcf
new file mode 100644
index 0000000..03588bf
--- /dev/null
+++ b/vcf/test/gonl.chr20.release4.gtc.vcf
@@ -0,0 +1,120 @@
+##fileformat=VCFv4.1
+##ApplyRecalibration="analysis_type=ApplyRecalibration input_file=[] read_buffer_size=null phone_home=STANDARD gatk_key=null read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/target/gpfs2/gcc/resources/hg19/indices/human_g1k_v37.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false BQSR=null defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false input=[(RodBinding name=input source=/target/gpfs2/gcc/home/lfrancioli/gonl/projects/trio-analysis/results/snps/UG_raw_biallelic/gonl.biallelic.vcf)] recal_file=/target/gpfs2/gcc/home/lfrancioli/gonl/projects/trio-analysis/intermediate/snps/vqsr_1kg_phase1/gonl.biallelic.vcf.1kg_phase1.2.recal tranches_file=/target/gpfs2/gcc/home/lfrancioli/gonl/projects/trio-analysis/intermediate/snps/vqsr_1kg_phase1/gonl.biallelic.vcf.1kg_phase1.2.tranches out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub ts_filter_level=99.5 ignore_filter=null mode=SNP filter_mismatching_base_and_quals=false"
+##CombineVariants="analysis_type=CombineVariants input_file=[] sample_metadata=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=[1:123000001-126000000] excludeIntervals=null reference_sequence=/humgen/1kg/reference/human_g1k_v37.fasta rodBind=[/humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/AFR/AFR.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/ASN/ASN.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/AMR/AMR.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/EUR/EUR.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/AFR.admix/AFR.admix.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/ASN.admix/ASN.admix.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/AMR.admix/AMR.admix.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/EUR.admix/EUR.admix.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/ALL/ALL.phase1.chr1.42.raw.snps.vcf] rodToIntervalTrackName=null BTI_merge_rule=UNION nonDeterministicRandomSeed=false DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub genotypemergeoption=PRIORITIZE filteredrecordsmergetype=KEEP_IF_ANY_UNFILTERED rod_priority_list=ALL,AFR.admix,AMR.admix,EUR.admix,ASN.admix,AFR,AMR,EUR,ASN printComplexMerges=false filteredAreUncalled=false minimalVCF=false setKey=pop assumeIdenticalSamples=false minimumN=1 masterMerge=false mergeInfoWithMaxAC=true"
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##INFO=
+##INFO=
+##SelectVariants="analysis_type=SelectVariants input_file=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=[1:1-5000001] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/target/gpfs2/gcc/home/lfrancioli/gonl/resources/hg19/indices/human_g1k_v37.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=/target/gpfs2/gcc/home/lfrancioli/results/trio-analysis/ug_initial/gonl.1_1-5000001.vcf) discordance=(RodBinding name= source=UNBOUND) concordance=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sample_name=[] sample_expressions=null sample_file=null exclude_sample_name=[] exclude_sample_file=[] select_expressions=[] excludeNonVariants=false excludeFiltered=false restrictAllelesTo=BIALLELIC keepOriginalAC=false mendelianViolation=false mendelianViolationQualThreshold=0.0 select_random_number=0 select_random_fraction=0.0 remove_fraction_genotypes=0.0 selectTypeToInclude=[] keepIDs=null outMVFile=null filter_mismatching_base_and_quals=false"
+##SetFilterPASS="analysis_type=SetFilterPASS input_file=[] read_buffer_size=null phone_home=STANDARD gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/target/gpfs2/gcc/resources/hg19/indices/human_g1k_v37.fa nonDeterministicRandomSeed=false disableRandomization=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 enable_experimental_downsampling=false baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 defaultBaseQualities=-1 validation_strictness=SILENT remove_program_records=false keep_program_records=false unsafe=null num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=/target/gpfs2/gcc/home/lfrancioli/gonl/projects/trio-analysis/intermediate/snps/vqsr_1kg_phase1/gonl.biallelic.vqsr_1kg_phase1.2.99.5.vcf) sites=[(RodBinding name=sites source=/target/gpfs2/gcc/home/lfrancioli/resources/1000GP_hg19/EUR.wgs.project_consensus_vqsr2b.20101123.snps.low_coverage.sites.vcf)] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub filter_mismatching_base_and_quals=false"
+##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[/humgen/1kg/phase1_cleaned_bams/bams/chr1/CHB.phase1.chr1.42.cleaned.bam, /humgen/1kg/phase1_cleaned_bams/bams/chr1/CHS.phase1.chr1.42.cleaned.bam, /humgen/1kg/phase1_cleaned_bams/bams/chr1/CLM.phase1.chr1.42.cleaned.bam, /humgen/1kg/phase1_cleaned_bams/bams/chr1/JPT.phase1.chr1.42.cleaned.bam, /humgen/1kg/phase1_cleaned_bams/bams/chr1/MXL.phase1.chr1.42.cleaned.bam, /humgen/1kg/phase1_cleaned_bams/bams/chr1/PUR.phase1.chr1.42.cleaned.bam] sample_metadata=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=[1:123000001-126000000] excludeIntervals=null reference_sequence=/humgen/1kg/reference/human_g1k_v37.fasta rodBind=[/humgen/1kg/processing/production_wgs_phase1/consensus/ALL.phase1.wgs.unionBC1.pass.sites.vcf, /humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf] rodToIntervalTrackName=null BTI_merge_rule=UNION nonDeterministicRandomSeed=false DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=50 baq=CALCULATE_AS_NECESSARY baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=SNP p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=GENOTYPE_GIVEN_ALLELES output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=4.0 standard_min_confidence_threshold_for_emitting=4.0 noSLOD=false assume_single_sample_reads=null abort_at_too_much_coverage=-1 min_base_quality_score=17 min_mapping_quality_score=20 max_deletion_fraction=0.05 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 doContextDependentGapPenalties=true getGapPenaltiesFromData=false indel_recal_file=indel.recal_data.csv indelDebug=false dovit=false GSA_PRODUCTION_ONLY=false exactCalculation=LINEAR_EXPERIMENTAL ignoreSNPAlleles=false output_all_callable_bases=false genotype=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null metrics_file=null annotation=[]"
+##VariantAnnotator="analysis_type=VariantAnnotator input_file=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=[1:1-5000001] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/target/gpfs2/gcc/home/lfrancioli/gonl/resources/hg19/indices/human_g1k_v37.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[/target/gpfs2/gcc/home/lfrancioli/gonl/resources/UnifiedGenotyper/GoNL.ped] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=/target/gpfs2/gcc/home/lfrancioli/results/trio-analysis/snps/gonl.1_1-5000001.biallelic.vcf) snpEffFile=(RodBinding name= source=UNBOUND) dbsnp=(RodBinding name= source=UNBOUND) comp=[] resource=[] out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub annotation=[TransmissionDisequilibriumTest, InbreedingCoeff, AlleleDosage, ChromosomeCounts] excludeAnnotation=[] group=[] expression=[] useAllAnnotations=false list=false vcfContainsOnlyIndels=false MendelViolationGenotypeQualityThreshold=0.0 requireStrictAlleleMatch=false filter_mismatching_base_and_quals=false"
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=