diff --git a/.gitignore b/.gitignore index b9e4fea..a18ec95 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ docs/_build .DS_Store vcf/cparse.c vcf/cparse.so +.coverage diff --git a/.travis.yml b/.travis.yml index cdbf63a..ad315b2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,12 +1,18 @@ # Validate this file using http://lint.travis-ci.org/ language: python +sudo: false +cache: + directories: + - $HOME/.cache/pip python: - - "2.6" - "2.7" - - "3.2" + - "3.4" + - "3.5" + - "3.6" + - "nightly" - "pypy" + - "pypy3" install: - - "if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install --use-mirrors pysam argparse ordereddict; fi" - - "if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then pip install --use-mirrors pysam; fi" + - if [[ "$TRAVIS_PYTHON_VERSION" =~ ^pypy ]]; then pip install -r requirements/pypy-requirements.txt; else pip install -r requirements/common-requirements.txt; fi - python setup.py install script: python setup.py test diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..44f678a --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +recursive-include vcf *.pyx diff --git a/README.rst b/README.rst index 83792ce..67b5d1b 100644 --- a/README.rst +++ b/README.rst @@ -14,7 +14,7 @@ There main interface is the class: ``Reader``. It takes a file-like object and acts as a reader:: >>> import vcf - >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'rb')) + >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'r')) >>> for record in vcf_reader: ... print record Record(CHROM=20, POS=14370, REF=G, ALT=[A]) @@ -49,8 +49,8 @@ one-entry Python lists (see, e.g., ``Record.ALT``). Semicolon-delimited lists of key=value pairs are converted to Python dictionaries, with flags being given a ``True`` value. Integers and floats are handled exactly as you'd expect:: - >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'rb')) - >>> record = vcf_reader.next() + >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'r')) + >>> record = next(vcf_reader) >>> print record.POS 14370 >>> print record.ALT @@ -58,17 +58,17 @@ a ``True`` value. Integers and floats are handled exactly as you'd expect:: >>> print record.INFO['AF'] [0.5] -There are a number of convienience methods and properties for each ``Record`` allowing you to +There are a number of convenience methods and properties for each ``Record`` allowing you to examine properties of interest:: >>> print record.num_called, record.call_rate, record.num_unknown 3 1.0 0 >>> print record.num_hom_ref, record.num_het, record.num_hom_alt 1 1 1 - >>> print record.nucl_diversity, record.aaf - 0.6 0.5 + >>> print record.nucl_diversity, record.aaf, record.heterozygosity + 0.6 [0.5] 0.5 >>> print record.get_hets() - [Call(sample=NA00002, GT=1|0, HQ=[51, 51], DP=8, GQ=48)] + [Call(sample=NA00002, CallData(GT=1|0, GQ=48, DP=8, HQ=[51, 51]))] >>> print record.is_snp, record.is_indel, record.is_transition, record.is_deletion True False True False >>> print record.var_type, record.var_subtype @@ -82,7 +82,7 @@ fields. In case the FORMAT column does not exist, ``record.FORMAT`` is parsed sample column and ``record.genotype`` is a way of looking up genotypes by sample name:: - >>> record = vcf_reader.next() + >>> record = next(vcf_reader) >>> for sample in record.samples: ... print sample['GT'] 0|0 @@ -101,7 +101,7 @@ call data in ``data``:: >>> print call.sample NA00001 >>> print call.data - {'GT': '0|0', 'HQ': [58, 50], 'DP': 3, 'GQ': 49} + CallData(GT=0|0, GQ=49, DP=3, HQ=[58, 50]) Please note that as of release 0.4.0, attributes known to have single values (such as ``DP`` and ``GQ`` above) are returned as values. Other attributes are returned @@ -134,39 +134,48 @@ For example:: ALT records are actually classes, so that you can interrogate them:: - >>> reader = vcf.Reader(file('vcf/test/example-4.1-bnd.vcf')) - >>> _ = reader.next(); row = reader.next() + >>> reader = vcf.Reader(open('vcf/test/example-4.1-bnd.vcf')) + >>> _ = next(reader); row = next(reader) >>> print row Record(CHROM=1, POS=2, REF=T, ALT=[T[2:3[]) >>> bnd = row.ALT[0] >>> print bnd.withinMainAssembly, bnd.orientation, bnd.remoteOrientation, bnd.connectingSequence True False True T -Random access is supported for files with tabix indexes. Simply call fetch for the -region you are interested in:: +The Reader supports retrieval of records within designated regions for files +with tabix indexes via the fetch method. This requires the pysam module as a +dependency. Pass in a chromosome, and, optionally, start and end coordinates, +for the regions of interest:: >>> vcf_reader = vcf.Reader(filename='vcf/test/tb.vcf.gz') - >>> for record in vcf_reader.fetch('20', 1110696, 1230237): + >>> # fetch all records on chromosome 20 from base 1110696 through 1230237 + >>> for record in vcf_reader.fetch('20', 1110695, 1230237): # doctest: +SKIP ... print record Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T]) Record(CHROM=20, POS=1230237, REF=T, ALT=[None]) -Or extract a single row:: +Note that the start and end coordinates are in the zero-based, half-open +coordinate system, similar to ``_Record.start`` and ``_Record.end``. The very +first base of a chromosome is index 0, and the the region includes bases up +to, but not including the base at the end coordinate. For example:: - >>> print vcf_reader.fetch('20', 1110696) - Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T]) + >>> # fetch all records on chromosome 4 from base 11 through 20 + >>> vcf_reader.fetch('4', 10, 20) # doctest: +SKIP +would include all records overlapping a 10 base pair region from the 11th base +of through the 20th base (which is at index 19) of chromosome 4. It would not +include the 21st base (at index 20). (See +http://genomewiki.ucsc.edu/index.php/Coordinate_Transforms for more +information on the zero-based, half-open coordinate system.) The ``Writer`` class provides a way of writing a VCF file. Currently, you must specify a template ``Reader`` which provides the metadata:: >>> vcf_reader = vcf.Reader(filename='vcf/test/tb.vcf.gz') - >>> vcf_writer = vcf.Writer(file('/dev/null', 'w'), vcf_reader) + >>> vcf_writer = vcf.Writer(open('/dev/null', 'w'), vcf_reader) >>> for record in vcf_reader: ... vcf_writer.write_record(record) - An extensible script is available to filter vcf files in vcf_filter.py. VCF filters declared by other packages will be available for use in this script. Please see :doc:`FILTERS` for full description. - diff --git a/docs/API.rst b/docs/API.rst index 7ffc21a..d688893 100644 --- a/docs/API.rst +++ b/docs/API.rst @@ -14,43 +14,43 @@ vcf.Writer :members: vcf.model._Record ------------ +----------------- .. autoclass:: vcf.model._Record :members: vcf.model._Call ---------- +--------------- .. autoclass:: vcf.model._Call :members: vcf.model._AltRecord ------------ +-------------------- .. autoclass:: vcf.model._AltRecord :members: vcf.model._Substitution ------------ +----------------------- .. autoclass:: vcf.model._Substitution :members: vcf.model._SV ------------ +------------- .. autoclass:: vcf.model._SV :members: vcf.model._SingleBreakend ------------ +------------------------- .. autoclass:: vcf.model._SingleBreakend :members: vcf.model._Breakend ------------ +------------------- .. autoclass:: vcf.parser._Breakend :members: diff --git a/docs/HISTORY.rst b/docs/HISTORY.rst index 396ffa7..8a97d8d 100644 --- a/docs/HISTORY.rst +++ b/docs/HISTORY.rst @@ -2,7 +2,7 @@ Development =========== Please use the `PyVCF repository `_. -Pull requests gladly accepted. +Pull requests gladly accepted. Issues should be reported at the github issue tracker. Running tests @@ -10,24 +10,90 @@ Running tests Please check the tests by running them with:: - python setup.py test + python setup.py test New features should have test code sent with them. Changes ======= +0.6.7 Release +------------- + +* Include missing .pyx files + +0.6.6 Release +------------- + +* better walk together record ordering (Thanks @datagram, #141) + +0.6.5 Release +------------- + +* Better contig handling (#115, #116, #119 thanks Martijn) +* INFO lines with type character (#120, #121 thanks @AndrewUzilov, Martijn) +* Single breakends fix (#126 thanks @pkrushe) +* Speedup by losing ordering of INFO (#128 thanks Martijn) +* HOMSEQ and other missing fields in INFO (#130 thanks Martijn) +* Add aaf property, (thanks @mgymrek #131) +* Custom equality for walk_together, thanks bow #132 +* Change default line encoding to '\n' +* Improved __eq__ (#134, thanks bow) + + +0.6.4 Release +------------- + +* Handle INFO fields with multiple values, thanks +* Support writing records without GT data #88, thanks @bow +* Pickleable call data #112, thanks @superbobry +* Write files without FORMAT #95 thanks Martijn +* Strict whitespace mode, thanks Martijn, Lee Lichtenstein and Manawsi Gupta +* Add support for contigs in header, thanks @gcnh and Martijn +* Fix GATK header parsing, thanks @alimanfoo + +0.6.3 Release +------------- + +* cython port of #79 +* correct writing of meta lines #84 + +0.6.2 Release +------------- + +* issues #78, #79 (thanks Sean, Brad) + +0.6.1 Release +------------- + +* Add strict whitespace mode for well formed VCFs with spaces + in sample names (thanks Marco) +* Ignore blank lines in files (thanks Martijn) +* Tweaks for handling missing data (thanks Sean) +* bcftools tests (thanks Martijn) +* record.FILTER is always a list + +0.6.0 Release +------------- + +* Backwards incompatible change: _Call.data is now a + namedtuple (previously it was a dict) +* Optional cython version, much improved performance. +* Improvements to writer (thanks @cmclean) +* Improvements to inheritance of classes (thanks @lennax) + + 0.5.0 Release ------------- -VCF 4.1 support: - * support missing genotype #28 (thanks @martijnvermaat) - * parseALT for svs #42, #48 (thanks @dzerbino) +* VCF 4.1 support: + - support missing genotype #28 (thanks @martijnvermaat) + - parseALT for svs #42, #48 (thanks @dzerbino) * `trim_common_suffix` method #22 (thanks @martijnvermaat) * Multiple metadata with the same key is stored (#52) -Writer improvements - * A/G in Number INFO fields #53 (thanks @lennax) - * Better output #55 (thanks @cmclean) +* Writer improvements: + - A/G in Number INFO fields #53 (thanks @lennax) + - Better output #55 (thanks @cmclean) * Allow malformed INFO fields #49 (thanks @ilyaminkin) * Added bayes factor error bias VCF filter * Added docs on vcf_melt @@ -37,14 +103,14 @@ Writer improvements 0.4.6 Release ------------- -* Performance improvements (#47) +* Performance improvements (#47) * Preserve order of INFO column (#46) 0.4.5 Release ------------- -* Support exponent syntax qual values (#43, #44) (thanks @martijnvermaat) -* Preserve order of header lines (#45) +* Support exponent syntax qual values (#43, #44) (thanks @martijnvermaat) +* Preserve order of header lines (#45) 0.4.4 Release ------------- @@ -73,15 +139,15 @@ Writer improvements 0.4.0 Release ------------- -* Package structure +* Package structure * add ``vcf.utils`` module with ``walk_together`` method -* samtools tests +* samtools tests * support Freebayes' non standard '.' for no call -* fix vcf_melt +* fix vcf_melt * support monomorphic sites, add ``is_monomorphic`` method, handle null QUALs -* filter support for files with monomorphic calls +* filter support for files with monomorphic calls * Values declared as single are no-longer returned in lists -* several performance improvements +* several performance improvements 0.3.0 Release @@ -104,14 +170,14 @@ Documentation release * Add shebang to vcf_filter.py -0.2 Release +0.2 Release ----------- * Replace genotype dictionary with a ``Call`` object * Methods on ``Record`` and ``Call`` (thanks @arq5x) * Shortcut parse_sample when genotype is None -0.1 Release +0.1 Release ----------- * Added test code @@ -122,7 +188,7 @@ Documentation release * Allow opening by filename as well as filesocket * Support fetching rows for tabixed indexed files * Performance improvements (see ``test/prof.py``) -* Added extensible filter script (see FILTERS.md), vcf_filter.py +* Added extensible filter script (see FILTERS.md), vcf_filter.py Contributions ============= @@ -131,4 +197,3 @@ Project started by @jdoughertyii and taken over by @jamescasbon on 12th January Contributions from @arq5x, @brentp, @martijnvermaat, @ian1roberts, @marcelm. This project was supported by `Population Genetics `_. - diff --git a/docs/INTRO.rst b/docs/INTRO.rst index b61e9a9..2b9a587 100644 --- a/docs/INTRO.rst +++ b/docs/INTRO.rst @@ -1,5 +1,4 @@ Introduction ============ -.. automodule:: vcf - +.. include:: ../README.rst diff --git a/requirements/common-requirements.txt b/requirements/common-requirements.txt new file mode 100644 index 0000000..876b75e --- /dev/null +++ b/requirements/common-requirements.txt @@ -0,0 +1,3 @@ +cython +pysam!=0.8.0 +setuptools diff --git a/requirements/pypy-requirements.txt b/requirements/pypy-requirements.txt new file mode 100644 index 0000000..49fe098 --- /dev/null +++ b/requirements/pypy-requirements.txt @@ -0,0 +1 @@ +setuptools diff --git a/scripts/vcf_filter.py b/scripts/vcf_filter.py index 9a08629..fd32b39 100644 --- a/scripts/vcf_filter.py +++ b/scripts/vcf_filter.py @@ -162,7 +162,7 @@ def addfilt(filt): if output_record: # use PASS only if other filter names appear in the FILTER column #FIXME: is this good idea? - if record.FILTER == '.' and not drop_filtered: record.FILTER = 'PASS' + if record.FILTER is None and not drop_filtered: record.FILTER = 'PASS' output.write_record(record) if __name__ == '__main__': main() diff --git a/scripts/vcf_sample_filter.py b/scripts/vcf_sample_filter.py new file mode 100644 index 0000000..d71e6a3 --- /dev/null +++ b/scripts/vcf_sample_filter.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python + +# Author: Lenna X. Peterson +# github.com/lennax +# arklenna at gmail dot com + +import argparse +import logging + +from vcf import SampleFilter + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("file", help="VCF file to filter") + parser.add_argument("-o", metavar="outfile", + help="File to write out filtered samples") + parser.add_argument("-f", metavar="filters", + help="Comma-separated list of sample indices or names \ + to filter") + parser.add_argument("-i", "--invert", action="store_true", + help="Keep rather than discard the filtered samples") + parser.add_argument("-q", "--quiet", action="store_true", + help="Less output") + + args = parser.parse_args() + + if args.quiet: + log_level = logging.WARNING + else: + log_level = logging.INFO + logging.basicConfig(format='%(message)s', level=log_level) + + sf = SampleFilter(infile=args.file, outfile=args.o, + filters=args.f, invert=args.invert) + if args.f is None: + print "Samples:" + for idx, val in enumerate(sf.samples): + print "{0}: {1}".format(idx, val) diff --git a/setup.py b/setup.py index bca3a0d..b865b8d 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ from setuptools import setup -from distutils.core import setup from distutils.extension import Extension +import sys try: from Cython.Distutils import build_ext @@ -8,20 +8,7 @@ except: CYTHON = False -requires = [] - -# python 2.6 does not have argparse -try: - import argparse -except ImportError: - requires.append('argparse') - - -try: - import collections - collections.OrderedDict -except AttributeError: - requires.append('ordereddict') +DEPENDENCIES = ['setuptools'] # get the version without an import VERSION = "Undefined" @@ -44,14 +31,14 @@ setup( name='PyVCF', packages=['vcf', 'vcf.test'], - scripts=['scripts/vcf_melt', 'scripts/vcf_filter.py'], + scripts=['scripts/vcf_melt', 'scripts/vcf_filter.py', + 'scripts/vcf_sample_filter.py'], author='James Casbon and @jdoughertyii', author_email='casbon@gmail.com', description='Variant Call Format (VCF) parser for Python', long_description=DOC, test_suite='vcf.test.test_vcf.suite', - install_requires=['distribute'], - requires=requires, + install_requires=DEPENDENCIES, entry_points = { 'vcf.filters': [ 'site_quality = vcf.filters:SiteQuality', @@ -68,10 +55,20 @@ 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: BSD License', + 'License :: OSI Approved :: MIT License', 'Operating System :: OS Independent', + 'Programming Language :: Cython', 'Programming Language :: Python', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Topic :: Scientific/Engineering', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: Implementation :: CPython', + 'Programming Language :: Python :: Implementation :: PyPy', + 'Topic :: Scientific/Engineering :: Bio-Informatics', ], keywords='bioinformatics', use_2to3=True, diff --git a/tox.ini b/tox.ini index 16847bb..af7049e 100644 --- a/tox.ini +++ b/tox.ini @@ -4,25 +4,18 @@ # and then run "tox" from this directory. [tox] -envlist = py26, py27, py32 +envlist = py27, py34, py35, py36, pypy, pypy3 [testenv] +deps = + -rrequirements/common-requirements.txt commands = - rm -rf {toxinidir}/build - python setup.py test + python setup.py clean --all test -[testenv:py26] +[testenv:pypy] deps = - argparse - ordereddict - pysam + -rrequirements/pypy-requirements.txt -[testenv:py27] +[testenv:pypy3] deps = - pysam - cython - -[testenv:py32] -deps = - cython - + -rrequirements/pypy-requirements.txt diff --git a/vcf/__init__.py b/vcf/__init__.py index 2935c73..e1aae58 100644 --- a/vcf/__init__.py +++ b/vcf/__init__.py @@ -1,180 +1,15 @@ #!/usr/bin/env python -'''A VCFv4.0 and 4.1 parser for Python. +""" +A VCFv4.0 and 4.1 parser for Python. Online version of PyVCF documentation is available at http://pyvcf.rtfd.org/ +""" -The intent of this module is to mimic the ``csv`` module in the Python stdlib, -as opposed to more flexible serialization formats like JSON or YAML. ``vcf`` -will attempt to parse the content of each record based on the data types -specified in the meta-information lines -- specifically the ##INFO and -##FORMAT lines. If these lines are missing or incomplete, it will check -against the reserved types mentioned in the spec. Failing that, it will just -return strings. -There main interface is the class: ``Reader``. It takes a file-like -object and acts as a reader:: - - >>> import vcf - >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'r')) - >>> for record in vcf_reader: - ... print record - Record(CHROM=20, POS=14370, REF=G, ALT=[A]) - Record(CHROM=20, POS=17330, REF=T, ALT=[A]) - Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T]) - Record(CHROM=20, POS=1230237, REF=T, ALT=[None]) - Record(CHROM=20, POS=1234567, REF=GTCT, ALT=[G, GTACT]) - - -This produces a great deal of information, but it is conveniently accessed. -The attributes of a Record are the 8 fixed fields from the VCF spec:: - - * ``Record.CHROM`` - * ``Record.POS`` - * ``Record.ID`` - * ``Record.REF`` - * ``Record.ALT`` - * ``Record.QUAL`` - * ``Record.FILTER`` - * ``Record.INFO`` - -plus attributes to handle genotype information: - - * ``Record.FORMAT`` - * ``Record.samples`` - * ``Record.genotype`` - -``samples`` and ``genotype``, not being the title of any column, are left lowercase. The format -of the fixed fields is from the spec. Comma-separated lists in the VCF are -converted to lists. In particular, one-entry VCF lists are converted to -one-entry Python lists (see, e.g., ``Record.ALT``). Semicolon-delimited lists -of key=value pairs are converted to Python dictionaries, with flags being given -a ``True`` value. Integers and floats are handled exactly as you'd expect:: - - >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'r')) - >>> record = vcf_reader.next() - >>> print record.POS - 14370 - >>> print record.ALT - [A] - >>> print record.INFO['AF'] - [0.5] - -There are a number of convienience methods and properties for each ``Record`` allowing you to -examine properties of interest:: - - >>> print record.num_called, record.call_rate, record.num_unknown - 3 1.0 0 - >>> print record.num_hom_ref, record.num_het, record.num_hom_alt - 1 1 1 - >>> print record.nucl_diversity, record.aaf - 0.6 0.5 - >>> print record.get_hets() - [Call(sample=NA00002, CallData(GT=1|0, GQ=48, DP=8, HQ=[51, 51]))] - >>> print record.is_snp, record.is_indel, record.is_transition, record.is_deletion - True False True False - >>> print record.var_type, record.var_subtype - snp ts - >>> print record.is_monomorphic - False - -``record.FORMAT`` will be a string specifying the format of the genotype -fields. In case the FORMAT column does not exist, ``record.FORMAT`` is -``None``. Finally, ``record.samples`` is a list of dictionaries containing the -parsed sample column and ``record.genotype`` is a way of looking up genotypes -by sample name:: - - >>> record = vcf_reader.next() - >>> for sample in record.samples: - ... print sample['GT'] - 0|0 - 0|1 - 0/0 - >>> print record.genotype('NA00001')['GT'] - 0|0 - -The genotypes are represented by ``Call`` objects, which have three attributes: the -corresponding Record ``site``, the sample name in ``sample`` and a dictionary of -call data in ``data``:: - - >>> call = record.genotype('NA00001') - >>> print call.site - Record(CHROM=20, POS=17330, REF=T, ALT=[A]) - >>> print call.sample - NA00001 - >>> print call.data - CallData(GT=0|0, GQ=49, DP=3, HQ=[58, 50]) - -Please note that as of release 0.4.0, attributes known to have single values (such as -``DP`` and ``GQ`` above) are returned as values. Other attributes are returned -as lists (such as ``HQ`` above). - -There are also a number of methods:: - - >>> print call.called, call.gt_type, call.gt_bases, call.phased - True 0 T|T True - -Metadata regarding the VCF file itself can be investigated through the -following attributes: - - * ``Reader.metadata`` - * ``Reader.infos`` - * ``Reader.filters`` - * ``Reader.formats`` - * ``Reader.samples`` - -For example:: - - >>> vcf_reader.metadata['fileDate'] - '20090805' - >>> vcf_reader.samples - ['NA00001', 'NA00002', 'NA00003'] - >>> vcf_reader.filters - OrderedDict([('q10', Filter(id='q10', desc='Quality below 10')), ('s50', Filter(id='s50', desc='Less than 50% of samples have data'))]) - >>> vcf_reader.infos['AA'].desc - 'Ancestral Allele' - -ALT records are actually classes, so that you can interrogate them:: - - >>> reader = vcf.Reader(open('vcf/test/example-4.1-bnd.vcf')) - >>> _ = reader.next(); row = reader.next() - >>> print row - Record(CHROM=1, POS=2, REF=T, ALT=[T[2:3[]) - >>> bnd = row.ALT[0] - >>> print bnd.withinMainAssembly, bnd.orientation, bnd.remoteOrientation, bnd.connectingSequence - True False True T - -Random access is supported for files with tabix indexes. Simply call fetch for the -region you are interested in:: - - >>> vcf_reader = vcf.Reader(filename='vcf/test/tb.vcf.gz') - >>> for record in vcf_reader.fetch('20', 1110696, 1230237): # doctest: +SKIP - ... print record - Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T]) - Record(CHROM=20, POS=1230237, REF=T, ALT=[None]) - -Or extract a single row:: - - >>> print vcf_reader.fetch('20', 1110696) # doctest: +SKIP - Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T]) - - -The ``Writer`` class provides a way of writing a VCF file. Currently, you must specify a -template ``Reader`` which provides the metadata:: - - >>> vcf_reader = vcf.Reader(filename='vcf/test/tb.vcf.gz') - >>> vcf_writer = vcf.Writer(open('/dev/null', 'w'), vcf_reader) - >>> for record in vcf_reader: - ... vcf_writer.write_record(record) - - -An extensible script is available to filter vcf files in vcf_filter.py. VCF filters -declared by other packages will be available for use in this script. Please -see :doc:`FILTERS` for full description. - -''' from vcf.parser import Reader, Writer from vcf.parser import VCFReader, VCFWriter from vcf.filters import Base as Filter from vcf.parser import RESERVED_INFO, RESERVED_FORMAT +from vcf.sample_filter import SampleFilter -VERSION = '0.5.0' +VERSION = '0.6.8' diff --git a/vcf/cparse.pyx b/vcf/cparse.pyx index 4a473d7..334542a 100644 --- a/vcf/cparse.pyx +++ b/vcf/cparse.pyx @@ -1,14 +1,27 @@ from model import _Call -cdef _map(func, iterable, bad='.'): +cdef _map(func, iterable, bad=['.', '']): '''``map``, but make bad values None.''' - return [func(x) if x != bad else None + return [func(x) if x not in bad else None for x in iterable] INTEGER = 'Integer' FLOAT = 'Float' NUMERIC = 'Numeric' +def _parse_filter(filt_str): + '''Parse the FILTER field of a VCF entry into a Python list + + NOTE: this method has a python equivalent and care must be taken + to keep the two methods equivalent + ''' + if filt_str == '.': + return None + elif filt_str == 'PASS': + return [] + else: + return filt_str.split(';') + def parse_samples( list names, list samples, samp_fmt, list samp_fmt_types, list samp_fmt_nums, site): @@ -36,7 +49,14 @@ def parse_samples( vals = sampvals[j] # short circuit the most common - if vals == '.' or vals == './.': + if samp_fmt._fields[j] == 'GT': + sampdat[j] = vals + continue + # genotype filters are a special case + elif samp_fmt._fields[j] == 'FT': + sampdat[j] = _parse_filter(vals) + continue + elif not vals or vals == '.': sampdat[j] = None continue @@ -45,24 +65,24 @@ def parse_samples( entry_num = samp_fmt_nums[j] # we don't need to split single entries - if entry_num == 1 or ',' not in vals: - + if entry_num == 1: if entry_type == INTEGER: - sampdat[j] = int(vals) + try: + sampdat[j] = int(vals) + except ValueError: + sampdat[j] = float(vals) elif entry_type == FLOAT or entry_type == NUMERIC: sampdat[j] = float(vals) else: sampdat[j] = vals - - if entry_num != 1: - sampdat[j] = (sampdat[j]) - continue vals = vals.split(',') - if entry_type == INTEGER: - sampdat[j] = _map(int, vals) + try: + sampdat[j] = _map(int, vals) + except ValueError: + sampdat[j] = map(float, vals) elif entry_type == FLOAT or entry_type == NUMERIC: sampdat[j] = _map(float, vals) else: diff --git a/vcf/model.py b/vcf/model.py index 9a27f87..34a4d17 100644 --- a/vcf/model.py +++ b/vcf/model.py @@ -1,27 +1,39 @@ from abc import ABCMeta, abstractmethod import collections import sys +import re + +try: + from collections import Counter +except ImportError: + from counter import Counter + +allele_delimiter = re.compile(r'''[|/]''') # to split a genotype into alleles class _Call(object): """ A genotype call, a cell entry in a VCF file""" - __slots__ = ['site', 'sample', 'data', 'gt_nums', 'called'] + __slots__ = ['site', 'sample', 'data', 'gt_nums', 'gt_alleles', 'called', 'ploidity'] def __init__(self, site, sample, data): #: The ``_Record`` for this ``_Call`` self.site = site #: The sample name self.sample = sample - #: Dictionary of data from the VCF file + #: Namedtuple of data from the VCF file self.data = data - try: - self.gt_nums = self.data.GT - #: True if the GT is not ./. - self.called = self.gt_nums is not None - except AttributeError: - self.gt_nums = None - # FIXME how do we know if a non GT call is called? + + if getattr(self.data, 'GT', None) is not None: + self.gt_alleles = [(al if al != '.' else None) for al in allele_delimiter.split(self.data.GT)] + self.ploidity = len(self.gt_alleles) + self.called = any(al is not None for al in self.gt_alleles) + self.gt_nums = self.data.GT if self.called else None + else: + #62 a call without a genotype is not defined as called or not + self.gt_alleles = None + self.ploidity = None self.called = None + self.gt_nums = None def __repr__(self): return "Call(sample=%s, %s)" % (self.sample, str(self.data)) @@ -30,19 +42,20 @@ def __eq__(self, other): """ Two _Calls are equal if their _Records are equal and the samples and ``gt_type``s are the same """ - return (self.site == other.site - and self.sample == other.sample - and self.gt_type == other.gt_type) + return (self.site == getattr(other, "site", None) + and self.sample == getattr(other, "sample", None) + and self.gt_type == getattr(other, "gt_type", None)) + + def __getstate__(self): + return dict((attr, getattr(self, attr)) for attr in self.__slots__) + + def __setstate__(self, state): + for attr in self.__slots__: + setattr(self, attr, state.get(attr)) def gt_phase_char(self): return "/" if not self.phased else "|" - @property - def gt_alleles(self): - '''The numbers of the alleles called at a given sample''' - # grab the numeric alleles of the gt string; tokenize by phasing - return self.gt_nums.split(self.gt_phase_char()) - @property def gt_bases(self): '''The actual genotype alleles. @@ -52,7 +65,7 @@ def gt_bases(self): if self.called: # lookup and return the actual DNA alleles try: - return self.gt_phase_char().join(str(self.site.alleles[int(X)]) for X in self.gt_alleles) + return self.gt_phase_char().join(str(self.site.alleles[int(X)] if X is not None else '.') for X in self.gt_alleles) except: sys.stderr.write("Allele number not found in list of alleles\n") else: @@ -70,10 +83,14 @@ def gt_type(self): if self.called: alleles = self.gt_alleles if all(X == alleles[0] for X in alleles[1:]): - if alleles[0] == "0": return 0 - else: return 2 - else: return 1 - else: return None + if alleles[0] == "0": + return 0 + else: + return 2 + else: + return 1 + else: + return None @property def phased(self): @@ -100,6 +117,18 @@ def is_het(self): return None return self.gt_type == 1 + @property + def is_filtered(self): + """ Return True for filtered calls """ + try: # no FT annotation present for this variant + filt = self.data.FT + except AttributeError: + return False + if filt is None or len(filt) == 0: # FT is not set or set to PASS + return False + else: + return True + class _Record(object): """ A set of calls at a site. Equivalent to a row in a VCF file. @@ -108,10 +137,45 @@ class _Record(object): INFO and FORMAT are available as properties. The list of genotype calls is in the ``samples`` property. + + Regarding the coordinates associated with each instance: + + - ``POS``, per VCF specification, is the one-based index + (the first base of the contig has an index of 1) of the first + base of the ``REF`` sequence. + - The ``start`` and ``end`` denote the coordinates of the entire + ``REF`` sequence in the zero-based, half-open coordinate + system (see + http://genomewiki.ucsc.edu/index.php/Coordinate_Transforms), + where the first base of the contig has an index of 0, and the + interval runs up to, but does not include, the base at the + ``end`` index. This indexing scheme is analagous to Python + slice notation. + - The ``affected_start`` and ``affected_end`` coordinates are + also in the zero-based, half-open coordinate system. These + coordinates indicate the precise region of the reference + genome actually affected by the events denoted in ``ALT`` + (i.e., the minimum ``affected_start`` and maximum + ``affected_end``). + + - For SNPs and structural variants, the affected region + includes all bases of ``REF``, including the first base + (i.e., ``affected_start = start = POS - 1``). + - For deletions, the region includes all bases of ``REF`` + except the first base, which flanks upstream the actual + deletion event, per VCF specification. + - For insertions, the ``affected_start`` and ``affected_end`` + coordinates represent a 0 bp-length region between the two + flanking bases (i.e., ``affected_start`` = + ``affected_end``). This is analagous to Python slice + notation (see http://stackoverflow.com/a/2947881/38140). + Neither the upstream nor downstream flanking bases are + included in the region. """ def __init__(self, CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, sample_indexes, samples=None): self.CHROM = CHROM + #: the one-based coordinate of the first nucleotide in ``REF`` self.POS = POS self.ID = ID self.REF = REF @@ -120,23 +184,87 @@ def __init__(self, CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, self.FILTER = FILTER self.INFO = INFO self.FORMAT = FORMAT - #: 0-based start coordinate + #: zero-based, half-open start coordinate of ``REF`` self.start = self.POS - 1 - #: 1-based end coordinate + #: zero-based, half-open end coordinate of ``REF`` self.end = self.start + len(self.REF) #: list of alleles. [0] = REF, [1:] = ALTS self.alleles = [self.REF] self.alleles.extend(self.ALT) #: list of ``_Calls`` for each sample ordered as in source VCF - self.samples = samples + self.samples = samples or [] self._sample_indexes = sample_indexes + # Setting affected_start and affected_end here for Sphinx + # autodoc purposes... + #: zero-based, half-open start coordinate of affected region of reference genome + self.affected_start = None + #: zero-based, half-open end coordinate of affected region of reference genome (not included in the region) + self.affected_end = None + self._set_start_and_end() + + + def _set_start_and_end(self): + self.affected_start = self.affected_end = self.POS + for alt in self.ALT: + if alt is None: + start, end = self._compute_coordinates_for_none_alt() + elif alt.type == 'SNV': + start, end = self._compute_coordinates_for_snp() + elif alt.type == 'MNV': + start, end = self._compute_coordinates_for_indel() + else: + start, end = self._compute_coordinates_for_sv() + self.affected_start = min(self.affected_start, start) + self.affected_end = max(self.affected_end, end) + + + def _compute_coordinates_for_none_alt(self): + start = self.POS - 1 + end = start + len(self.REF) + return (start, end) + + + def _compute_coordinates_for_snp(self): + if len(self.REF) > 1: + start = self.POS + end = start + (len(self.REF) - 1) + else: + start = self.POS - 1 + end = self.POS + return (start, end) + + + def _compute_coordinates_for_indel(self): + if len(self.REF) > 1: + start = self.POS + end = start + (len(self.REF) - 1) + else: + start = end = self.POS + return (start, end) + + + def _compute_coordinates_for_sv(self): + start = self.POS - 1 + end = start + len(self.REF) + return (start, end) + + + # For Python 2 + def __cmp__(self, other): + return cmp((self.CHROM, self.POS), (getattr(other, "CHROM", None), getattr(other, "POS", None))) + + # For Python 3 def __eq__(self, other): """ _Records are equal if they describe the same variant (same position, alleles) """ - return (self.CHROM == other.CHROM and - self.POS == other.POS and - self.REF == other.REF and - self.ALT == other.ALT) + return (self.CHROM == getattr(other, "CHROM", None) and + self.POS == getattr(other, "POS", None) and + self.REF == getattr(other, "REF", None) and + self.ALT == getattr(other, "ALT", None)) + + # For Python 3 + def __lt__(self, other): + return (self.CHROM, self.POS) < (getattr(other, "CHROM", None), getattr(other, "POS", None)) def __iter__(self): return iter(self.samples) @@ -144,14 +272,14 @@ def __iter__(self): def __str__(self): return "Record(CHROM=%(CHROM)s, POS=%(POS)s, REF=%(REF)s, ALT=%(ALT)s)" % self.__dict__ - def __cmp__(self, other): - return cmp( (self.CHROM, self.POS), (other.CHROM, other.POS)) - def add_format(self, fmt): self.FORMAT = self.FORMAT + ':' + fmt def add_filter(self, flt): - self.FILTER.append(flt) + if self.FILTER is None: + self.FILTER = [flt] + else: + self.FILTER.append(flt) def add_info(self, info, value=True): self.INFO[info] = value @@ -163,7 +291,7 @@ def genotype(self, name): @property def num_called(self): """ The number of called samples""" - return sum(s.called for s in self.samples) + return sum(1 for s in self.samples if s.called) @property def call_rate(self): @@ -192,18 +320,17 @@ def num_unknown(self): @property def aaf(self): - """ The allele frequency of the alternate allele. - NOTE 1: Punt if more than one alternate allele. - NOTE 2: Denominator calc'ed from _called_ genotypes. + """ A list of allele frequencies of alternate alleles. + NOTE: Denominator calc'ed from _called_ genotypes. """ - # skip if more than one alternate allele. assumes bi-allelic - if len(self.ALT) > 1: - return None - hom_ref = self.num_hom_ref - het = self.num_het - hom_alt = self.num_hom_alt - num_chroms = float(2.0 * self.num_called) - return float(het + 2 * hom_alt) / float(num_chroms) + num_chroms = 0.0 + allele_counts = Counter() + for s in self.samples: + if s.gt_type is not None: + for a in s.gt_alleles: + allele_counts.update([a]) + num_chroms += 1 + return [allele_counts[str(i)]/num_chroms for i in range(1, len(self.ALT)+1)] @property def nucl_diversity(self): @@ -215,16 +342,28 @@ def nucl_diversity(self): Derived from: \"Population Genetics: A Concise Guide, 2nd ed., p.45\" - John Gillespie. + John Gillespie. """ # skip if more than one alternate allele. assumes bi-allelic if len(self.ALT) > 1: return None - p = self.aaf + p = self.aaf[0] q = 1.0 - p num_chroms = float(2.0 * self.num_called) return float(num_chroms / (num_chroms - 1.0)) * (2.0 * p * q) + @property + def heterozygosity(self): + """ + Heterozygosity of a site. Heterozygosity gives the probability that + two randomly chosen chromosomes from the population have different + alleles, giving a measure of the degree of polymorphism in a population. + + If there are i alleles with frequency p_i, H=1-sum_i(p_i^2) + """ + allele_freqs = [1-sum(self.aaf)] + self.aaf + return 1 - sum(map(lambda x: x**2, allele_freqs)) + def get_hom_refs(self): """ The list of hom ref genotypes""" return [s for s in self.samples if s.gt_type == 0] @@ -244,11 +383,12 @@ def get_unknowns(self): @property def is_snp(self): """ Return whether or not the variant is a SNP """ - if len(self.REF) > 1: return False + if len(self.REF) > 1: + return False for alt in self.ALT: if alt is None or alt.type != "SNV": return False - if alt not in ['A', 'C', 'G', 'T']: + if alt not in ['A', 'C', 'G', 'T', 'N', '*']: return False return True @@ -257,10 +397,11 @@ def is_indel(self): """ Return whether or not the variant is an INDEL """ is_sv = self.is_sv - if len(self.REF) > 1 and not is_sv: return True + if len(self.REF) > 1 and not is_sv: + return True for alt in self.ALT: if alt is None: - return True + return False if alt.type != "SNV" and alt.type != "MNV": return False elif len(alt) != len(self.REF): @@ -284,7 +425,8 @@ def is_sv(self): def is_transition(self): """ Return whether or not the SNP is a transition """ # if multiple alts, it is unclear if we have a transition - if len(self.ALT) > 1: return False + if len(self.ALT) > 1: + return False if self.is_snp: # just one alt allele @@ -294,24 +436,29 @@ def is_transition(self): (self.REF == "C" and alt_allele == "T") or (self.REF == "T" and alt_allele == "C")): return True - else: return False - else: return False + else: + return False + else: + return False @property def is_deletion(self): """ Return whether or not the INDEL is a deletion """ # if multiple alts, it is unclear if we have a transition - if len(self.ALT) > 1: return False + if len(self.ALT) > 1: + return False if self.is_indel: # just one alt allele alt_allele = self.ALT[0] if alt_allele is None: - return True + return False if len(self.REF) > len(alt_allele): return True - else: return False - else: return False + else: + return False + else: + return False @property def var_type(self): @@ -332,13 +479,14 @@ def var_type(self): def var_subtype(self): """ Return the subtype of variant. + - For SNPs and INDELs, yeild one of: [ts, tv, ins, del] - - For SVs yield either "complex" or the SV type defined - in the ALT fields (removing the brackets). - E.g.: - -> DEL - -> INS:ME:L1 - -> DUP + - For SVs yield either "complex" or the SV type defined in the ALT + fields (removing the brackets). E.g.:: + + -> DEL + -> INS:ME:L1 + -> DUP The logic is meant to follow the rules outlined in the following paragraph at: @@ -400,6 +548,15 @@ def is_monomorphic(self): """ Return True for reference calls """ return len(self.ALT) == 1 and self.ALT[0] is None + @property + def is_filtered(self): + """ Return True if a variant has been filtered """ + filt = self.FILTER + if filt is None or len(filt) == 0: # FILTER is not set or set to PASS + return False + else: + return True + class _AltRecord(object): '''An alternative allele record: either replacement string, SV placeholder, or breakend''' @@ -415,7 +572,7 @@ def __str__(self): raise NotImplementedError def __eq__(self, other): - return self.type == other.type + return self.type == getattr(other, 'type', None) class _Substitution(_AltRecord): @@ -441,8 +598,9 @@ def __len__(self): def __eq__(self, other): if isinstance(other, basestring): return self.sequence == other - else: - return super(_Substitution, self).__eq__(other) and self.sequence == other.sequence + elif not isinstance(other, self.__class__): + return False + return super(_Substitution, self).__eq__(other) and self.sequence == other.sequence class _Breakend(_AltRecord): @@ -451,9 +609,15 @@ class _Breakend(_AltRecord): def __init__(self, chr, pos, orientation, remoteOrientation, connectingSequence, withinMainAssembly, **kwargs): super(_Breakend, self).__init__(type="BND", **kwargs) #: The chromosome of breakend's mate. - self.chr = str(chr) + if chr is not None: + self.chr = str(chr) + else: + self.chr = None # Single breakend #: The coordinate of breakend's mate. - self.pos = int(pos) + if pos is not None: + self.pos = int(pos) + else: + self.pos = None #: The orientation of breakend's mate. If the sequence 3' of the breakend's mate is connected, True, else if the sequence 5' of the breakend's mate is connected, False. self.remoteOrientation = remoteOrientation #: If the breakend mate is within the assembly, True, else False if the breakend mate is on a contig in an ancillary assembly file. @@ -485,13 +649,15 @@ def __str__(self): return self.connectingSequence + remoteTag def __eq__(self, other): + if not isinstance(other, self.__class__): + return False return super(_Breakend, self).__eq__(other) \ - and self.chr == other.chr \ - and self.pos == other.pos \ - and self.remoteOrientation == other.remoteOrientation \ - and self.withinMainAssembly == other.withinMainAssembly \ - and self.orientation == other.orientation \ - and self.connectingSequence == other.connectingSequence + and self.chr == getattr(other, "chr", None) \ + and self.pos == getattr(other, "pos", None) \ + and self.remoteOrientation == getattr(other, "remoteOrientation", None) \ + and self.withinMainAssembly == getattr(other, "withinMainAssembly", None) \ + and self.orientation == getattr(other, "orientation", None) \ + and self.connectingSequence == getattr(other, "connectingSequence", None) class _SingleBreakend(_Breakend): @@ -528,4 +694,8 @@ def __str__(self): for (x, y) in zip(self._fields, self)]) return "CallData(" + dat + ')' + def __reduce__(self): + args = super(CallData, self).__reduce__() + return make_calldata_tuple, (fields, ) + return CallData diff --git a/vcf/parser.py b/vcf/parser.py index f274e9c..c3c3d08 100644 --- a/vcf/parser.py +++ b/vcf/parser.py @@ -1,10 +1,11 @@ +import codecs import collections -import re import csv import gzip -import sys import itertools -import codecs +import os +import re +import sys try: from collections import OrderedDict @@ -29,25 +30,30 @@ RESERVED_INFO = { 'AA': 'String', 'AC': 'Integer', 'AF': 'Float', 'AN': 'Integer', 'BQ': 'Float', 'CIGAR': 'String', 'DB': 'Flag', 'DP': 'Integer', - 'END': 'Integer', 'H2': 'Flag', 'MQ': 'Float', 'MQ0': 'Integer', - 'NS': 'Integer', 'SB': 'String', 'SOMATIC': 'Flag', 'VALIDATED': 'Flag', - - # VCF 4.1 Additions - 'IMPRECISE':'Flag', 'NOVEL':'Flag', 'END':'Integer', 'SVTYPE':'String', - 'CIPOS':'Integer','CIEND':'Integer','HOMLEN':'Integer','HOMSEQ':'Integer', - 'BKPTID':'String','MEINFO':'String','METRANS':'String','DGVID':'String', - 'DBVARID':'String','MATEID':'String','PARID':'String','EVENT':'String', - 'CILEN':'Integer','CN':'Integer','CNADJ':'Integer','CICN':'Integer', - 'CICNADJ':'Integer' + 'END': 'Integer', 'H2': 'Flag', 'H3': 'Flag', 'MQ': 'Float', + 'MQ0': 'Integer', 'NS': 'Integer', 'SB': 'String', 'SOMATIC': 'Flag', + 'VALIDATED': 'Flag', '1000G': 'Flag', + + # Keys used for structural variants + 'IMPRECISE': 'Flag', 'NOVEL': 'Flag', 'SVTYPE': 'String', + 'SVLEN': 'Integer', 'CIPOS': 'Integer', 'CIEND': 'Integer', + 'HOMLEN': 'Integer', 'HOMSEQ': 'String', 'BKPTID': 'String', + 'MEINFO': 'String', 'METRANS': 'String', 'DGVID': 'String', + 'DBVARID': 'String', 'DBRIPID': 'String', 'MATEID': 'String', + 'PARID': 'String', 'EVENT': 'String', 'CILEN': 'Integer', + 'DPADJ': 'Integer', 'CN': 'Integer', 'CNADJ': 'Integer', + 'CICN': 'Integer', 'CICNADJ': 'Integer' } RESERVED_FORMAT = { 'GT': 'String', 'DP': 'Integer', 'FT': 'String', 'GL': 'Float', - 'GQ': 'Float', 'HQ': 'Float', + 'GLE': 'String', 'PL': 'Integer', 'GP': 'Float', 'GQ': 'Integer', + 'HQ': 'Integer', 'PS': 'Integer', 'PQ': 'Integer', 'EC': 'Integer', + 'MQ': 'Integer', - # VCF 4.1 Additions - 'CN':'Integer','CNQ':'Float','CNL':'Float','NQ':'Integer','HAP':'Integer', - 'AHAP':'Integer' + # Keys used for structural variants + 'CN': 'Integer', 'CNQ': 'Float', 'CNL': 'Float', 'NQ': 'Integer', + 'HAP': 'Integer', 'AHAP': 'Integer' } # Spec is a bit weak on which metadata lines are singular, like fileformat @@ -57,47 +63,58 @@ # Conversion between value in file and Python value field_counts = { '.': None, # Unknown number of values - 'A': -1, # Equal to the number of alleles in a given record + 'A': -1, # Equal to the number of alternate alleles in a given record 'G': -2, # Equal to the number of genotypes in a given record + 'R': -3, # Equal to the number of alleles including reference in a given record } -_Info = collections.namedtuple('Info', ['id', 'num', 'type', 'desc']) +_Info = collections.namedtuple('Info', ['id', 'num', 'type', 'desc', 'source', 'version']) _Filter = collections.namedtuple('Filter', ['id', 'desc']) _Alt = collections.namedtuple('Alt', ['id', 'desc']) _Format = collections.namedtuple('Format', ['id', 'num', 'type', 'desc']) _SampleInfo = collections.namedtuple('SampleInfo', ['samples', 'gt_bases', 'gt_types', 'gt_phases']) +_Contig = collections.namedtuple('Contig', ['id', 'length']) class _vcf_metadata_parser(object): - '''Parse the metadat in the header of a VCF file.''' + '''Parse the metadata in the header of a VCF file.''' def __init__(self): super(_vcf_metadata_parser, self).__init__() self.info_pattern = re.compile(r'''\#\#INFO=< - ID=(?P[^,]+), - Number=(?P-?\d+|\.|[AG]), - Type=(?PInteger|Float|Flag|Character|String), + ID=(?P[^,]+),\s* + Number=(?P-?\d+|\.|[AGR])?,\s* + Type=(?PInteger|Float|Flag|Character|String),\s* Description="(?P[^"]*)" + (?:,\s*Source="(?P[^"]*)")? + (?:,\s*Version="?(?P[^"]*)"?)? >''', re.VERBOSE) self.filter_pattern = re.compile(r'''\#\#FILTER=< - ID=(?P[^,]+), + ID=(?P[^,]+),\s* Description="(?P[^"]*)" >''', re.VERBOSE) self.alt_pattern = re.compile(r'''\#\#ALT=< - ID=(?P[^,]+), + ID=(?P[^,]+),\s* Description="(?P[^"]*)" >''', re.VERBOSE) self.format_pattern = re.compile(r'''\#\#FORMAT=< - ID=(?P.+), - Number=(?P-?\d+|\.|[AG]), - Type=(?P.+), + ID=(?P.+),\s* + Number=(?P-?\d+|\.|[AGR]),\s* + Type=(?P.+),\s* Description="(?P.*)" >''', re.VERBOSE) + self.contig_pattern = re.compile(r'''\#\#contig=< + ID=(?P[^>,]+) + (,.*length=(?P-?\d+))? + .* + >''', re.VERBOSE) self.meta_pattern = re.compile(r'''##(?P.+?)=(?P.+)''') def vcf_field_count(self, num_str): """Cast vcf header numbers to integer or None""" - if num_str not in field_counts: + if num_str is None: + return None + elif num_str not in field_counts: # Fixed, specified number return int(num_str) else: @@ -113,7 +130,8 @@ def read_info(self, info_string): num = self.vcf_field_count(match.group('number')) info = _Info(match.group('id'), num, - match.group('type'), match.group('desc')) + match.group('type'), match.group('desc'), + match.group('source'), match.group('version')) return (match.group('id'), info) @@ -133,7 +151,7 @@ def read_alt(self, alt_string): match = self.alt_pattern.match(alt_string) if not match: raise SyntaxError( - "One of the FILTER lines is malformed: %s" % alt_string) + "One of the ALT lines is malformed: %s" % alt_string) alt = _Alt(match.group('id'), match.group('desc')) @@ -153,31 +171,82 @@ def read_format(self, format_string): return (match.group('id'), form) + def read_contig(self, contig_string): + '''Read a meta-contigrmation INFO line.''' + match = self.contig_pattern.match(contig_string) + if not match: + raise SyntaxError( + "One of the contig lines is malformed: %s" % contig_string) + length = self.vcf_field_count(match.group('length')) + contig = _Contig(match.group('id'), length) + return (match.group('id'), contig) + def read_meta_hash(self, meta_string): - items = re.split("[<>]", meta_string) - # Removing initial hash marks and final equal sign - key = items[0][2:-1] - hashItems = items[1].split(',') - val = dict(item.split("=") for item in hashItems) + # assert re.match("##.+=<", meta_string) + items = meta_string.split('=', 1) + # Removing initial hash marks + key = items[0].lstrip('#') + # N.B., items can have quoted values, so cannot just split on comma + val = OrderedDict() + state = 0 + k = '' + v = '' + for c in items[1].strip('[<>]'): + + if state == 0: # reading item key + if c == '=': + state = 1 # end of key, start reading value + else: + k += c # extend key + elif state == 1: # reading item value + if v == '' and c == '"': + v += c # include quote mark in value + state = 2 # start reading quoted value + elif c == ',': + val[k] = v # store parsed item + state = 0 # read next key + k = '' + v = '' + else: + v += c + elif state == 2: # reading quoted item value + if c == '"': + v += c # include quote mark in value + state = 1 # end quoting + else: + v += c + if k != '': + val[k] = v return key, val def read_meta(self, meta_string): if re.match("##.+=<", meta_string): return self.read_meta_hash(meta_string) - else: - match = self.meta_pattern.match(meta_string) - return match.group('key'), match.group('val') + match = self.meta_pattern.match(meta_string) + if not match: + # Spec only allows key=value, but we try to be liberal and + # interpret anything else as key=none (and all values are parsed + # as strings). + return meta_string.lstrip('#'), 'none' + return match.group('key'), match.group('val') class Reader(object): """ Reader for a VCF v 4.0 file, an iterator returning ``_Record objects`` """ - def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=False): + def __init__(self, fsock=None, filename=None, compressed=None, prepend_chr=False, + strict_whitespace=False, encoding='ascii'): """ Create a new Reader for a VCF file. You must specify either fsock (stream) or filename. Gzipped streams or files are attempted to be recogized by the file extension, or gzipped can be forced with ``compressed=True`` + + 'prepend_chr=True' will put 'chr' before all the CHROM values, useful + for different sources. + + 'strict_whitespace=True' will split records on tabs only (as with VCF + spec) which allows you to parse files with spaces in the sample names. """ super(Reader, self).__init__() @@ -188,15 +257,25 @@ def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=Fals self._reader = fsock if filename is None and hasattr(fsock, 'name'): filename = fsock.name - compressed = compressed or filename.endswith('.gz') + if compressed is None: + compressed = filename.endswith('.gz') elif filename: - compressed = compressed or filename.endswith('.gz') + if compressed is None: + compressed = filename.endswith('.gz') self._reader = open(filename, 'rb' if compressed else 'rt') self.filename = filename if compressed: self._reader = gzip.GzipFile(fileobj=self._reader) if sys.version > '3': - self._reader = codecs.getreader('ascii')(self._reader) + self._reader = codecs.getreader(encoding)(self._reader) + + if strict_whitespace: + self._separator = '\t' + else: + self._separator = '\t| +' + + self._row_pattern = re.compile(self._separator) + self._alt_pattern = re.compile('[\[\]]') self.reader = (line.strip() for line in self._reader if line.strip()) @@ -210,13 +289,17 @@ def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=Fals self.alts = None #: FORMAT fields from header self.formats = None + #: contig fields from header + self.contigs = None self.samples = None self._sample_indexes = None self._header_lines = [] + self._column_headers = [] self._tabix = None self._prepend_chr = prepend_chr self._parse_metainfo() self._format_cache = {} + self.encoding = encoding def __iter__(self): return self @@ -226,12 +309,12 @@ def _parse_metainfo(self): The end user shouldn't have to use this. She can access the metainfo directly with ``self.metadata``.''' - for attr in ('metadata', 'infos', 'filters', 'alts', 'formats'): + for attr in ('metadata', 'infos', 'filters', 'alts', 'contigs', 'formats'): setattr(self, attr, OrderedDict()) parser = _vcf_metadata_parser() - line = self.reader.next() + line = next(self.reader) while line.startswith('##'): self._header_lines.append(line) @@ -251,6 +334,10 @@ def _parse_metainfo(self): key, val = parser.read_format(line) self.formats[key] = val + elif line.startswith('##contig'): + key, val = parser.read_contig(line) + self.contigs[key] = val + else: key, val = parser.read_meta(line) if key in SINGULAR_METADATA: @@ -260,17 +347,31 @@ def _parse_metainfo(self): self.metadata[key] = [] self.metadata[key].append(val) - line = self.reader.next() + line = next(self.reader) - fields = re.split('\t| +', line) + fields = self._row_pattern.split(line[1:]) + self._column_headers = fields[:9] self.samples = fields[9:] self._sample_indexes = dict([(x,i) for (i,x) in enumerate(self.samples)]) - def _map(self, func, iterable, bad='.'): + def _map(self, func, iterable, bad=['.', '']): '''``map``, but make bad values None.''' - return [func(x) if x != bad else None + return [func(x) if x not in bad else None for x in iterable] + def _parse_filter(self, filt_str): + '''Parse the FILTER field of a VCF entry into a Python list + + NOTE: this method has a cython equivalent and care must be taken + to keep the two methods equivalent + ''' + if filt_str == '.': + return None + elif filt_str == 'PASS': + return [] + else: + return filt_str.split(';') + def _parse_info(self, info_str): '''Parse the INFO field of a VCF entry into a dictionary of Python types. @@ -280,10 +381,10 @@ def _parse_info(self, info_str): return {} entries = info_str.split(';') - retdict = OrderedDict() + retdict = {} for entry in entries: - entry = entry.split('=') + entry = entry.split('=', 1) ID = entry[0] try: entry_type = self.infos[ID].type @@ -298,20 +399,27 @@ def _parse_info(self, info_str): if entry_type == 'Integer': vals = entry[1].split(',') - val = self._map(int, vals) + try: + val = self._map(int, vals) + # Allow specified integers to be flexibly parsed as floats. + # Handles cases with incorrectly specified header types. + except ValueError: + val = self._map(float, vals) elif entry_type == 'Float': vals = entry[1].split(',') val = self._map(float, vals) elif entry_type == 'Flag': val = True - elif entry_type == 'String': + elif entry_type in ('String', 'Character'): try: - val = entry[1] + vals = entry[1].split(',') # commas are reserved characters indicating multiple values + val = self._map(str, vals) except IndexError: + entry_type = 'Flag' val = True try: - if self.infos[ID].num == 1 and entry_type != 'String': + if self.infos[ID].num == 1 and entry_type not in ( 'Flag', ): val = val[0] except KeyError: pass @@ -349,7 +457,6 @@ def _parse_samples(self, samples, samp_fmt, site): # check whether we already know how to parse this format if samp_fmt not in self._format_cache: self._format_cache[samp_fmt] = self._parse_sample_format(samp_fmt) - samp_fmt = self._format_cache[samp_fmt] if cparse: @@ -369,7 +476,14 @@ def _parse_samples(self, samples, samp_fmt, site): for i, vals in enumerate(sample.split(':')): # short circuit the most common - if vals == '.' or vals == './.': + if samp_fmt._fields[i] == 'GT': + sampdat[i] = vals + continue + # genotype filters are a special case + elif samp_fmt._fields[i] == 'FT': + sampdat[i] = self._parse_filter(vals) + continue + elif not vals or vals == ".": sampdat[i] = None continue @@ -377,24 +491,24 @@ def _parse_samples(self, samples, samp_fmt, site): entry_type = samp_fmt._types[i] # we don't need to split single entries - if entry_num == 1 or ',' not in vals: - + if entry_num == 1: if entry_type == 'Integer': - sampdat[i] = int(vals) - elif entry_type == 'Float': + try: + sampdat[i] = int(vals) + except ValueError: + sampdat[i] = float(vals) + elif entry_type == 'Float' or entry_type == 'Numeric': sampdat[i] = float(vals) else: sampdat[i] = vals - - if entry_num != 1: - sampdat[i] = (sampdat[i]) - continue vals = vals.split(',') - if entry_type == 'Integer': - sampdat[i] = _map(int, vals) + try: + sampdat[i] = _map(int, vals) + except ValueError: + sampdat[i] = _map(float, vals) elif entry_type == 'Float' or entry_type == 'Numeric': sampdat[i] = _map(float, vals) else: @@ -407,9 +521,9 @@ def _parse_samples(self, samples, samp_fmt, site): return samp_data def _parse_alt(self, str): - if re.search('[\[\]]', str) is not None: + if self._alt_pattern.search(str) is not None: # Paired breakend - items = re.split('[\[\]]', str) + items = self._alt_pattern.split(str) remoteCoords = items[1].split(':') chr = remoteCoords[0] if chr[0] == '<': @@ -436,8 +550,8 @@ def _parse_alt(self, str): def next(self): '''Return the next record in the file.''' - line = self.reader.next() - row = re.split('\t| +', line) + line = next(self.reader) + row = self._row_pattern.split(line.rstrip()) chrom = row[0] if self._prepend_chr: chrom = 'chr' + chrom @@ -459,17 +573,16 @@ def next(self): except ValueError: qual = None - filt = row[6] - if filt == 'PASS' or filt == '.': - filt = [] - else: - filt = filt.split(';') + filt = self._parse_filter(row[6]) info = self._parse_info(row[7]) try: fmt = row[8] except IndexError: fmt = None + else: + if fmt == '.': + fmt = None record = _Record(chrom, pos, ID, ref, alt, qual, filt, info, fmt, self._sample_indexes) @@ -480,50 +593,69 @@ def next(self): return record - def fetch(self, chrom, start, end=None): - """ fetch records from a Tabix indexed VCF, requires pysam - if start and end are specified, return iterator over positions - if end not specified, return individual ``_Call`` at start or None + def fetch(self, chrom, start=None, end=None): + """ Fetches records from a tabix-indexed VCF file and returns an + iterable of ``_Record`` instances + + chrom must be specified. + + The start and end coordinates are in the zero-based, + half-open coordinate system, similar to ``_Record.start`` and + ``_Record.end``. The very first base of a chromosome is + index 0, and the the region includes bases up to, but not + including the base at the end coordinate. For example + ``fetch('4', 10, 20)`` would include all variants + overlapping a 10 base pair region from the 11th base of + through the 20th base (which is at index 19) of chromosome + 4. It would not include the 21st base (at index 20). See + http://genomewiki.ucsc.edu/index.php/Coordinate_Transforms + for more information on the zero-based, half-open coordinate + system. + + If end is omitted, all variants from start until the end of + the chromosome chrom will be included. + + If start and end are omitted, all variants on chrom will be + returned. + + requires pysam + """ if not pysam: raise Exception('pysam not available, try "pip install pysam"?') - if not self.filename: raise Exception('Please provide a filename (or a "normal" fsock)') if not self._tabix: - self._tabix = pysam.Tabixfile(self.filename) + self._tabix = pysam.Tabixfile(self.filename, + encoding=self.encoding) if self._prepend_chr and chrom[:3] == 'chr': chrom = chrom[3:] - # not sure why tabix needs position -1 - start = start - 1 - - if end is None: - self.reader = self._tabix.fetch(chrom, start, start + 1) - try: - return self.next() - except StopIteration: - return None - self.reader = self._tabix.fetch(chrom, start, end) return self class Writer(object): - """ VCF Writer """ - - fixed_fields = "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT".split() + """VCF Writer. On Windows Python 2, open stream with 'wb'.""" # Reverse keys and values in header field count dictionary counts = dict((v,k) for k,v in field_counts.iteritems()) - def __init__(self, stream, template, lineterminator="\r\n"): - self.writer = csv.writer(stream, delimiter="\t", lineterminator=lineterminator) + def __init__(self, stream, template, lineterminator="\n"): + self.writer = csv.writer(stream, delimiter="\t", + lineterminator=lineterminator, + quotechar='', quoting=csv.QUOTE_NONE) self.template = template self.stream = stream + # Order keys for INFO fields defined in the header (undefined fields + # get a maximum key). + self.info_order = collections.defaultdict( + lambda: len(template.infos), + dict(zip(template.infos.iterkeys(), itertools.count()))) + two = '##{key}=\n' four = '##{key}=\n' _num = self._fix_field_count @@ -531,7 +663,12 @@ def __init__(self, stream, template, lineterminator="\r\n"): if key in SINGULAR_METADATA: vals = [vals] for val in vals: - stream.write('##{0}={1}\n'.format(key, val)) + if isinstance(val, dict): + values = ','.join('{0}={1}'.format(key, value) + for key, value in val.items()) + stream.write('##{0}=<{1}>\n'.format(key, values)) + else: + stream.write('##{0}={1}\n'.format(key, val)) for line in template.infos.itervalues(): stream.write(four.format(key="INFO", *line, num=_num(line.num))) for line in template.formats.itervalues(): @@ -540,18 +677,26 @@ def __init__(self, stream, template, lineterminator="\r\n"): stream.write(two.format(key="FILTER", *line)) for line in template.alts.itervalues(): stream.write(two.format(key="ALT", *line)) + for line in template.contigs.itervalues(): + if line.length: + stream.write('##contig=\n'.format(*line)) + else: + stream.write('##contig=\n'.format(*line)) self._write_header() def _write_header(self): # TODO: write INFO, etc - self.writer.writerow(self.fixed_fields + self.template.samples) + self.stream.write('#' + '\t'.join(self.template._column_headers + + self.template.samples) + '\n') def write_record(self, record): """ write a record to the file """ ffs = self._map(str, [record.CHROM, record.POS, record.ID, record.REF]) \ + [self._format_alt(record.ALT), record.QUAL or '.', self._format_filter(record.FILTER), - self._format_info(record.INFO), record.FORMAT] + self._format_info(record.INFO)] + if record.FORMAT: + ffs.append(record.FORMAT) samples = [self._format_sample(record.FORMAT, sample) for sample in record.samples] @@ -589,12 +734,29 @@ def _format_filter(self, flt): def _format_info(self, info): if not info: return '.' - return ';'.join([self._stringify_pair(x,y) for x, y in info.iteritems()]) + def order_key(field): + # Order by header definition first, alphabetically second. + return self.info_order[field], field + return ';'.join(self._stringify_pair(f, info[f]) for f in + sorted(info, key=order_key)) def _format_sample(self, fmt, sample): - if getattr(sample.data, 'GT', None) is None: - return "./." - return ':'.join([self._stringify(x) for x in sample.data]) + if hasattr(sample.data, 'GT'): + gt = sample.data.GT + else: + gt = './.' if 'GT' in fmt else '' + + result = [gt] if gt else [] + # Following the VCF spec, GT is always the first item whenever it is present. + for field in sample.data._fields: + value = getattr(sample.data,field) + if field == 'GT': + continue + if field == 'FT': + result.append(self._format_filter(value)) + else: + result.append(self._stringify(value)) + return ':'.join(result) def _stringify(self, x, none='.', delim=','): if type(x) == type([]): diff --git a/vcf/sample_filter.py b/vcf/sample_filter.py new file mode 100644 index 0000000..b156b45 --- /dev/null +++ b/vcf/sample_filter.py @@ -0,0 +1,115 @@ +# Author: Lenna X. Peterson +# github.com/lennax +# arklenna at gmail dot com + +import logging +import sys +import warnings + + +from parser import Reader, Writer + + +class SampleFilter(object): + """ + Modifies the vcf Reader to filter each row by sample as it is parsed. + + """ + + def __init__(self, infile, outfile=None, filters=None, invert=False): + # Methods to add to Reader + def get_filter(self): + return self._samp_filter + + def set_filter(self, filt): + self._samp_filter = filt + if filt: + self.samples = [val for idx, val in enumerate(self.samples) + if idx not in set(filt)] + + def filter_samples(fn): + """Decorator function to filter sample parameter""" + def filt(self, samples, *args): + samples = [val for idx, val in enumerate(samples) + if idx not in set(self.sample_filter)] + return fn(self, samples, *args) + return filt + + # Add property to Reader for filter list + Reader.sample_filter = property(get_filter, set_filter) + Reader._samp_filter = [] + # Modify Reader._parse_samples to filter samples + self._orig_parse_samples = Reader._parse_samples + Reader._parse_samples = filter_samples(Reader._parse_samples) + self.parser = Reader(filename=infile) + # Store initial samples and indices + self.samples = self.parser.samples + self.smp_idx = dict([(v, k) for k, v in enumerate(self.samples)]) + # Properties for filter/writer + self.outfile = outfile + self.invert = invert + self.filters = filters + if filters is not None: + self.set_filters() + self.write() + + def __del__(self): + try: + self._undo_monkey_patch() + except AttributeError: + pass + + def set_filters(self, filters=None, invert=False): + """Convert filters from string to list of indices, set on Reader""" + if filters is not None: + self.filters = filters + if invert: + self.invert = invert + filt_l = self.filters.split(",") + filt_s = set(filt_l) + if len(filt_s) < len(filt_l): + warnings.warn("Non-unique filters, ignoring", RuntimeWarning) + + def filt2idx(item): + """Convert filter to valid sample index""" + try: + item = int(item) + except ValueError: + # not an idx, check if it's a value + return self.smp_idx.get(item) + else: + # is int, check if it's an idx + if item < len(self.samples): + return item + filters = set(filter(lambda x: x is not None, map(filt2idx, filt_s))) + if len(filters) < len(filt_s): + # TODO print the filters that were ignored + warnings.warn("Invalid filters, ignoring", RuntimeWarning) + + if self.invert: + filters = set(xrange(len(self.samples))).difference(filters) + + # `sample_filter` setter updates `samples` + self.parser.sample_filter = filters + if len(self.parser.samples) == 0: + warnings.warn("Number of samples to keep is zero", RuntimeWarning) + logging.info("Keeping these samples: {0}\n".format(self.parser.samples)) + return self.parser.samples + + def write(self, outfile=None): + if outfile is not None: + self.outfile = outfile + if self.outfile is None: + _out = sys.stdout + elif hasattr(self.outfile, 'write'): + _out = self.outfile + else: + _out = open(self.outfile, "wb") + logging.info("Writing to '{0}'\n".format(self.outfile)) + writer = Writer(_out, self.parser) + for row in self.parser: + writer.write_record(row) + + def _undo_monkey_patch(self): + Reader._parse_samples = self._orig_parse_samples + delattr(Reader, 'sample_filter') diff --git a/vcf/test/1kg.sites.vcf b/vcf/test/1kg.sites.vcf new file mode 100644 index 0000000..857a944 --- /dev/null +++ b/vcf/test/1kg.sites.vcf @@ -0,0 +1,200 @@ +##fileformat=VCFv4.1 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##ALT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##reference=GRCh37 +#CHROM POS ID REF ALT QUAL FILTER INFO +1 10583 rs58108140 G A 100 PASS AVGPOST=0.7707;RSQ=0.4319;LDAF=0.2327;ERATE=0.0161;AN=2184;VT=SNP;AA=.;THETA=0.0046;AC=314;SNPSOURCE=LOWCOV;AF=0.14;ASN_AF=0.13;AMR_AF=0.17;AFR_AF=0.04;EUR_AF=0.21 +1 10611 rs189107123 C G 100 PASS AN=2184;THETA=0.0077;VT=SNP;AA=.;AC=41;ERATE=0.0048;SNPSOURCE=LOWCOV;AVGPOST=0.9330;LDAF=0.0479;RSQ=0.3475;AF=0.02;ASN_AF=0.01;AMR_AF=0.03;AFR_AF=0.01;EUR_AF=0.02 +1 13302 rs180734498 C T 100 PASS THETA=0.0048;AN=2184;AC=249;VT=SNP;AA=.;RSQ=0.6281;LDAF=0.1573;SNPSOURCE=LOWCOV;AVGPOST=0.8895;ERATE=0.0058;AF=0.11;ASN_AF=0.02;AMR_AF=0.08;AFR_AF=0.21;EUR_AF=0.14 +1 13327 rs144762171 G C 100 PASS AVGPOST=0.9698;AN=2184;VT=SNP;AA=.;RSQ=0.6482;AC=59;SNPSOURCE=LOWCOV;ERATE=0.0012;LDAF=0.0359;THETA=0.0204;AF=0.03;ASN_AF=0.02;AMR_AF=0.03;AFR_AF=0.02;EUR_AF=0.04 +1 13957 rs201747181 TC T 28 PASS AA=TC;AC=35;AF=0.02;AFR_AF=0.02;AMR_AF=0.02;AN=2184;ASN_AF=0.01;AVGPOST=0.8711;ERATE=0.0065;EUR_AF=0.02;LDAF=0.0788;RSQ=0.2501;THETA=0.0100;VT=INDEL +1 13980 rs151276478 T C 100 PASS AN=2184;AC=45;ERATE=0.0034;THETA=0.0139;RSQ=0.3603;LDAF=0.0525;VT=SNP;AA=.;AVGPOST=0.9221;SNPSOURCE=LOWCOV;AF=0.02;ASN_AF=0.02;AMR_AF=0.02;AFR_AF=0.01;EUR_AF=0.02 +1 30923 rs140337953 G T 100 PASS AC=1584;AA=T;AN=2184;RSQ=0.5481;VT=SNP;THETA=0.0162;SNPSOURCE=LOWCOV;ERATE=0.0183;LDAF=0.6576;AVGPOST=0.7335;AF=0.73;ASN_AF=0.89;AMR_AF=0.80;AFR_AF=0.48;EUR_AF=0.73 +1 46402 rs199681827 C CTGT 31 PASS AA=.;AC=8;AF=0.0037;AFR_AF=0.01;AN=2184;ASN_AF=0.0017;AVGPOST=0.8325;ERATE=0.0072;LDAF=0.0903;RSQ=0.0960;THETA=0.0121;VT=INDEL +1 47190 rs200430748 G GA 192 PASS AA=G;AC=29;AF=0.01;AFR_AF=0.06;AMR_AF=0.0028;AN=2184;AVGPOST=0.9041;ERATE=0.0041;LDAF=0.0628;RSQ=0.2883;THETA=0.0153;VT=INDEL +1 51476 rs187298206 T C 100 PASS ERATE=0.0021;AA=C;AC=18;AN=2184;VT=SNP;THETA=0.0103;LDAF=0.0157;SNPSOURCE=LOWCOV;AVGPOST=0.9819;RSQ=0.5258;AF=0.01;ASN_AF=0.01;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01 +1 51479 rs116400033 T A 100 PASS RSQ=0.7414;AVGPOST=0.9085;AA=T;AN=2184;THETA=0.0131;AC=235;VT=SNP;LDAF=0.1404;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.11;ASN_AF=0.0035;AMR_AF=0.16;AFR_AF=0.03;EUR_AF=0.22 +1 51914 rs190452223 T G 100 PASS ERATE=0.0004;AVGPOST=0.9985;THETA=0.0159;AA=T;AN=2184;VT=SNP;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4089;LDAF=0.0012;AF=0.0005;ASN_AF=0.0017 +1 51935 rs181754315 C T 100 PASS THETA=0.0126;AA=C;AN=2184;RSQ=0.1888;AVGPOST=0.9972;LDAF=0.0015;VT=SNP;AC=0;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0 +1 51954 rs185832753 G C 100 PASS LDAF=0.0021;AA=G;AN=2184;RSQ=0.4692;AVGPOST=0.9975;VT=SNP;SNPSOURCE=LOWCOV;THETA=0.0029;ERATE=0.0006;AC=2;AF=0.0009;AMR_AF=0.01 +1 52058 rs62637813 G C 100 PASS AA=C;ERATE=0.0057;AN=2184;AVGPOST=0.9264;VT=SNP;RSQ=0.4882;AC=64;SNPSOURCE=LOWCOV;LDAF=0.0620;THETA=0.0069;AF=0.03;ASN_AF=0.0017;AMR_AF=0.04;AFR_AF=0.02;EUR_AF=0.05 +1 52144 rs190291950 T A 100 PASS THETA=0.0093;ERATE=0.0013;LDAF=0.0156;AA=T;AN=2184;VT=SNP;RSQ=0.5220;AVGPOST=0.9811;SNPSOURCE=LOWCOV;AC=21;AF=0.01;ASN_AF=0.0035;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01 +1 52185 rs201374420 TTAA T 244 PASS AA=.;AC=10;AF=0.0046;AFR_AF=0.0020;AMR_AF=0.02;AN=2184;ASN_AF=0.0035;AVGPOST=0.9840;ERATE=0.0037;LDAF=0.0124;RSQ=0.4271;THETA=0.0232;VT=INDEL +1 52238 rs150021059 T G 100 PASS THETA=0.0132;AA=G;AN=2184;RSQ=0.6256;VT=SNP;ERATE=0.0026;AVGPOST=0.8617;SNPSOURCE=LOWCOV;AC=1941;LDAF=0.8423;AF=0.89;ASN_AF=0.99;AMR_AF=0.93;AFR_AF=0.64;EUR_AF=0.95 +1 53234 rs199502715 CAT C 227 PASS AA=CAT;AC=10;AF=0.0046;AFR_AF=0.02;AMR_AF=0.0028;AN=2184;AVGPOST=0.9936;ERATE=0.0007;LDAF=0.0074;RSQ=0.6237;THETA=0.0119;VT=INDEL +1 54353 rs140052487 C A 100 PASS THETA=0.0026;AA=C;AN=2184;AC=16;VT=SNP;RSQ=0.5074;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0146;ERATE=0.0058;AF=0.01;ASN_AF=0.01;AMR_AF=0.0028;AFR_AF=0.02;EUR_AF=0.0013 +1 54421 rs146477069 A G 100 PASS ERATE=0.0013;AN=2184;AC=220;VT=SNP;RSQ=0.7869;AVGPOST=0.9461;AA=A;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.1190;AF=0.10;ASN_AF=0.25;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.02 +1 54490 rs141149254 G A 100 PASS ERATE=0.0004;THETA=0.0074;AA=G;AN=2184;VT=SNP;RSQ=0.8366;AVGPOST=0.9646;AC=175;SNPSOURCE=LOWCOV;LDAF=0.0929;AF=0.08;ASN_AF=0.0035;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.15 +1 54676 rs2462492 C T 100 PASS LDAF=0.1528;RSQ=0.6989;AA=T;AN=2184;AC=267;VT=SNP;AVGPOST=0.8998;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0037;AF=0.12;ASN_AF=0.02;AMR_AF=0.20;AFR_AF=0.09;EUR_AF=0.18 +1 54753 rs143174675 T G 100 PASS AA=T;AN=2184;RSQ=0.6820;AC=65;VT=SNP;THETA=0.0080;ERATE=0.0016;SNPSOURCE=LOWCOV;AVGPOST=0.9697;LDAF=0.0399;AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.03 +1 55164 rs3091274 C A 100 PASS AN=2184;VT=SNP;ERATE=0.0045;AA=A;THETA=0.0162;SNPSOURCE=LOWCOV;AC=1955;RSQ=0.6373;AVGPOST=0.8686;LDAF=0.8489;AF=0.90;ASN_AF=0.99;AMR_AF=0.94;AFR_AF=0.65;EUR_AF=0.96 +1 55249 rs200769871 C CTATGG 443 PASS AA=C;AC=151;AF=0.07;AFR_AF=0.03;AMR_AF=0.08;AN=2184;ASN_AF=0.16;AVGPOST=0.9073;ERATE=0.0063;EUR_AF=0.02;LDAF=0.0968;RSQ=0.5891;THETA=0.0038;VT=INDEL +1 55299 rs10399749 C T 100 PASS RSQ=0.7602;LDAF=0.2954;AN=2184;VT=SNP;ERATE=0.0051;AA=c;AC=554;SNPSOURCE=LOWCOV;AVGPOST=0.8845;THETA=0.0070;AF=0.25;ASN_AF=0.33;AMR_AF=0.21;AFR_AF=0.39;EUR_AF=0.13 +1 55313 rs182462964 A T 100 PASS ERATE=0.0004;RSQ=0.6112;AVGPOST=0.9994;AN=2184;VT=SNP;THETA=0.0057;AA=A;SNPSOURCE=LOWCOV;AC=1;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020 +1 55326 rs3107975 T C 100 PASS AA=C;ERATE=0.0074;AN=2184;THETA=0.0085;VT=SNP;SNPSOURCE=LOWCOV;AVGPOST=0.9622;AC=90;RSQ=0.6901;LDAF=0.0562;AF=0.04;ASN_AF=0.07;AMR_AF=0.02;AFR_AF=0.07;EUR_AF=0.01 +1 55330 rs185215913 G A 100 PASS ERATE=0.0005;AA=G;AN=2184;VT=SNP;THETA=0.0086;AVGPOST=0.9988;LDAF=0.0011;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4701;AF=0.0005;AFR_AF=0.0020 +1 55367 rs190850374 G A 100 PASS ERATE=0.0004;THETA=0.0044;AA=G;AN=2184;VT=SNP;LDAF=0.0029;RSQ=0.3860;SNPSOURCE=LOWCOV;AVGPOST=0.9961;AC=2;AF=0.0009;AMR_AF=0.01 +1 55388 rs182711216 C T 100 PASS THETA=0.0102;ERATE=0.0005;AA=C;AVGPOST=0.9983;AN=2184;LDAF=0.0010;VT=SNP;RSQ=0.2348;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017 +1 55394 rs2949420 T A 100 PASS AC=18;AN=2184;VT=SNP;AA=A;RSQ=0.4995;AVGPOST=0.9784;LDAF=0.0171;SNPSOURCE=LOWCOV;ERATE=0.0012;THETA=0.0063;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02 +1 55416 rs193242050 G A 100 PASS AA=G;AN=2184;AVGPOST=0.9944;VT=SNP;LDAF=0.0064;AC=9;THETA=0.0019;RSQ=0.6553;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0041;AFR_AF=0.02 +1 55427 rs183189405 T C 100 PASS THETA=0.0054;AA=T;AN=2184;VT=SNP;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;RSQ=0.2759;ERATE=0.0007;AF=0.0005;AFR_AF=0.0020 +1 55816 rs187434873 G A 100 PASS AN=2184;THETA=0.0119;VT=SNP;AC=10;RSQ=0.4578;AA=A;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0108;ERATE=0.0007;AF=0.0046;AMR_AF=0.01;EUR_AF=0.01 +1 55850 rs191890754 C G 100 PASS AVGPOST=0.9921;AA=G;AN=2184;VT=SNP;RSQ=0.4083;THETA=0.0045;LDAF=0.0056;AC=5;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0023;EUR_AF=0.01 +1 55852 rs184233019 G C 100 PASS THETA=0.0137;AA=G;AN=2184;RSQ=0.5433;ERATE=0.0009;LDAF=0.0046;VT=SNP;AVGPOST=0.9953;AC=5;SNPSOURCE=LOWCOV;AF=0.0023;AMR_AF=0.01;EUR_AF=0.0013 +1 56644 rs143342222 A C 100 PASS AN=2184;AVGPOST=0.9962;LDAF=0.0040;ERATE=0.0024;VT=SNP;AA=A;RSQ=0.5700;AC=5;SNPSOURCE=LOWCOV;THETA=0.0117;AF=0.0023;AFR_AF=0.01 +1 57952 rs189727433 A C 100 PASS AA=C;ERATE=0.0085;AN=2184;LDAF=0.7878;VT=SNP;THETA=0.0076;RSQ=0.4712;AC=1902;SNPSOURCE=LOWCOV;AVGPOST=0.7578;AF=0.87;ASN_AF=0.98;AMR_AF=0.91;AFR_AF=0.64;EUR_AF=0.91 +1 58814 rs114420996 G A 100 PASS AC=223;THETA=0.0032;AA=G;AN=2184;RSQ=0.9087;LDAF=0.1074;VT=SNP;SNPSOURCE=LOWCOV;ERATE=0.0006;AVGPOST=0.9777;AF=0.10;ASN_AF=0.03;AMR_AF=0.17;AFR_AF=0.20;EUR_AF=0.06 +1 59040 rs149755937 T C 100 PASS AVGPOST=0.9710;AC=115;AA=T;AN=2184;RSQ=0.8248;VT=SNP;ERATE=0.0017;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.0613;AF=0.05;ASN_AF=0.03;AMR_AF=0.15;AFR_AF=0.0041;EUR_AF=0.06 +1 60726 rs192328835 C A 100 PASS AVGPOST=0.9092;AN=2184;RSQ=0.5988;ERATE=0.0081;AC=144;VT=SNP;THETA=0.0045;AA=A;SNPSOURCE=LOWCOV;LDAF=0.0959;AF=0.07;ASN_AF=0.05;AMR_AF=0.10;AFR_AF=0.11;EUR_AF=0.03 +1 61442 rs74970982 A G 100 PASS LDAF=0.9152;AA=G;AN=2184;VT=SNP;ERATE=0.0026;RSQ=0.4867;AVGPOST=0.9004;SNPSOURCE=LOWCOV;THETA=0.0013;AC=2084;AF=0.95;ASN_AF=1.00;AMR_AF=0.97;AFR_AF=0.84;EUR_AF=0.99 +1 61462 rs56992750 T A 100 PASS THETA=0.0023;LDAF=0.0378;RSQ=0.7396;AA=T;AN=2184;AVGPOST=0.9773;VT=SNP;AC=68;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.03;AMR_AF=0.02;AFR_AF=0.13 +1 61743 rs184286948 G C 100 PASS AVGPOST=0.9939;LDAF=0.0047;AA=G;AN=2184;VT=SNP;ERATE=0.0011;SNPSOURCE=LOWCOV;AC=4;THETA=0.0016;RSQ=0.4838;AF=0.0018;AMR_AF=0.01;EUR_AF=0.0026 +1 61987 rs76735897 A G 100 PASS THETA=0.0015;AN=2184;AC=569;VT=SNP;AA=A;RSQ=0.7192;AVGPOST=0.8533;LDAF=0.2944;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.26;ASN_AF=0.07;AMR_AF=0.31;AFR_AF=0.25;EUR_AF=0.39 +1 61989 rs77573425 G C 100 PASS RSQ=0.7254;AVGPOST=0.8584;AA=G;AN=2184;LDAF=0.2849;VT=SNP;AC=555;THETA=0.0019;SNPSOURCE=LOWCOV;ERATE=0.0007;AF=0.25;ASN_AF=0.07;AMR_AF=0.31;AFR_AF=0.22;EUR_AF=0.39 +1 61993 rs190553843 C T 100 PASS AC=7;RSQ=0.6106;AA=C;THETA=0.0143;AN=2184;ERATE=0.0009;VT=SNP;AVGPOST=0.9953;SNPSOURCE=LOWCOV;LDAF=0.0050;AF=0.0032;AFR_AF=0.01 +1 62156 rs181864839 C T 100 PASS ERATE=0.0005;AA=C;AN=2184;AVGPOST=0.9979;LDAF=0.0015;VT=SNP;THETA=0.0094;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4561;AF=0.0005;AFR_AF=0.0020 +1 62157 rs10399597 G A 100 PASS AVGPOST=0.9945;AA=G;AN=2184;ERATE=0.0025;VT=SNP;RSQ=0.5217;AC=5;THETA=0.0066;SNPSOURCE=LOWCOV;LDAF=0.0050;AF=0.0023;AFR_AF=0.01 +1 62162 rs140556834 G A 100 PASS AA=G;AN=2184;AC=8;LDAF=0.0057;VT=SNP;THETA=0.0018;ERATE=0.0017;RSQ=0.6089;AVGPOST=0.9948;SNPSOURCE=LOWCOV;AF=0.0037;AMR_AF=0.0028;AFR_AF=0.01;EUR_AF=0.0013 +1 63276 rs185977555 G A 100 PASS RSQ=0.2744;AA=G;AN=2184;AVGPOST=0.9947;VT=SNP;ERATE=0.0010;SNPSOURCE=LOWCOV;AC=1;THETA=0.0010;LDAF=0.0031;AF=0.0005;AFR_AF=0.0020 +1 63297 rs188886746 G A 100 PASS ERATE=0.0005;AVGPOST=0.9986;AA=G;AN=2184;VT=SNP;AC=0;SNPSOURCE=LOWCOV;RSQ=0.2459;THETA=0.0024;LDAF=0.0008;AF=0 +1 63671 rs116440577 G A 100 PASS AA=G;AN=2184;ERATE=0.0047;LDAF=0.1773;VT=SNP;THETA=0.0072;AC=369;SNPSOURCE=LOWCOV;RSQ=0.8980;AVGPOST=0.9652;AF=0.17;ASN_AF=0.05;AMR_AF=0.22;AFR_AF=0.35;EUR_AF=0.11 +1 63735 rs201888535 CCTA C 455 PASS AA=CCTA;AC=829;AF=0.38;AFR_AF=0.13;AMR_AF=0.33;AN=2184;ASN_AF=0.69;AVGPOST=0.7654;ERATE=0.0047;EUR_AF=0.34;LDAF=0.4128;RSQ=0.6424;THETA=0.0062;VT=INDEL +1 64649 rs181431124 A C 100 PASS RSQ=0.6975;AN=2184;VT=SNP;AA=.;ERATE=0.0008;AVGPOST=0.9918;SNPSOURCE=LOWCOV;AC=21;THETA=0.0024;LDAF=0.0114;AF=0.01;AMR_AF=0.01;EUR_AF=0.03 +1 66162 rs62639105 A T 100 PASS THETA=0.0026;ERATE=0.0166;LDAF=0.3089;AN=2184;VT=SNP;AA=.;AC=544;SNPSOURCE=LOWCOV;RSQ=0.5681;AVGPOST=0.7777;AF=0.25;ASN_AF=0.07;AMR_AF=0.30;AFR_AF=0.23;EUR_AF=0.38 +1 66176 rs28552463 T A 100 PASS AN=2184;RSQ=0.4451;VT=SNP;AA=.;THETA=0.0095;LDAF=0.0631;AC=70;SNPSOURCE=LOWCOV;ERATE=0.0061;AVGPOST=0.9210;AF=0.03;ASN_AF=0.0017;AMR_AF=0.01;AFR_AF=0.13;EUR_AF=0.0013 +1 66219 rs181028663 A T 100 PASS LDAF=0.1137;ERATE=0.0074;AN=2184;VT=SNP;AA=.;AC=68;THETA=0.0059;RSQ=0.2946;AVGPOST=0.8268;SNPSOURCE=LOWCOV;AF=0.03;ASN_AF=0.08;AMR_AF=0.04;AFR_AF=0.01;EUR_AF=0.01 +1 66331 rs186063952 A C 100 PASS THETA=0.0126;AVGPOST=0.7656;RSQ=0.1616;AN=2184;LDAF=0.1387;ERATE=0.0093;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=42;AF=0.02;ASN_AF=0.0035;AMR_AF=0.01;AFR_AF=0.07 +1 66442 rs192044252 T A 100 PASS RSQ=0.1763;AVGPOST=0.7894;AN=2184;THETA=0.0031;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=36;ERATE=0.0107;LDAF=0.1241;AF=0.02;ASN_AF=0.0035;AMR_AF=0.03;AFR_AF=0.02;EUR_AF=0.01 +1 66457 rs13328655 T A 100 PASS ERATE=0.0085;AN=2184;VT=SNP;AA=.;AC=31;AVGPOST=0.8340;LDAF=0.0957;RSQ=0.1836;SNPSOURCE=LOWCOV;THETA=0.0024;AF=0.01;ASN_AF=0.01;AMR_AF=0.01;AFR_AF=0.03;EUR_AF=0.01 +1 66507 rs12401368 T A 100 PASS ERATE=0.0197;AN=2184;VT=SNP;AA=.;THETA=0.0122;SNPSOURCE=LOWCOV;AC=170;RSQ=0.2110;LDAF=0.2457;AVGPOST=0.6536;AF=0.08;ASN_AF=0.07;AMR_AF=0.09;AFR_AF=0.05;EUR_AF=0.09 +1 67179 rs149952626 C G 100 PASS AVGPOST=0.9946;AN=2184;VT=SNP;AA=.;THETA=0.0046;SNPSOURCE=LOWCOV;ERATE=0.0012;AC=11;RSQ=0.6333;LDAF=0.0069;AF=0.01;ASN_AF=0.02 +1 67181 rs77662731 A G 100 PASS AVGPOST=0.9817;THETA=0.0096;ERATE=0.0013;AN=2184;RSQ=0.8542;AC=104;LDAF=0.0529;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AF=0.05;AMR_AF=0.02;AFR_AF=0.20 +1 69511 rs75062661 A G 100 PASS LDAF=0.6051;AC=1424;ERATE=0.0237;AN=2184;RSQ=0.5669;VT=SNP;AA=.;AVGPOST=0.7173;SNPSOURCE=LOWCOV;THETA=0.0052;AF=0.65;ASN_AF=0.87;AMR_AF=0.65;AFR_AF=0.33;EUR_AF=0.70 +1 69534 rs190717287 T C 100 PASS AVGPOST=0.9986;LDAF=0.0013;AN=2184;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4002;THETA=0.0016;ERATE=0.0006;AF=0.0005;ASN_AF=0.0017 +1 69536 rs200013390 C T 100 PASS AA=.;AC=0;AF=0;AN=2184;AVGPOST=0.9986;ERATE=0.0006;LDAF=0.0008;RSQ=0.0677;SNPSOURCE=EXOME;THETA=0.0087;VT=SNP +1 72119 rs199639004 G GTA 158 PASS AA=.;AC=8;AF=0.0037;AMR_AF=0.0028;AN=2184;ASN_AF=0.01;AVGPOST=0.9589;ERATE=0.0026;EUR_AF=0.0013;LDAF=0.0243;RSQ=0.2268;THETA=0.0016;VT=INDEL +1 72148 rs182862337 C T 100 PASS AN=2184;RSQ=0.2794;THETA=0.0130;VT=SNP;AA=.;LDAF=0.0019;AVGPOST=0.9971;SNPSOURCE=LOWCOV;AC=1;ERATE=0.0007;AF=0.0005;AMR_AF=0.0028 +1 72297 rs200651397 G GTAT 160 PASS AA=G;AC=19;AF=0.01;AMR_AF=0.02;AN=2184;ASN_AF=0.01;AVGPOST=0.9383;ERATE=0.0055;EUR_AF=0.01;LDAF=0.0399;RSQ=0.3194;THETA=0.0064;VT=INDEL +1 73841 rs143773730 C T 100 PASS ERATE=0.0303;THETA=0.0044;AN=2184;AVGPOST=0.8178;RSQ=0.5832;VT=SNP;AA=.;SNPSOURCE=LOWCOV;LDAF=0.2588;AC=425;AF=0.19;ASN_AF=0.15;AMR_AF=0.22;AFR_AF=0.17;EUR_AF=0.23 +1 77462 rs188023513 G A 100 PASS LDAF=0.1685;AN=2184;AVGPOST=0.8149;VT=SNP;AA=.;RSQ=0.4624;AC=198;THETA=0.0100;SNPSOURCE=LOWCOV;ERATE=0.0222;AF=0.09;ASN_AF=0.11;AMR_AF=0.12;AFR_AF=0.08;EUR_AF=0.07 +1 77470 rs192898053 T C 100 PASS LDAF=0.0047;AN=2184;VT=SNP;AA=.;ERATE=0.0011;AVGPOST=0.9918;THETA=0.0025;RSQ=0.1818;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;AFR_AF=0.0020 +1 77874 rs184538873 G A 100 PASS THETA=0.0068;LDAF=0.0516;AN=2184;VT=SNP;AA=.;AVGPOST=0.9594;ERATE=0.0011;AC=87;SNPSOURCE=LOWCOV;RSQ=0.6970;AF=0.04;ASN_AF=0.01;AMR_AF=0.12;AFR_AF=0.0041;EUR_AF=0.04 +1 77961 rs78385339 G A 100 PASS AVGPOST=0.9114;AN=2184;VT=SNP;AA=.;THETA=0.0072;RSQ=0.6667;ERATE=0.0011;SNPSOURCE=LOWCOV;AC=192;LDAF=0.1180;AF=0.09;ASN_AF=0.20;AMR_AF=0.14;AFR_AF=0.01;EUR_AF=0.03 +1 79033 rs62641298 A G 100 PASS AVGPOST=0.7371;THETA=0.0022;LDAF=0.7962;AN=2184;ERATE=0.0054;VT=SNP;AA=.;AC=1961;SNPSOURCE=LOWCOV;RSQ=0.3963;AF=0.90;ASN_AF=0.98;AMR_AF=0.95;AFR_AF=0.65;EUR_AF=0.97 +1 79050 rs62641299 G T 100 PASS AC=1871;AN=2184;THETA=0.0031;RSQ=0.3928;VT=SNP;AA=.;AVGPOST=0.6803;SNPSOURCE=LOWCOV;LDAF=0.7318;ERATE=0.0107;AF=0.86;ASN_AF=0.98;AMR_AF=0.93;AFR_AF=0.54;EUR_AF=0.94 +1 79137 rs143777184 A T 100 PASS AN=2184;AC=55;ERATE=0.0009;AVGPOST=0.9773;LDAF=0.0324;VT=SNP;AA=.;THETA=0.0091;SNPSOURCE=LOWCOV;RSQ=0.7309;AF=0.03;AMR_AF=0.01;AFR_AF=0.10 +1 79417 rs184768190 C T 100 PASS ERATE=0.0005;THETA=0.0166;AN=2184;RSQ=0.5026;AVGPOST=0.9975;VT=SNP;AA=.;LDAF=0.0022;SNPSOURCE=LOWCOV;AC=2;AF=0.0009;ASN_AF=0.0035 +1 79772 rs147215883 C G 100 PASS LDAF=0.1066;AN=2184;THETA=0.0138;RSQ=0.7199;VT=SNP;AA=.;AVGPOST=0.9271;AC=176;ERATE=0.0011;SNPSOURCE=LOWCOV;AF=0.08;ASN_AF=0.07;AMR_AF=0.06;AFR_AF=0.10;EUR_AF=0.09 +1 79872 rs189224661 T G 100 PASS THETA=0.0054;AN=2184;LDAF=0.0057;VT=SNP;AA=.;ERATE=0.0017;AC=9;AVGPOST=0.9956;SNPSOURCE=LOWCOV;RSQ=0.6548;AF=0.0041;AFR_AF=0.02 +1 80454 rs144226842 G C 100 PASS RSQ=0.6549;LDAF=0.0035;AN=2184;AVGPOST=0.9975;VT=SNP;AA=.;AC=5;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0015;AF=0.0023;ASN_AF=0.01 +1 81949 rs181567186 T C 100 PASS AN=2184;ERATE=0.0009;VT=SNP;AA=.;AVGPOST=0.9948;LDAF=0.0030;SNPSOURCE=LOWCOV;AC=1;THETA=0.0052;RSQ=0.2129;AF=0.0005;ASN_AF=0.0017 +1 82163 rs139113303 G A 100 PASS AN=2184;LDAF=0.0375;ERATE=0.0009;VT=SNP;AA=.;RSQ=0.7842;AC=66;THETA=0.0053;SNPSOURCE=LOWCOV;AVGPOST=0.9761;AF=0.03;ASN_AF=0.0017;AMR_AF=0.01;AFR_AF=0.0020;EUR_AF=0.08 +1 82249 rs1851945 A G 100 PASS THETA=0.0137;LDAF=0.0712;AVGPOST=0.9150;AN=2184;VT=SNP;AA=.;RSQ=0.4689;AC=75;ERATE=0.0116;SNPSOURCE=LOWCOV;AF=0.03;ASN_AF=0.03;AMR_AF=0.04;AFR_AF=0.02;EUR_AF=0.04 +1 82609 rs149189449 C G 100 PASS ERATE=0.0005;AN=2184;LDAF=0.0364;VT=SNP;AA=.;AC=68;AVGPOST=0.9822;RSQ=0.8408;SNPSOURCE=LOWCOV;THETA=0.0024;AF=0.03;AMR_AF=0.02;AFR_AF=0.0020;EUR_AF=0.08 +1 82676 rs185237834 T G 100 PASS LDAF=0.1144;AN=2184;AVGPOST=0.9264;VT=SNP;AA=.;RSQ=0.7176;AC=198;THETA=0.0025;SNPSOURCE=LOWCOV;ERATE=0.0056;AF=0.09;ASN_AF=0.07;AMR_AF=0.08;AFR_AF=0.12;EUR_AF=0.10 +1 82734 rs4030331 T C 100 PASS AN=2184;THETA=0.0008;VT=SNP;AA=.;ERATE=0.0158;RSQ=0.6316;AVGPOST=0.8280;LDAF=0.2433;SNPSOURCE=LOWCOV;AC=435;AF=0.20;ASN_AF=0.15;AMR_AF=0.28;AFR_AF=0.24;EUR_AF=0.17 +1 82957 rs189774606 C T 100 PASS RSQ=0.5163;AN=2184;VT=SNP;AA=.;LDAF=0.0072;THETA=0.0028;AC=9;AVGPOST=0.9918;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.0041;AMR_AF=0.01;AFR_AF=0.01 +1 83084 rs181193408 T A 100 PASS AN=2184;AVGPOST=0.8261;VT=SNP;AA=.;RSQ=0.5750;AC=1914;LDAF=0.8278;SNPSOURCE=LOWCOV;ERATE=0.0061;THETA=0.0064;AF=0.88;ASN_AF=0.99;AMR_AF=0.92;AFR_AF=0.58;EUR_AF=0.96 +1 83088 rs186081601 G C 100 PASS ERATE=0.0013;AN=2184;LDAF=0.0043;VT=SNP;AA=.;AVGPOST=0.9922;RSQ=0.1618;THETA=0.0019;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;AFR_AF=0.0020 +1 83771 rs189906733 T G 100 PASS RSQ=0.6473;AN=2184;AVGPOST=0.9871;VT=SNP;AA=.;AC=24;ERATE=0.0011;SNPSOURCE=LOWCOV;THETA=0.0043;LDAF=0.0158;AF=0.01;AMR_AF=0.01;AFR_AF=0.04;EUR_AF=0.0013 +1 83977 rs180759811 A G 100 PASS AN=2184;ERATE=0.0009;VT=SNP;AA=.;THETA=0.0059;LDAF=0.0038;RSQ=0.2074;SNPSOURCE=LOWCOV;AC=1;AVGPOST=0.9932;AF=0.0005;AFR_AF=0.0020 +1 84002 rs28850140 G A 100 PASS THETA=0.0050;ERATE=0.0211;AN=2184;AC=236;VT=SNP;AA=.;AVGPOST=0.8144;LDAF=0.1921;SNPSOURCE=LOWCOV;RSQ=0.4810;AF=0.11;ASN_AF=0.12;AMR_AF=0.15;AFR_AF=0.07;EUR_AF=0.11 +1 84005 rs202079949 AG A 78 PASS AA=.;AC=52;AF=0.02;AFR_AF=0.02;AMR_AF=0.03;AN=2184;ASN_AF=0.01;AVGPOST=0.9360;ERATE=0.0049;EUR_AF=0.04;LDAF=0.0514;RSQ=0.4690;THETA=0.0005;VT=INDEL +1 84010 rs186443818 G A 100 PASS AVGPOST=0.9169;AN=2184;VT=SNP;AA=.;AC=97;THETA=0.0087;LDAF=0.0789;SNPSOURCE=LOWCOV;ERATE=0.0061;RSQ=0.5318;AF=0.04;ASN_AF=0.03;AMR_AF=0.05;AFR_AF=0.03;EUR_AF=0.06 +1 84079 rs190867312 T C 100 PASS ERATE=0.0021;AN=2184;AC=6;VT=SNP;AA=.;LDAF=0.0049;AVGPOST=0.9956;RSQ=0.5906;SNPSOURCE=LOWCOV;THETA=0.0016;AF=0.0027;AMR_AF=0.0028;AFR_AF=0.01 +1 84139 rs183605470 A T 100 PASS THETA=0.0023;AC=28;AN=2184;RSQ=0.6469;VT=SNP;AA=.;LDAF=0.0180;SNPSOURCE=LOWCOV;AVGPOST=0.9835;ERATE=0.0006;AF=0.01;ASN_AF=0.0017;AMR_AF=0.07;AFR_AF=0.0041 +1 84156 rs188652299 A C 100 PASS THETA=0.0009;AVGPOST=0.9936;ERATE=0.0014;RSQ=0.3359;AN=2184;VT=SNP;AA=.;LDAF=0.0044;SNPSOURCE=LOWCOV;AC=3;AF=0.0014;AMR_AF=0.0028;AFR_AF=0.0041 +1 84244 rs191297051 A C 100 PASS LDAF=0.1204;AN=2184;VT=SNP;AA=.;AVGPOST=0.9398;RSQ=0.7828;THETA=0.0025;ERATE=0.0018;SNPSOURCE=LOWCOV;AC=222;AF=0.10;ASN_AF=0.08;AMR_AF=0.08;AFR_AF=0.14;EUR_AF=0.11 +1 84295 rs183209871 G A 100 PASS LDAF=0.0067;AVGPOST=0.9946;AN=2184;THETA=0.0038;VT=SNP;AA=.;AC=9;SNPSOURCE=LOWCOV;ERATE=0.0007;RSQ=0.6599;AF=0.0041;AMR_AF=0.01;EUR_AF=0.01 +1 84346 rs187855973 T C 100 PASS THETA=0.0044;AN=2184;AVGPOST=0.9981;VT=SNP;AA=.;LDAF=0.0014;SNPSOURCE=LOWCOV;AC=1;ERATE=0.0007;RSQ=0.3659;AF=0.0005;EUR_AF=0.0013 +1 84453 rs191379015 C G 100 PASS LDAF=0.0021;RSQ=0.2866;AN=2184;VT=SNP;AA=.;THETA=0.0018;ERATE=0.0008;SNPSOURCE=LOWCOV;AC=1;AVGPOST=0.9968;AF=0.0005;AMR_AF=0.0028 +1 84705 rs183470350 T G 100 PASS LDAF=0.0033;AVGPOST=0.9943;AN=2184;VT=SNP;AA=.;THETA=0.0030;RSQ=0.2658;SNPSOURCE=LOWCOV;AC=2;ERATE=0.0007;AF=0.0009;AMR_AF=0.0028;EUR_AF=0.0013 +1 85063 rs187802690 T C 100 PASS THETA=0.0093;AN=2184;VT=SNP;AA=.;ERATE=0.0051;LDAF=0.0255;RSQ=0.6868;AVGPOST=0.9806;AC=38;SNPSOURCE=LOWCOV;AF=0.02;ASN_AF=0.01;AMR_AF=0.02;AFR_AF=0.01;EUR_AF=0.02 +1 85597 rs192472955 A C 100 PASS AC=145;AVGPOST=0.9322;AN=2184;LDAF=0.0880;VT=SNP;AA=.;RSQ=0.6993;SNPSOURCE=LOWCOV;THETA=0.0020;ERATE=0.0022;AF=0.07;AMR_AF=0.07;AFR_AF=0.11;EUR_AF=0.09 +1 85622 rs185273034 A T 100 PASS ERATE=0.0005;AVGPOST=0.9963;AN=2184;RSQ=0.5194;VT=SNP;AA=.;THETA=0.0174;LDAF=0.0034;SNPSOURCE=LOWCOV;AC=4;AF=0.0018;AFR_AF=0.01 +1 85892 rs147185795 A G 100 PASS AVGPOST=0.9936;RSQ=0.7759;AN=2184;VT=SNP;AA=.;LDAF=0.0122;SNPSOURCE=LOWCOV;AC=21;THETA=0.0116;ERATE=0.0007;AF=0.01;AMR_AF=0.0028;AFR_AF=0.04 +1 86000 rs140628094 A C 100 PASS AN=2184;LDAF=0.0062;VT=SNP;AA=.;AC=10;THETA=0.0018;ERATE=0.0008;RSQ=0.7700;SNPSOURCE=LOWCOV;AVGPOST=0.9968;AF=0.0046;AFR_AF=0.02 +1 86018 rs142878000 C G 100 PASS ERATE=0.0036;RSQ=0.7867;AVGPOST=0.9429;AN=2184;AC=213;LDAF=0.1166;VT=SNP;AA=.;THETA=0.0030;SNPSOURCE=LOWCOV;AF=0.10;ASN_AF=0.08;AMR_AF=0.08;AFR_AF=0.12;EUR_AF=0.11 +1 86028 rs114608975 T C 100 PASS ERATE=0.0005;AC=73;AN=2184;RSQ=0.8713;VT=SNP;AA=.;THETA=0.0108;SNPSOURCE=LOWCOV;AVGPOST=0.9841;LDAF=0.0388;AF=0.03;AMR_AF=0.02;AFR_AF=0.0041;EUR_AF=0.08 +1 86064 rs190167736 G A 100 PASS ERATE=0.0004;AN=2184;VT=SNP;AA=.;THETA=0.0081;SNPSOURCE=LOWCOV;AC=1;RSQ=0.5628;AVGPOST=0.9992;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020 +1 86065 rs116504101 G C 100 PASS ERATE=0.0005;LDAF=0.0398;AN=2184;AVGPOST=0.9846;VT=SNP;AA=.;AC=76;THETA=0.0057;RSQ=0.8725;SNPSOURCE=LOWCOV;AF=0.03;AMR_AF=0.02;AFR_AF=0.01;EUR_AF=0.09 +1 86282 rs192830046 T G 100 PASS LDAF=0.0036;AN=2184;VT=SNP;AA=.;SNPSOURCE=LOWCOV;ERATE=0.0012;RSQ=0.2764;AVGPOST=0.9941;AC=2;THETA=0.0034;AF=0.0009;AMR_AF=0.0028;EUR_AF=0.0013 +1 86303 rs2949417 G T 100 PASS THETA=0.0021;RSQ=0.8008;LDAF=0.1194;AN=2184;AC=214;VT=SNP;AA=.;AVGPOST=0.9465;SNPSOURCE=LOWCOV;ERATE=0.0007;AF=0.10;ASN_AF=0.08;AMR_AF=0.08;AFR_AF=0.12;EUR_AF=0.11 +1 86331 rs115209712 A G 100 PASS THETA=0.0047;AN=2184;VT=SNP;AA=.;AC=216;LDAF=0.1195;RSQ=0.8119;ERATE=0.0008;AVGPOST=0.9495;SNPSOURCE=LOWCOV;AF=0.10;ASN_AF=0.08;AMR_AF=0.08;AFR_AF=0.12;EUR_AF=0.11 +1 86982 rs184970101 G A 100 PASS THETA=0.0050;AN=2184;AVGPOST=0.9979;LDAF=0.0015;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=1;ERATE=0.0006;RSQ=0.3541;AF=0.0005;AFR_AF=0.0020 +1 87021 rs188486692 T C 100 PASS AN=2184;RSQ=0.4348;VT=SNP;AA=.;THETA=0.0112;AVGPOST=0.9687;ERATE=0.0011;SNPSOURCE=LOWCOV;AC=19;LDAF=0.0221;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02 +1 87114 rs200095900 CT C 192 PASS AA=.;AC=8;AF=0.0037;AFR_AF=0.02;AN=2184;AVGPOST=0.9976;ERATE=0.0010;LDAF=0.0042;RSQ=0.7479;THETA=0.0149;VT=INDEL +1 87190 rs1524602 G A 100 PASS AN=2184;LDAF=0.2822;VT=SNP;AA=.;RSQ=0.7549;THETA=0.0148;AC=540;SNPSOURCE=LOWCOV;ERATE=0.0096;AVGPOST=0.8739;AF=0.25;ASN_AF=0.29;AMR_AF=0.35;AFR_AF=0.38;EUR_AF=0.08 +1 87360 rs180907504 C T 100 PASS THETA=0.0014;AN=2184;ERATE=0.0025;RSQ=0.3869;VT=SNP;AA=.;AC=14;LDAF=0.0170;SNPSOURCE=LOWCOV;AVGPOST=0.9768;AF=0.01;ASN_AF=0.02;AMR_AF=0.0028;AFR_AF=0.01 +1 87409 rs139490478 C T 100 PASS AN=2184;AC=80;RSQ=0.8364;AVGPOST=0.9797;THETA=0.0075;VT=SNP;AA=.;ERATE=0.0011;SNPSOURCE=LOWCOV;LDAF=0.0438;AF=0.04;ASN_AF=0.0017;AMR_AF=0.02;AFR_AF=0.01;EUR_AF=0.09 +1 87590 rs185279164 G A 100 PASS THETA=0.0068;ERATE=0.0005;AN=2184;VT=SNP;AA=.;LDAF=0.0026;RSQ=0.6866;SNPSOURCE=LOWCOV;AC=4;AVGPOST=0.9982;AF=0.0018;AFR_AF=0.01 +1 87647 rs146836579 T C 100 PASS AN=2184;AC=111;THETA=0.0041;VT=SNP;AA=.;LDAF=0.0558;AVGPOST=0.9811;SNPSOURCE=LOWCOV;ERATE=0.0015;RSQ=0.8636;AF=0.05;AMR_AF=0.03;AFR_AF=0.20 +1 87755 rs140735660 G A 100 PASS ERATE=0.0027;RSQ=0.5060;AN=2184;AC=16;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AVGPOST=0.9847;LDAF=0.0138;THETA=0.0069;AF=0.01;AFR_AF=0.03 +1 87970 rs189643077 T C 100 PASS ERATE=0.0005;AN=2184;RSQ=0.5846;VT=SNP;AA=.;AVGPOST=0.9976;THETA=0.0053;LDAF=0.0025;SNPSOURCE=LOWCOV;AC=3;AF=0.0014;AFR_AF=0.01 +1 87978 rs182297743 G A 100 PASS AVGPOST=0.9963;THETA=0.0074;AN=2184;VT=SNP;AA=.;LDAF=0.0023;RSQ=0.2883;SNPSOURCE=LOWCOV;AC=1;ERATE=0.0006;AF=0.0005;AMR_AF=0.0028 +1 88136 rs59529791 G A 100 PASS RSQ=0.8406;AN=2184;VT=SNP;AA=.;ERATE=0.0010;THETA=0.0059;AVGPOST=0.9778;SNPSOURCE=LOWCOV;AC=106;LDAF=0.0548;AF=0.05;AMR_AF=0.03;AFR_AF=0.20 +1 88169 rs940550 C T 100 PASS RSQ=0.7811;AN=2184;VT=SNP;AA=.;THETA=0.0055;LDAF=0.2576;ERATE=0.0018;SNPSOURCE=LOWCOV;AC=506;AVGPOST=0.8932;AF=0.23;ASN_AF=0.29;AMR_AF=0.33;AFR_AF=0.33;EUR_AF=0.08 +1 88172 rs940551 G A 100 PASS RSQ=0.7703;AVGPOST=0.9669;LDAF=0.0483;AN=2184;ERATE=0.0009;VT=SNP;AA=.;THETA=0.0027;SNPSOURCE=LOWCOV;AC=86;AF=0.04;ASN_AF=0.01;AMR_AF=0.03;AFR_AF=0.01;EUR_AF=0.09 +1 88177 rs143215837 G C 100 PASS ERATE=0.0004;AN=2184;LDAF=0.0456;VT=SNP;AA=.;AVGPOST=0.9686;AC=82;THETA=0.0089;SNPSOURCE=LOWCOV;RSQ=0.7787;AF=0.04;AMR_AF=0.03;AFR_AF=0.01;EUR_AF=0.09 +1 88188 rs148331237 C A 100 PASS THETA=0.0039;AN=2184;VT=SNP;AA=.;AC=9;LDAF=0.0085;SNPSOURCE=LOWCOV;ERATE=0.0007;RSQ=0.5212;AVGPOST=0.9910;AF=0.0041;AMR_AF=0.0028;EUR_AF=0.01 +1 88236 rs186918018 C T 100 PASS AVGPOST=0.9904;AN=2184;ERATE=0.0031;RSQ=0.5511;VT=SNP;AA=.;THETA=0.0087;LDAF=0.0097;SNPSOURCE=LOWCOV;AC=11;AF=0.01;AMR_AF=0.02;AFR_AF=0.0041;EUR_AF=0.0026 +1 88250 rs191950833 T A 100 PASS LDAF=0.0013;AN=2184;RSQ=0.1387;VT=SNP;AA=.;AC=0;THETA=0.0019;SNPSOURCE=LOWCOV;ERATE=0.0007;AVGPOST=0.9974;AF=0 +1 88316 rs113759966 G A 100 PASS LDAF=0.0531;AN=2184;VT=SNP;AA=.;THETA=0.0071;RSQ=0.7791;AVGPOST=0.9644;ERATE=0.0008;AC=87;SNPSOURCE=LOWCOV;AF=0.04;ASN_AF=0.0017;AMR_AF=0.04;AFR_AF=0.01;EUR_AF=0.09 +1 88324 rs183326616 A G 100 PASS ERATE=0.0004;AN=2184;AVGPOST=0.9996;VT=SNP;AA=.;RSQ=0.7073;LDAF=0.0006;SNPSOURCE=LOWCOV;AC=1;THETA=0.0092;AF=0.0005;AFR_AF=0.0020 +1 88338 rs55700207 G A 100 PASS THETA=0.0035;RSQ=0.7967;AN=2184;ERATE=0.0034;LDAF=0.1019;VT=SNP;AA=.;AC=186;SNPSOURCE=LOWCOV;AVGPOST=0.9507;AF=0.09;ASN_AF=0.03;AMR_AF=0.15;AFR_AF=0.14;EUR_AF=0.05 +1 88370 rs185487977 G A 100 PASS AVGPOST=0.9957;LDAF=0.0035;AN=2184;ERATE=0.0009;RSQ=0.4507;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=4;THETA=0.0043;AF=0.0018;AMR_AF=0.0028;EUR_AF=0.0040 +1 88376 rs189954431 T G 100 PASS RSQ=0.6404;AVGPOST=0.9994;AN=2184;VT=SNP;AA=.;THETA=0.0057;SNPSOURCE=LOWCOV;AC=1;ERATE=0.0003;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020 +1 88388 rs182344336 C T 100 PASS THETA=0.0048;ERATE=0.0005;RSQ=0.3843;AVGPOST=0.9977;LDAF=0.0016;AN=2184;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017 +1 88429 rs146027550 T C 100 PASS LDAF=0.0083;AN=2184;AC=13;RSQ=0.6097;VT=SNP;AA=.;ERATE=0.0010;AVGPOST=0.9922;SNPSOURCE=LOWCOV;THETA=0.0069;AF=0.01;AMR_AF=0.01;AFR_AF=0.02;EUR_AF=0.0013 +1 88710 rs186575039 C G 100 PASS ERATE=0.0005;AC=73;THETA=0.0022;AN=2184;VT=SNP;AA=.;AVGPOST=0.9774;LDAF=0.0389;SNPSOURCE=LOWCOV;RSQ=0.8058;AF=0.03;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.08 +1 89165 rs192631277 A C 100 PASS RSQ=0.2647;AN=2184;ERATE=0.0009;VT=SNP;AA=.;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;THETA=0.0043;AF=0.0005;ASN_AF=0.0017 +1 89744 rs184101761 A G 100 PASS THETA=0.0068;ERATE=0.0004;AVGPOST=0.9985;LDAF=0.0016;AN=2184;RSQ=0.5853;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=2;AF=0.0009;AFR_AF=0.0041 +1 89794 rs188661839 T C 100 PASS AN=2184;THETA=0.0130;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AC=1;ERATE=0.0006;RSQ=0.3812;AVGPOST=0.9984;LDAF=0.0012;AF=0.0005;AFR_AF=0.0020 +1 89946 rs138808727 A T 100 PASS RSQ=0.7414;LDAF=0.1417;AN=2184;ERATE=0.0009;AC=236;VT=SNP;AA=.;THETA=0.0100;AVGPOST=0.9001;SNPSOURCE=LOWCOV;AF=0.11;ASN_AF=0.0035;AMR_AF=0.16;AFR_AF=0.02;EUR_AF=0.22 +1 91190 rs143856811 G A 100 PASS AN=2184;AC=77;ERATE=0.0009;VT=SNP;AA=.;LDAF=0.0447;SNPSOURCE=LOWCOV;RSQ=0.7517;THETA=0.0113;AVGPOST=0.9690;AF=0.04;ASN_AF=0.0017;AMR_AF=0.02;AFR_AF=0.02;EUR_AF=0.08 +1 91228 rs139873689 A G 100 PASS AN=2184;AVGPOST=0.9924;AC=8;RSQ=0.5097;VT=SNP;AA=.;ERATE=0.0010;THETA=0.0012;SNPSOURCE=LOWCOV;LDAF=0.0070;AF=0.0037;AFR_AF=0.02 +1 91536 rs77418980 G T 100 PASS AC=695;AN=2184;ERATE=0.0025;AVGPOST=0.7792;VT=SNP;AA=.;THETA=0.0018;LDAF=0.3255;RSQ=0.6634;SNPSOURCE=LOWCOV;AF=0.32;ASN_AF=0.34;AMR_AF=0.30;AFR_AF=0.04;EUR_AF=0.50 +1 91581 rs151118460 G A 100 PASS AVGPOST=0.7763;THETA=0.0078;AN=2184;VT=SNP;AA=.;AC=716;ERATE=0.0035;LDAF=0.3353;RSQ=0.6618;SNPSOURCE=LOWCOV;AF=0.33;ASN_AF=0.37;AMR_AF=0.30;AFR_AF=0.04;EUR_AF=0.50 +1 91605 rs141083882 C T 100 PASS AC=105;LDAF=0.0597;AN=2184;RSQ=0.7792;VT=SNP;AA=.;ERATE=0.0010;AVGPOST=0.9660;SNPSOURCE=LOWCOV;THETA=0.0070;AF=0.05;AMR_AF=0.02;AFR_AF=0.20 +1 92633 rs149776517 C T 100 PASS THETA=0.0054;AN=2184;VT=SNP;AA=.;AC=44;AVGPOST=0.9592;ERATE=0.0008;LDAF=0.0366;SNPSOURCE=LOWCOV;RSQ=0.5870;AF=0.02;AMR_AF=0.02;EUR_AF=0.05 +1 92858 rs147061536 G T 100 PASS AC=248;THETA=0.0212;RSQ=0.7567;AN=2184;ERATE=0.0046;VT=SNP;AA=.;SNPSOURCE=LOWCOV;AVGPOST=0.9072;LDAF=0.1433;AF=0.11;ASN_AF=0.01;AMR_AF=0.15;AFR_AF=0.05;EUR_AF=0.22 +1 92875 rs193157612 T C 100 PASS THETA=0.0048;AVGPOST=0.9957;AN=2184;LDAF=0.0040;ERATE=0.0009;VT=SNP;AA=.;RSQ=0.4901;SNPSOURCE=LOWCOV;AC=4;AF=0.0018;EUR_AF=0.01 +1 94421 rs200856736 TC T 90 PASS AA=TC;AC=253;AF=0.12;AFR_AF=0.01;AMR_AF=0.20;AN=2184;ASN_AF=0.26;AVGPOST=0.7183;ERATE=0.0117;EUR_AF=0.03;LDAF=0.2244;RSQ=0.3175;THETA=0.0159;VT=INDEL +1 94986 rs185004859 C T 100 PASS ERATE=0.0166;AN=2184;AC=100;THETA=0.0227;VT=SNP;AA=.;LDAF=0.0872;AVGPOST=0.9040;SNPSOURCE=LOWCOV;RSQ=0.4650;AF=0.05;ASN_AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.04 +1 94991 rs188832636 G A 100 PASS THETA=0.0048;LDAF=0.0041;AN=2184;AVGPOST=0.9944;ERATE=0.0009;VT=SNP;AA=.;SNPSOURCE=LOWCOV;RSQ=0.4157;AC=3;AF=0.0014;AMR_AF=0.0028;EUR_AF=0.0026 +1 98583 rs141344361 T A 100 PASS AVGPOST=0.9463;AC=248;THETA=0.0099;AN=2184;VT=SNP;AA=.;RSQ=0.8090;LDAF=0.1336;ERATE=0.0008;SNPSOURCE=LOWCOV;AF=0.11;ASN_AF=0.30;AMR_AF=0.16;AFR_AF=0.01;EUR_AF=0.02 +1 98929 rs12184306 A G 100 PASS RSQ=0.6226;AVGPOST=0.8784;AN=2184;VT=SNP;AA=.;ERATE=0.0045;LDAF=0.1723;SNPSOURCE=LOWCOV;AC=264;THETA=0.0070;AF=0.12;ASN_AF=0.16;AMR_AF=0.08;AFR_AF=0.16;EUR_AF=0.09 +1 98946 rs191775802 C G 100 PASS AVGPOST=0.9945;ERATE=0.0013;AN=2184;LDAF=0.0046;VT=SNP;AA=.;RSQ=0.4807;SNPSOURCE=LOWCOV;AC=4;THETA=0.0097;AF=0.0018;AFR_AF=0.01 +1 98974 rs12184307 A G 100 PASS AVGPOST=0.8921;AN=2184;AC=224;THETA=0.0130;VT=SNP;AA=.;LDAF=0.1405;RSQ=0.6149;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.10;ASN_AF=0.14;AMR_AF=0.07;AFR_AF=0.14;EUR_AF=0.06 +1 99671 rs146209971 A T 100 PASS THETA=0.0199;AN=2184;AC=13;RSQ=0.4401;VT=SNP;AA=.;ERATE=0.0010;AVGPOST=0.9802;SNPSOURCE=LOWCOV;LDAF=0.0158;AF=0.01;AMR_AF=0.02;AFR_AF=0.0020;EUR_AF=0.01 +1 99687 rs139153227 C T 100 PASS THETA=0.0211;LDAF=0.0470;AN=2184;VT=SNP;AA=.;ERATE=0.0010;RSQ=0.6276;AVGPOST=0.9548;AC=64;SNPSOURCE=LOWCOV;AF=0.03;AMR_AF=0.03;AFR_AF=0.01;EUR_AF=0.07 +1 99719 rs183898652 C T 100 PASS AN=2184;RSQ=0.5856;VT=SNP;AA=.;AC=10;ERATE=0.0018;SNPSOURCE=LOWCOV;LDAF=0.0076;AVGPOST=0.9925;THETA=0.0251;AF=0.0046;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.0013 +1 100676 rs188226172 A T 100 PASS THETA=0.0212;LDAF=0.0047;AN=2184;VT=SNP;AA=.;RSQ=0.2646;SNPSOURCE=LOWCOV;ERATE=0.0012;AC=2;AVGPOST=0.9925;AF=0.0009;AMR_AF=0.0028;AFR_AF=0.0020 +1 103905 rs142403309 A G 100 PASS AN=2184;THETA=0.0131;ERATE=0.0025;AVGPOST=0.8782;AC=220;VT=SNP;AA=.;LDAF=0.1434;SNPSOURCE=LOWCOV;RSQ=0.5994;AF=0.10;ASN_AF=0.10;AMR_AF=0.14;AFR_AF=0.15;EUR_AF=0.05 +1 106544 rs180741296 C G 100 PASS AC=205;AVGPOST=0.5776;AN=2184;VT=SNP;AA=.;LDAF=0.3120;SNPSOURCE=LOWCOV;ERATE=0.0061;THETA=0.0372;RSQ=0.1442;AF=0.09;ASN_AF=0.11;AMR_AF=0.13;AFR_AF=0.11;EUR_AF=0.05 +1 109107 rs201432136 G GT 67 PASS AA=G;AC=63;AF=0.03;AFR_AF=0.01;AMR_AF=0.04;AN=2184;ASN_AF=0.03;AVGPOST=0.8840;ERATE=0.0122;EUR_AF=0.04;LDAF=0.0890;RSQ=0.3660;THETA=0.0210;VT=INDEL +1 111513 rs199911222 C CTA 249 PASS AA=.;AC=58;AF=0.03;AFR_AF=0.09;AMR_AF=0.03;AN=2184;ASN_AF=0.0017;AVGPOST=0.9145;ERATE=0.0024;EUR_AF=0.0013;LDAF=0.0665;RSQ=0.4694;THETA=0.0292;VT=INDEL diff --git a/vcf/test/FT.vcf b/vcf/test/FT.vcf new file mode 100644 index 0000000..e42436a --- /dev/null +++ b/vcf/test/FT.vcf @@ -0,0 +1,50 @@ +##fileformat=VCFv4.2 +##ALT= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##GATKCommandLine.VariantFiltration= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##reference=file://../ref.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 +ref 63393 . C A 29454.60 . AC=5;AF=1.00;AN=5;DP=719;FS=0.000;MLEAC=5;MLEAF=1.00;MQ=60.00;QD=29.67;SOR=0.965 GT:AD:DP:GQ:PL 1:0,166:166:99:6740,0 1:0,142:142:99:5824,0 1:0,134:134:99:5616,0 1:0,122:122:99:4930,0 1:0,155:155:99:6371,0 +ref 65903 . AATTGCGCTG A 7340.57 PASS AC=1;AF=0.200;AN=5;DP=524;FS=0.000;MLEAC=1;MLEAF=0.200;MQ=60.00;QD=34.04;SOR=1.091 GT:AD:DP:FT:GQ:PL 1:0,164:164:PASS:99:7383,0 0:95,0:95:DP125;DP130:99:0,1800 0:88,0:88:DP125;DP130:99:0,1800 0:87,0:87:DP125;DP130:99:0,1800 0:89,0:89:DP125;DP130:99:0,1800 +ref 70837 . C A 4711.61 Q4800;Q5000 AC=1;AF=0.200;AN=5;DP=512;FS=0.000;MLEAC=1;MLEAF=0.200;MQ=60.00;QD=27.64;SOR=0.726 GT:AD:DP:FT:GQ:PL 0:121,0:121:DP125;DP130:99:0,1800 0:95,0:95:DP125;DP130:99:0,1800 1:0,120:120:DP125;DP130:99:4745,0 0:87,0:87:DP125;DP130:99:0,1800 0:89,0:89:DP125;DP130:99:0,1800 +ref 71448 . C T 31134.60 PASS AC=5;AF=1.00;AN=5;BaseQRankSum=2.22;ClippingRankSum=0.00;DP=768;FS=0.000;MLEAC=5;MLEAF=1.00;MQ=60.00;MQRankSum=0.00;QD=29.43;ReadPosRankSum=2.03;SOR=0.295 GT:AD:DP:FT:GQ:PL 1:0,147:147:PASS:99:5996,0 1:1,183:184:PASS:99:7501,0 1:0,113:113:DP125;DP130:99:4559,0 1:0,161:161:PASS:99:6436,0 1:0,163:163:PASS:99:6669,0 +ref 104257 . C T 5521.61 PASS AC=1;AF=0.200;AN=5;DP=506;FS=0.000;MLEAC=1;MLEAF=0.200;MQ=60.00;QD=29.45;SOR=0.854 GT:AD:DP:FT:GQ:PL 0:101,0:101:DP125;DP130:99:0,1800 0:109,0:109:DP125;DP130:99:0,1800 1:0,132:132:PASS:99:5555,0 0:67,0:67:DP125;DP130:99:0,1800 0:97,0:97:DP125;DP130:99:0,1800 +ref 140658 . C A 32467.60 PASS AC=5;AF=1.00;AN=5;BaseQRankSum=2.24;ClippingRankSum=0.00;DP=801;FS=0.000;MLEAC=5;MLEAF=1.00;MQ=60.00;MQRankSum=0.00;QD=29.65;ReadPosRankSum=1.27;SOR=0.346 GT:AD:DP:GQ:PL 1:0,170:170:99:6854,0 1:0,198:198:99:8098,0 1:0,136:136:99:5554,0 1:0,141:141:99:5661,0 1:1,155:156:99:6327,0 +ref 147463 . C A 4885.61 Q5000 AC=1;AF=0.200;AN=5;BaseQRankSum=-7.720e-01;ClippingRankSum=0.00;DP=503;FS=0.000;MLEAC=1;MLEAF=0.200;MQ=60.00;MQRankSum=0.00;QD=35.03;ReadPosRankSum=-6.950e-01;SOR=0.278 GT:AD:DP:FT:GQ:PL 0:97,0:97:DP125;DP130:99:0,1800 0:104,0:104:DP125;DP130:99:0,1800 0:84,0:84:DP125;DP130:99:0,1800 1:1,128:129:DP130:99:4919,0 0:89,0:89:DP125;DP130:99:0,1800 +ref 154578 . A G 32015.60 PASS AC=5;AF=1.00;AN=5;DP=776;FS=0.000;MLEAC=5;MLEAF=1.00;MQ=60.00;QD=25.82;SOR=0.902 GT:AD:DP:GQ:PL 1:0,152:152:99:6300,0 1:0,183:183:99:7608,0 1:0,137:137:99:5713,0 1:0,148:148:99:6040,0 1:0,156:156:99:6381,0 +ref 203200 . C T 30880.60 PASS AC=5;AF=1.00;AN=5;DP=752;FS=0.000;MLEAC=5;MLEAF=1.00;MQ=60.00;QD=29.65;SOR=0.878 GT:AD:DP:FT:GQ:PL 1:0,161:161:PASS:99:6708,0 1:0,185:185:PASS:99:7602,0 1:0,136:136:PASS:99:5602,0 1:0,126:126:DP130:99:5080,0 1:0,144:144:PASS:99:5915,0 +ref 231665 . C T 30074.60 PASS AC=5;AF=1.00;AN=5;DP=735;FS=0.000;MLEAC=5;MLEAF=1.00;MQ=60.00;QD=33.23;SOR=0.938 GT:AD:DP:FT:GQ:PL 1:0,191:191:PASS:99:7867,0 1:0,159:159:PASS:99:6431,0 1:0,130:130:PASS:99:5299,0 1:0,129:129:DP130:99:5290,0 1:0,126:126:DP130:99:5214,0 diff --git a/vcf/test/bad-info-character.vcf b/vcf/test/bad-info-character.vcf new file mode 100644 index 0000000..099470c --- /dev/null +++ b/vcf/test/bad-info-character.vcf @@ -0,0 +1,14 @@ +##fileformat=VCFv4.1 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample +chr1 100 id1 G A . . FLAG;EMPTY_1=;EMPTY_3=;EMPTY_N=;DOT_1=.;DOT_3=.,.,.;DOT_N=.;NOTEMPTY_1=1;NOTEMPTY_3=1,2,3;NOTEMPTY_N=1 GT 0/1 diff --git a/vcf/test/contig_idonly.vcf b/vcf/test/contig_idonly.vcf new file mode 100644 index 0000000..5e5a6ad --- /dev/null +++ b/vcf/test/contig_idonly.vcf @@ -0,0 +1,5 @@ +##fileformat=VCFv4.2 +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO diff --git a/vcf/test/example-4.0.vcf b/vcf/test/example-4.0.vcf index 27803a1..97fb07e 100644 --- a/vcf/test/example-4.0.vcf +++ b/vcf/test/example-4.0.vcf @@ -20,4 +20,5 @@ 20 17330 . T A 3.0 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3 20 1110696 rs6040355 A G,T 1e+03 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4 20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2 +20 1231234 . AT A 46 PASS NS=3;DP=15;AA=A GT:GQ:DP:HQ 1|1:23:7:26,30 0|0:27:9:56,60 0|0:31:10:65,71 20 1234567 microsat1 GTCT G,GTACT . PASS NS=3;DP=9;AA=G GT:GQ:DP ./.:35:4 0/2:17:2 1/1:40:3 diff --git a/vcf/test/example-4.1-info-multiple-values.vcf b/vcf/test/example-4.1-info-multiple-values.vcf new file mode 100644 index 0000000..6faf95e --- /dev/null +++ b/vcf/test/example-4.1-info-multiple-values.vcf @@ -0,0 +1,7 @@ +##fileformat=VCFv4.1 +##contig= +##INFO= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT +Pf3D7_01_v3 401 . C T 53.99 PASS RepeatCopies=19.3,47.4,14.0;RepeatSize=42,14,56;RepeatConsensus=TCTTATCTTCTTACTTTTCATTCCTTACTCTTACTTACTTAC,TTACTCTTACTTAC,TTACTCTTACTTACTTACTCTTACTTACTTACTCTTACTTACTTACTCTTATCTTC diff --git a/vcf/test/example-4.1-ploidy.vcf b/vcf/test/example-4.1-ploidy.vcf new file mode 100644 index 0000000..6704048 --- /dev/null +++ b/vcf/test/example-4.1-ploidy.vcf @@ -0,0 +1,21 @@ +##fileformat=VCFv4.1 +##fileDate=20090805 +##source=myImputationProgramV3.1 +##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta +##contig= +##phasing=partial +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +X 60034 rs186434315 T A 100 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0:48:1:51,51 1|0:48:8:51,51 1/1/1:43:5:.,. +X 60378 rs185512268 C A 100 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0:48:1:51,51 1:48:8:51,51 0:43:5:.,. \ No newline at end of file diff --git a/vcf/test/example-4.2.vcf b/vcf/test/example-4.2.vcf new file mode 100644 index 0000000..d649fc3 --- /dev/null +++ b/vcf/test/example-4.2.vcf @@ -0,0 +1,56 @@ +##fileformat=VCFv4.2 +##FILTER= +##samtoolsVersion=1.0-17-gfaf4dd6+htslib-1.0-11-g830ea73 +##samtoolsCommand=samtools mpileup -u -t DP,DPR,DV,DP4,INFO/DPR,SP -f /data/archive/reference/Anopheles-arabiensis-Dongola_SCAFFOLDS_AaraD1.fa -r KB704451:0004153102-0004172483 huge_list_of_bam_files_removed +##reference=file:///data/archive/reference/Anopheles-arabiensis-Dongola_SCAFFOLDS_AaraD1.fa +##contig= +##ALT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##bcftools_callVersion=1.0-55-gc661821+htslib-1.0-11-g830ea73 +##bcftools_callCommand=call -m -vM -f GQ,GP +##SnpSiftVersion="SnpSift 3.6c (build 2014-05-20), by Pablo Cingolani" +##SnpSiftCmd="SnpSift varType - " +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT LUPI059 MINE001 OKJ042 LUPI001 LUPI007 LUPI024 LUPI056 LUPI071 LUPI074 LUPI082 MINE040 MINE100 MINE101 MINE105 MINE111 OKJ017 OKJ045 OKJ070 SAGA066 SAGA107 SAGA131 SAGA133 SAGA134 SAGA141 2012L_LUPI_002 2012L_LUPI_015 2012L_LUPI_017 2012L_LUPI_018 2012L_LUPI_035 2012L_LUPI_062 2012L_LUPI_065 2012L_LUPI_077 2012L_LUPI_083 2012L_LUPI_116 2012L_LUPI_013 2012L_LUPI_041 2012L_LUPI_068 2012L_LUPI_096 2012L_LUPI_098 2012L_LUPI_101 2012L_LUPI_103 2012_LUPI_156 2012_LUPI_157 2012_LUPI_161 2012_LUPI_171 2012_LUPI_173 2012_LUPI_180 2012L_LUPI_010 2012L_LUPI_012 2012L_LUPI_021 2012L_LUPI_045 2012L_LUPI_047 2012L_LUPI_060 2012L_LUPI_061 2012L_LUPI_067 2012_LUPI_125 2012_LUPI_129 2012_LUPI_146 2012_LUPI_178 2012_LUPI_211 2012_LUPI_277 2012_LUPI_278 2012_LUPI_279 2012_LUPI_284 +KB704451 4157846 . N A,C 167.0 . DP=10;VDB=1.17174e-06;SGB=1.26353;MQ0F=0;DPR=0,6,4;AC=10,4;AN=14;DP4=0,0,10,0;MQ=60;SNP;VARTYPE=SNP,SNP GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 1/2:74,23,14,57,0,54:4:4:0:0,0,4,0:0,3,1:144,56,16,90,0,57:16 1/2:26,26,26,3,3,0:1:1:0:0,0,1,0:0,0,1:95,58,28,36,2,3:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 1/1:26,3,0,26,3,26:1:1:0:0,0,1,0:0,1,0:96,36,2,60,3,29:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 1/1:26,3,0,26,3,26:1:1:0:0,0,1,0:0,1,0:96,36,2,60,3,29:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 1/2:26,26,26,3,3,0:1:1:0:0,0,1,0:0,0,1:95,58,28,36,2,3:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 1/1:26,3,0,26,3,26:1:1:0:0,0,1,0:0,1,0:96,36,2,60,3,29:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 1/2:26,26,26,3,3,0:1:1:0:0,0,1,0:0,0,1:95,58,28,36,2,3:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 +KB704451 4157870 . T C 275.0 . DP=243;VDB=0.00023935;SGB=29.4468;RPB=0.0368658;MQB=0.979612;MQSB=0.268441;BQB=0.99223;MQ0F=0;DPR=213,19;ICB=0.85092;HOB=0.0287274;AC=6;AN=118;DP4=201,12,19,1;MQ=53;SNP;VARTYPE=SNP GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 0/0:0,66,255:22:0:0:20,2,0,0:22,0:0,75,279:75 0/0:0,12,120:4:0:0:4,0,0,0:4,0:0,21,144:21 1/1:193,36,0:12:12:0:0,0,12,0:0,12:168,20,0:20 0/0:0,9,95:3:0:0:3,0,0,0:3,0:0,18,119:18 0/1:78,0,110:7:3:0:3,1,3,0:4,3:68,0,125:68 0/0:0,3,40:1:0:0:1,0,0,0:1,0:0,12,64:12 0/0:0,6,72:2:0:0:2,0,0,0:2,0:0,15,96:15 0/0:0,9,90:3:0:0:3,0,0,0:3,0:0,18,114:18 0/0:0,12,122:4:0:0:4,0,0,0:4,0:0,21,146:21 0/0:0,9,97:3:0:0:3,0,0,0:3,0:0,18,121:18 0/0:0,15,122:5:0:0:5,0,0,0:5,0:0,24,146:24 0/0:0,6,71:2:0:0:2,0,0,0:2,0:0,15,95:15 0/0:0,6,58:2:0:0:2,0,0,0:2,0:0,15,82:15 0/0:0,18,155:6:0:0:6,0,0,0:6,0:0,27,179:27 0/0:0,3,39:1:0:0:1,0,0,0:1,0:0,12,63:12 0/1:35,3,0:1:1:0:0,0,1,0:0,1:23,0,12:12 0/0:0,9,87:3:0:0:3,0,0,0:3,0:0,18,111:18 0/1:47,0,104:6:2:0:4,0,2,0:4,2:37,0,119:37 0/0:0,21,160:7:0:0:7,0,0,0:7,0:0,30,184:30 0/0:0,6,35:2:0:0:2,0,0,0:2,0:0,15,59:15 0/0:0,12,98:4:0:0:4,0,0,0:4,0:0,21,122:21 0/0:0,6,70:2:0:0:2,0,0,0:2,0:0,15,94:15 0/0:0,6,66:2:0:0:2,0,0,0:2,0:0,15,90:15 0/0:0,12,122:4:0:0:4,0,0,0:4,0:0,21,146:21 0/0:0,3,29:1:0:0:0,1,0,0:1,0:0,12,53:12 0/0:0,6,72:2:0:0:2,0,0,0:2,0:0,15,96:15 0/0:0,9,76:3:0:0:3,0,0,0:3,0:0,18,100:18 0/0:0,15,136:5:0:0:5,0,0,0:5,0:0,24,160:24 0/0:0,30,182:10:0:0:10,0,0,0:10,0:0,39,206:39 0/0:0,6,66:2:0:0:2,0,0,0:2,0:0,15,90:15 0/0:0,6,69:2:0:0:2,0,0,0:2,0:0,15,93:15 0/0:0,27,152:9:0:0:9,0,0,0:9,0:0,36,176:36 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,94:2:0:0:2,0,0,0:2,0:0,15,118:15 0/0:0,21,195:7:0:0:5,2,0,0:7,0:0,30,219:30 0/0:0,9,92:3:0:0:2,1,0,0:3,0:0,18,116:18 0/1:33,0,18:2:1:0:0,1,1,0:1,1:23,0,33:23 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,12,59:12 0/0:0,9,91:3:0:0:3,0,0,0:3,0:0,18,115:18 0/0:0,3,36:1:0:0:1,0,0,0:1,0:0,12,60:12 0/0:0,30,212:10:0:0:9,1,0,0:10,0:0,39,236:39 0/0:0,9,89:3:0:0:3,0,0,0:3,0:0,18,113:18 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,21,195:7:0:0:7,0,0,0:7,0:0,30,219:30 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,97:3:0:0:2,1,0,0:3,0:0,18,121:18 0/0:0,9,93:3:0:0:3,0,0,0:3,0:0,18,117:18 0/0:0,9,116:3:0:0:3,0,0,0:3,0:0,18,140:18 0/0:0,6,71:2:0:0:1,1,0,0:2,0:0,15,95:15 0/0:0,9,89:3:0:0:3,0,0,0:3,0:0,18,113:18 0/0:0,33,175:11:0:0:11,0,0,0:11,0:0,42,199:42 0/0:0,6,63:2:0:0:2,0,0,0:2,0:0,15,87:15 0/0:0,21,145:7:0:0:7,0,0,0:7,0:0,30,169:30 0/0:0,3,39:1:0:0:0,1,0,0:1,0:0,12,63:12 0/0:0,9,84:3:0:0:3,0,0,0:3,0:0,18,108:18 0/0:0,3,13:1:0:0:1,0,0,0:1,0:0,12,37:12 0/0:0,3,23:1:0:0:1,0,0,0:1,0:0,12,47:12 0/0:0,12,106:4:0:0:4,0,0,0:4,0:0,21,130:21 0/0:0,3,36:1:0:0:1,0,0,0:1,0:0,12,60:12 0/0:0,9,94:3:0:0:3,0,0,0:3,0:0,18,118:18 0/0:0,6,67:2:0:0:2,0,0,0:2,0:0,15,91:15 0/0:2,5,27:2:1:0:1,0,0,1:1,0:0,12,49:12 +KB704451 4157877 . G A 999.0 . DP=250;VDB=6.58963e-09;SGB=31.659;RPB=0.0227135;MQB=0.410318;MQSB=0.139343;BQB=0.0767891;MQ0F=0;DPR=188,48;ICB=0.990841;HOB=0.00761276;AC=17;AN=118;DP4=176,12,45,3;MQ=55;SNP;VARTYPE=SNP GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 0/1:159,0,202:22:9:0:12,1,8,1:13,9:154,0,212:127 0/0:0,12,120:4:0:0:4,0,0,0:4,0:0,16,134:16 0/0:0,51,207:17:0:0:17,0,0,0:17,0:0,55,221:55 0/0:0,9,98:3:0:0:3,0,0,0:3,0:0,13,112:13 0/1:123,0,61:8:5:0:3,0,4,1:3,5:118,0,71:71 0/0:0,3,38:1:0:0:0,1,0,0:1,0:0,7,52:7 0/1:68,0,29:3:2:0:1,0,1,1:1,2:63,0,39:39 0/0:0,6,69:2:0:0:2,0,0,0:2,0:0,10,83:10 0/0:0,12,119:4:0:0:4,0,0,0:4,0:0,16,133:16 0/1:34,0,34:2:1:0:1,0,1,0:1,1:29,0,44:29 0/1:24,0,99:5:1:0:4,0,1,0:4,1:19,0,109:19 0/1:34,0,28:2:1:0:1,0,1,0:1,1:29,0,38:29 0/0:0,6,58:2:0:0:2,0,0,0:2,0:0,10,72:10 0/1:122,0,57:7:4:0:3,0,4,0:3,4:117,0,67:67 0/0:0,3,41:1:0:0:1,0,0,0:1,0:0,7,55:7 0/0:0,3,29:1:0:0:1,0,0,0:1,0:0,7,43:7 0/0:0,12,105:4:0:0:4,0,0,0:4,0:0,16,119:16 0/0:0,18,144:6:0:0:6,0,0,0:6,0:0,22,158:22 0/1:118,0,63:8:5:0:3,0,5,0:3,5:113,0,73:73 0/0:0,6,34:2:0:0:2,0,0,0:2,0:0,10,48:10 0/0:0,15,131:5:0:0:5,0,0,0:5,0:0,19,145:19 0/0:0,6,72:2:0:0:2,0,0,0:2,0:0,10,86:10 0/0:0,6,89:2:0:0:2,0,0,0:2,0:0,10,103:10 1/1:124,12,0:4:4:0:0,0,4,0:0,4:112,4,2:4 0/0:0,3,34:1:0:0:0,1,0,0:1,0:0,7,48:7 0/0:0,6,73:2:0:0:2,0,0,0:2,0:0,10,87:10 0/0:0,9,91:3:0:0:3,0,0,0:3,0:0,13,105:13 0/0:0,15,138:5:0:0:5,0,0,0:5,0:0,19,152:19 0/0:0,30,179:10:0:0:10,0,0,0:10,0:0,34,193:34 0/0:0,6,65:2:0:0:2,0,0,0:2,0:0,10,79:10 0/0:0,6,70:2:0:0:2,0,0,0:2,0:0,10,84:10 0/0:0,27,155:9:0:0:9,0,0,0:9,0:0,31,169:31 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,94:2:0:0:2,0,0,0:2,0:0,10,108:10 0/0:0,15,161:5:0:0:3,2,0,0:5,0:0,19,175:19 0/0:0,6,72:2:0:0:1,1,0,0:2,0:0,10,86:10 0/0:0,6,65:2:0:0:1,1,0,0:2,0:0,10,79:10 0/1:36,3,0:1:1:0:0,0,1,0:0,1:29,0,7:7 0/0:0,9,93:3:0:0:3,0,0,0:3,0:0,13,107:13 0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,7,48:7 0/1:87,0,137:10:4:0:5,1,4,0:6,4:82,0,147:82 0/1:57,0,26:3:2:0:1,0,2,0:1,2:52,0,36:35 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:139,0,73:7:4:0:3,0,4,0:3,4:134,0,83:83 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,75:2:0:0:1,1,0,0:2,0:0,10,89:10 0/1:90,9,0:3:3:0:0,0,3,0:0,3:79,2,3:3 0/0:0,9,98:3:0:0:3,0,0,0:3,0:0,13,112:13 0/0:0,6,72:2:0:0:1,1,0,0:2,0:0,10,86:10 0/0:0,9,88:3:0:0:3,0,0,0:3,0:0,13,102:13 0/0:0,33,173:11:0:0:11,0,0,0:11,0:0,37,187:37 0/0:0,6,57:2:0:0:2,0,0,0:2,0:0,10,71:10 0/0:0,15,125:5:0:0:5,0,0,0:5,0:0,19,139:19 0/0:0,6,61:2:0:0:1,1,0,0:2,0:0,10,75:10 0/1:24,0,51:3:1:0:2,0,1,0:2,1:19,0,61:19 0/0:0,3,30:1:0:0:1,0,0,0:1,0:0,7,44:7 0/0:0,3,23:1:0:0:1,0,0,0:1,0:0,7,37:7 0/0:0,12,105:4:0:0:4,0,0,0:4,0:0,16,119:16 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,7,49:7 0/1:25,0,61:3:1:0:2,0,1,0:2,1:20,0,71:20 0/0:0,6,67:2:0:0:2,0,0,0:2,0:0,10,81:10 0/0:0,3,8:1:0:0:0,1,0,0:1,0:0,7,22:7 +KB704451 4157907 . A C 278.0 . DP=295;VDB=0.241276;SGB=26.7514;RPB=0.676983;MQB=0.997838;MQSB=0.136536;BQB=0.45683;MQ0F=0;DPR=264,15;ICB=0.00518819;HOB=0.00237812;AC=4;AN=116;DP4=233,31,14,1;MQ=59;SNP;VARTYPE=SNP GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 0/0:0,90,255:30:0:0:25,5,0,0:30,0:0,101,283:101 0/0:0,30,201:10:0:0:9,1,0,0:10,0:0,41,229:41 0/1:157,0,188:18:8:0:10,0,7,1:10,8:145,0,205:127 0/1:75,0,90:5:2:0:2,1,2,0:3,2:63,0,107:63 0/0:0,30,201:10:0:0:9,1,0,0:10,0:0,41,229:41 0/0:0,6,80:2:0:0:1,1,0,0:2,0:0,17,108:17 0/0:0,12,134:4:0:0:3,1,0,0:4,0:0,23,162:23 0/0:0,3,33:1:0:0:1,0,0,0:1,0:0,14,61:14 0/0:0,21,160:7:0:0:7,0,0,0:7,0:0,32,188:32 0/0:0,12,135:4:0:0:2,2,0,0:4,0:0,23,163:23 0/0:0,15,148:5:0:0:5,0,0,0:5,0:0,26,176:26 0/0:0,9,82:3:0:0:3,0,0,0:3,0:0,20,110:20 0/1:70,0,19:4:3:0:1,0,3,0:1,3:58,0,36:36 0/0:0,24,246:8:0:0:7,1,0,0:8,0:0,35,274:35 0/0:0,18,147:6:0:0:6,0,0,0:6,0:0,29,175:29 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,82:3:0:0:3,0,0,0:3,0:0,20,110:20 0/1:59,0,62:5:2:0:3,0,2,0:3,2:47,0,79:47 0/0:0,33,192:11:0:0:11,0,0,0:11,0:0,44,220:44 0/0:0,9,94:3:0:0:3,0,0,0:3,0:0,20,122:20 0/0:0,24,198:8:0:0:7,1,0,0:8,0:0,35,226:35 0/0:0,12,120:4:0:0:4,0,0,0:4,0:0,23,148:23 0/0:0,15,165:5:0:0:3,2,0,0:5,0:0,26,193:26 0/0:0,24,172:8:0:0:8,0,0,0:8,0:0,35,200:35 0/0:0,3,31:1:0:0:0,1,0,0:1,0:0,14,59:14 0/0:0,6,64:2:0:0:2,0,0,0:2,0:0,17,92:17 0/0:0,6,66:2:0:0:2,0,0,0:2,0:0,17,94:17 0/0:0,15,118:5:0:0:5,0,0,0:5,0:0,26,146:26 0/0:0,33,178:11:0:0:11,0,0,0:11,0:0,44,206:44 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,14,63:14 0/0:0,9,74:3:0:0:2,1,0,0:3,0:0,20,102:20 0/0:0,21,168:7:0:0:5,2,0,0:7,0:0,32,196:32 0/0:0,3,40:1:0:0:1,0,0,0:1,0:0,14,68:14 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,89:2:0:0:2,0,0,0:2,0:0,17,117:17 0/0:0,12,130:4:0:0:2,2,0,0:4,0:0,23,158:23 0/0:0,9,78:3:0:0:1,2,0,0:3,0:0,20,106:20 0/0:0,6,65:2:0:0:1,1,0,0:2,0:0,17,93:17 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,14,63:14 0/0:0,6,55:2:0:0:2,0,0,0:2,0:0,17,83:17 0/0:0,3,29:1:0:0:1,0,0,0:1,0:0,14,57:14 0/0:0,36,194:12:0:0:11,1,0,0:12,0:0,47,222:47 0/0:0,12,110:4:0:0:4,0,0,0:4,0:0,23,138:23 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,18,182:6:0:0:6,0,0,0:6,0:0,29,210:29 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,3,34:1:0:0:0,1,0,0:1,0:0,14,62:14 0/0:0,6,68:2:0:0:2,0,0,0:2,0:0,17,96:17 0/0:0,9,107:3:0:0:3,0,0,0:3,0:0,20,135:20 0/0:0,6,67:2:0:0:1,1,0,0:2,0:0,17,95:17 0/0:0,6,68:2:0:0:2,0,0,0:2,0:0,17,96:17 0/0:0,27,184:9:0:0:9,0,0,0:9,0:0,38,212:38 0/0:0,9,85:3:0:0:3,0,0,0:3,0:0,20,113:20 0/0:0,12,111:4:0:0:4,0,0,0:4,0:0,23,139:23 0/0:0,6,77:2:0:0:1,1,0,0:2,0:0,17,105:17 0/0:0,12,108:4:0:0:3,1,0,0:4,0:0,23,136:23 0/0:0,3,27:1:0:0:1,0,0,0:1,0:0,14,55:14 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,64:2:0:0:2,0,0,0:2,0:0,17,92:17 0/0:0,3,33:1:0:0:1,0,0,0:1,0:0,14,61:14 0/0:0,12,125:4:0:0:4,0,0,0:4,0:0,23,153:23 0/0:0,9,98:3:0:0:2,1,0,0:3,0:0,20,126:20 0/0:0,6,46:2:0:0:2,0,0,0:2,0:0,17,74:17 +KB704451 4157909 . T G 278.0 . DP=295;VDB=0.184881;SGB=22.7413;RPB=0.646301;MQB=0.998034;MQSB=0.200514;BQB=0.321842;MQ0F=0;DPR=247,15;ICB=0.00558284;HOB=0.00255102;AC=4;AN=112;DP4=218,29,15,1;MQ=59;SNP;VARTYPE=SNP GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 0/0:0,87,255:29:0:0:24,5,0,0:29,0:0,97,282:97 0/0:0,27,183:9:0:0:9,0,0,0:9,0:0,37,210:37 0/1:156,0,167:19:8:0:11,0,7,1:11,8:145,0,183:127 0/1:75,0,107:5:2:0:2,1,2,0:3,2:64,0,123:64 0/0:0,27,191:9:0:0:8,1,0,0:9,0:0,37,218:37 0/0:0,6,80:2:0:0:1,1,0,0:2,0:0,16,107:16 0/0:0,12,119:4:0:0:3,1,0,0:4,0:0,22,146:22 0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,13,61:13 0/0:0,15,126:5:0:0:5,0,0,0:5,0:0,25,153:25 0/0:0,12,132:4:0:0:2,2,0,0:4,0:0,22,159:22 0/0:0,12,133:4:0:0:4,0,0,0:4,0:0,22,160:22 0/0:0,6,67:2:0:0:2,0,0,0:2,0:0,16,94:16 0/1:79,9,0:3:3:0:0,0,3,0:0,3:60,0,8:8 0/0:0,21,230:7:0:0:6,1,0,0:7,0:0,31,257:31 0/0:0,18,144:6:0:0:6,0,0,0:6,0:0,28,171:28 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,53:2:0:0:2,0,0,0:2,0:0,16,80:16 0/1:59,0,64:5:2:0:3,0,2,0:3,2:48,0,80:48 0/0:0,33,180:11:0:0:11,0,0,0:11,0:0,43,207:43 0/0:0,12,110:4:0:0:4,0,0,0:4,0:0,22,137:22 0/0:0,24,190:8:0:0:7,1,0,0:8,0:0,34,217:34 0/0:0,12,110:4:0:0:4,0,0,0:4,0:0,22,137:22 0/0:0,15,164:5:0:0:3,2,0,0:5,0:0,25,191:25 0/0:0,24,161:8:0:0:8,0,0,0:8,0:0,34,188:34 0/0:0,3,32:1:0:0:0,1,0,0:1,0:0,13,59:13 0/0:0,6,63:2:0:0:2,0,0,0:2,0:0,16,90:16 0/0:0,6,65:2:0:0:2,0,0,0:2,0:0,16,92:16 0/0:0,15,121:5:0:0:5,0,0,0:5,0:0,25,148:25 0/0:0,30,174:10:0:0:10,0,0,0:10,0:0,40,201:40 0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,13,61:13 0/0:0,6,63:2:0:0:2,0,0,0:2,0:0,16,90:16 0/0:0,21,164:7:0:0:5,2,0,0:7,0:0,31,191:31 0/0:0,3,37:1:0:0:1,0,0,0:1,0:0,13,64:13 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,89:2:0:0:2,0,0,0:2,0:0,16,116:16 0/0:0,12,128:4:0:0:2,2,0,0:4,0:0,22,155:22 0/0:0,9,94:3:0:0:1,2,0,0:3,0:0,19,121:19 0/0:0,6,63:2:0:0:1,1,0,0:2,0:0,16,90:16 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,56:2:0:0:2,0,0,0:2,0:0,16,83:16 0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,13,61:13 0/0:0,36,193:12:0:0:11,1,0,0:12,0:0,46,220:46 0/0:0,12,108:4:0:0:4,0,0,0:4,0:0,22,135:22 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,18,168:6:0:0:6,0,0,0:6,0:0,28,195:28 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,3,31:1:0:0:0,1,0,0:1,0:0,13,58:13 0/0:0,6,47:2:0:0:2,0,0,0:2,0:0,16,74:16 0/0:8,11,65:2:1:0:1,0,1,0:1,0:0,13,84:13 0/0:0,6,64:2:0:0:1,1,0,0:2,0:0,16,91:16 0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,13,61:13 0/0:0,27,177:9:0:0:9,0,0,0:9,0:0,37,204:37 0/0:0,6,50:2:0:0:2,0,0,0:2,0:0,16,77:16 0/0:0,12,101:4:0:0:4,0,0,0:4,0:0,22,128:22 0/0:0,6,65:2:0:0:1,1,0,0:2,0:0,16,92:16 0/0:0,12,100:4:0:0:3,1,0,0:4,0:0,22,127:22 0/0:0,3,31:1:0:0:1,0,0,0:1,0:0,13,58:13 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,84:3:0:0:3,0,0,0:3,0:0,19,111:19 0/0:0,3,32:1:0:0:1,0,0,0:1,0:0,13,59:13 0/0:0,12,104:4:0:0:4,0,0,0:4,0:0,22,131:22 0/0:0,6,66:2:0:0:1,1,0,0:2,0:0,16,93:16 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 +KB704451 4157927 . G A 4.88727 . DP=334;VDB=0.38;SGB=3.29913;RPB=0.454248;MQB=0.970588;MQSB=0.546099;BQB=0.215686;MQ0F=0;DPR=306,2;ICB=0.000310486;HOB=0.000153894;AC=1;AN=114;DP4=265,41,2,0;MQ=59;SNP;VARTYPE=SNP GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 0/0:0,105,255:35:0:0:31,4,0,0:35,0:0,122,295:122 0/0:0,45,255:15:0:0:14,1,0,0:15,0:0,62,295:62 0/0:0,60,255:20:0:0:17,3,0,0:20,0:0,77,295:77 0/0:0,21,242:7:0:0:6,1,0,0:7,0:0,38,282:38 0/0:0,24,207:8:0:0:6,2,0,0:8,0:0,41,247:41 0/0:0,6,71:2:0:0:2,0,0,0:2,0:0,23,112:23 0/0:0,24,215:8:0:0:6,2,0,0:8,0:0,41,255:41 0/0:0,6,70:2:0:0:1,1,0,0:2,0:0,23,111:23 0/0:0,30,191:10:0:0:10,0,0,0:10,0:0,47,231:47 0/0:0,15,163:5:0:0:3,2,0,0:5,0:0,32,203:32 0/0:0,15,151:5:0:0:5,0,0,0:5,0:0,32,191:32 0/0:0,6,70:2:0:0:2,0,0,0:2,0:0,23,111:23 0/0:0,12,102:4:0:0:4,0,0,0:4,0:0,29,142:29 0/0:0,24,255:8:0:0:6,2,0,0:8,0:0,41,295:41 0/0:0,21,189:7:0:0:7,0,0,0:7,0:0,38,229:38 0/0:0,3,35:1:0:0:0,1,0,0:1,0:0,20,76:20 0/0:0,3,40:1:0:0:1,0,0,0:1,0:0,20,81:20 0/0:0,12,126:4:0:0:3,1,0,0:4,0:0,29,166:29 0/0:0,39,255:13:0:0:12,1,0,0:13,0:0,56,295:56 0/0:0,21,206:7:0:0:6,1,0,0:7,0:0,38,246:38 0/0:0,30,238:10:0:0:8,2,0,0:10,0:0,47,278:47 0/0:0,18,145:6:0:0:6,0,0,0:6,0:0,35,185:35 0/0:0,24,244:8:0:0:6,2,0,0:8,0:0,41,284:41 0/0:0,24,195:8:0:0:7,1,0,0:8,0:0,41,235:41 0/0:0,3,27:1:0:0:0,1,0,0:1,0:0,20,68:20 0/0:0,6,62:2:0:0:2,0,0,0:2,0:0,23,103:23 0/0:0,6,64:2:0:0:2,0,0,0:2,0:0,23,105:23 0/0:0,15,123:5:0:0:5,0,0,0:5,0:0,32,163:32 0/0:0,33,184:11:0:0:11,0,0,0:11,0:0,50,224:50 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,20,76:20 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,20,76:20 0/0:0,18,165:6:0:0:4,2,0,0:6,0:0,35,205:35 0/0:0,3,38:1:0:0:1,0,0,0:1,0:0,20,79:20 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,117:3:0:0:3,0,0,0:3,0:0,26,157:26 0/0:0,12,121:4:0:0:2,2,0,0:4,0:0,29,161:29 0/0:0,9,95:3:0:0:1,2,0,0:3,0:0,26,135:26 0/0:0,3,41:1:0:0:1,0,0,0:1,0:0,20,82:20 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,60:2:0:0:2,0,0,0:2,0:0,23,101:23 0/0:0,3,25:1:0:0:1,0,0,0:1,0:0,20,66:20 0/0:0,36,213:12:0:0:11,1,0,0:12,0:0,53,253:53 0/0:0,15,152:5:0:0:4,1,0,0:5,0:0,32,192:32 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,21,215:7:0:0:6,1,0,0:7,0:0,38,255:38 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,3,31:1:0:0:0,1,0,0:1,0:0,20,72:20 0/0:0,6,60:2:0:0:2,0,0,0:2,0:0,23,101:23 0/0:0,9,101:3:0:0:3,0,0,0:3,0:0,26,141:26 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,60:2:0:0:2,0,0,0:2,0:0,23,101:23 0/0:0,27,179:9:0:0:9,0,0,0:9,0:0,44,219:44 0/0:0,9,92:3:0:0:3,0,0,0:3,0:0,26,132:26 0/0:0,12,112:4:0:0:4,0,0,0:4,0:0,29,152:29 0/0:0,6,58:2:0:0:1,1,0,0:2,0:0,23,99:23 0/0:0,15,123:5:0:0:4,1,0,0:5,0:0,32,163:32 0/0:0,3,25:1:0:0:1,0,0,0:1,0:0,20,66:20 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,78:3:0:0:3,0,0,0:3,0:0,26,118:26 0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,20,75:20 0/1:47,0,51:3:2:0:1,0,2,0:1,2:29,0,74:29 0/0:0,9,80:3:0:0:2,1,0,0:3,0:0,26,120:26 0/0:0,6,53:2:0:0:2,0,0,0:2,0:0,23,94:23 +KB704451 4157938 . ATTT ATTTT 650.0 . INDEL;IDV=18;IMF=0.428571;DP=361;VDB=0.773794;SGB=32.6744;MQSB=0.993251;MQ0F=0.00831025;DPR=115,60;ICB=0.929833;HOB=0.0258;AC=23;AN=100;DP4=98,17,48,12;MQ=59;INS;VARTYPE=INS GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 0/1:124,0,7:19:16:0:3,0,13,3:3,16:123,0,13:13 0/1:14,3,0:1:1:0:0,0,1,0:0,1:12,1,5:4 0/0:0,9,55:3:0:0:3,0,0,0:3,0:0,9,62:9 0/0:0,21,146:7:0:0:5,2,0,0:7,0:0,21,153:21 0/0:0,18,126:6:0:0:4,2,0,0:6,0:0,18,133:18 0/0:0,6,56:2:0:0:2,0,0,0:2,0:0,7,63:7 0/0:0,9,87:3:0:0:2,1,0,0:3,0:0,9,94:9 1/1:46,9,0:3:3:0:0,0,1,2:0,3:40,4,1:4 0/0:0,27,155:9:0:0:8,1,0,0:9,0:0,27,162:27 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,57:2:0:0:2,0,0,0:2,0:0,7,64:7 0/1:24,3,0:1:1:0:0,0,1,0:0,1:22,1,5:5 0/0:0,3,32:1:0:0:1,0,0,0:1,0:1,5,40:5 0/0:0,9,84:3:0:0:2,1,0,0:3,0:0,9,91:9 0/0:0,15,111:5:0:0:5,0,0,0:5,0:0,15,118:15 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:32,6,0:2:2:0:0,0,1,1:0,2:28,2,3:3 0/0:0,6,55:2:0:0:2,0,0,0:2,0:0,7,62:7 0/1:63,0,61:7:4:0:3,0,3,1:3,4:62,0,67:61 0/0:0,24,154:8:0:0:7,1,0,0:8,0:0,24,161:24 0/1:16,0,94:7:2:0:5,0,2,0:5,2:15,0,100:15 0/1:61,0,101:9:4:0:4,1,3,1:5,4:60,0,107:60 0/0:0,3,31:1:0:0:1,0,0,0:1,0:1,5,39:5 0/1:13,3,0:1:1:0:0,0,0,1:0,1:11,1,5:4 0/1:16,0,47:3:1:0:2,0,1,0:2,1:15,0,53:15 0/0:0,3,32:1:0:0:1,0,0,0:1,0:1,5,40:5 0/1:46,0,57:5:2:0:3,0,2,0:3,2:45,0,63:45 0/1:50,0,12:6:5:0:1,0,5,0:1,5:49,0,18:18 0/0:0,3,30:1:0:0:1,0,0,0:1,0:1,5,38:5 0/0:0,3,4:1:0:0:1,0,0,0:1,0:1,5,12:4 0/1:18,0,98:5:1:0:2,2,1,0:4,1:17,0,104:17 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:15,0,26:2:1:0:1,0,1,0:1,1:14,0,32:14 0/1:20,0,26:2:1:0:1,0,0,1:1,1:19,0,32:19 0/0:0,6,60:2:0:0:1,1,0,0:2,0:0,7,67:7 0/0:0,3,32:1:0:0:1,0,0,0:1,0:1,5,40:5 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:33,3,0:1:1:0:0,0,1,0:0,1:31,1,5:5 0/1:82,6,0:7:6:0:1,0,5,1:1,6:78,2,3:3 0/0:0,15,126:5:0:0:4,1,0,0:5,0:0,15,133:15 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:34,6,0:2:2:0:0,0,2,0:0,2:30,2,3:3 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,50:2:0:0:1,1,0,0:2,0:0,7,57:7 0/0:0,3,28:1:0:0:1,0,0,0:1,0:1,5,36:5 0/0:0,6,57:2:0:0:2,0,0,0:2,0:0,7,64:7 0/0:0,18,124:6:0:0:5,1,0,0:6,0:0,18,131:18 0/1:53,6,0:2:2:0:0,0,2,0:0,2:49,2,3:3 0/0:0,12,96:4:0:0:4,0,0,0:4,0:0,12,103:12 0/1:25,3,0:1:1:0:0,0,1,0:0,1:23,1,5:5 1/1:61,9,0:3:3:0:0,0,2,1:0,3:55,4,1:4 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,3,31:1:0:0:0,1,0,0:1,0:1,5,39:5 0/0:0,6,56:2:0:0:2,0,0,0:2,0:0,7,63:7 0/0:0,3,29:1:0:0:1,0,0,0:1,0:1,5,37:5 0/0:0,3,32:1:0:0:1,0,0,0:1,0:1,5,40:5 0/0:0,9,87:3:0:0:2,1,0,0:3,0:0,9,94:9 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 +KB704451 4157940 . TTGTGTGTGTGTGT TTGTGTGTGTGTGTGTGT,TTTCTGTGTGTGTGTGT 999.0 . INDEL;IDV=7;IMF=0.5;DP=366;VDB=0.0431342;SGB=14.7456;MQSB=0.996953;MQ0F=0.010929;DPR=86,41,8;ICB=0.963728;HOB=0.02;AC=21,6;AN=90;DP4=70,16,39,10;MQ=58;INS;VARTYPE=INS,INS GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 0/2:60,60,60,3,3,0:1:1:0:0,0,1,0:0,0,1:54,55,61,2,5,9:4 1/1:255,18,0,255,18,255:6:6:0:0,0,3,3:0,6,0:248,11,0,252,19,263:11 0/0:0,6,62,6,62,62:2:0:0:2,0,0,0:2,0,0:1,7,70,11,71,78:6 0/1:9,0,238,27,241,255:7:1:0:4,2,1,0:6,1,0:8,0,245,31,249,270:8 0/0:0,12,185,12,185,185:4:0:0:3,1,0,0:4,0,0:0,12,192,16,193,200:11 0/0:0,6,110,6,110,110:2:0:0:2,0,0,0:2,0,0:1,7,118,11,119,126:6 0/1:1,0,158,10,161,165:4:1:0:2,1,1,0:3,1,0:3,2,167,16,171,182:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/0:0,24,255,24,255,255:8:0:0:7,1,0,0:8,0,0:0,24,262,28,263,270:23 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/1:37,0,54,40,57,94:2:1:0:1,0,1,0:1,1,0:36,0,60,44,64,108:35 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/1:117,6,0,117,6,117:2:2:0:0,0,1,1:0,2,0:113,3,3,118,10,128:2 0/0:0,18,237,18,237,237:6:0:0:5,1,0,0:6,0,0:0,18,244,22,245,252:17 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/2:120,120,120,6,6,0:2:2:0:0,0,1,1:0,0,2:111,112,119,2,6,7:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 1/1:255,21,0,255,21,255:7:7:0:0,0,5,2:0,7,0:247,14,0,252,22,263:13 0/2:19,31,181,0,157,154:5:1:0:3,1,1,0:4,0,1:14,27,183,0,160,164:14 0/1:120,0,255,141,255,255:10:3:0:6,1,2,1:7,3,0:119,0,261,145,262,269:119 0/0:0,15,212,15,212,212:5:0:0:5,0,0,0:5,0,0:0,15,219,19,220,227:14 0/0:0,15,243,15,243,243:5:0:0:4,1,0,0:5,0,0:0,15,250,19,251,258:14 1/1:40,9,0,40,9,40:3:3:0:0,0,3,0:0,3,0:34,4,2,39,12,50:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/0:0,6,110,6,110,110:2:0:0:2,0,0,0:2,0,0:1,7,118,11,119,126:6 0/1:54,0,54,57,57,111:2:1:0:1,0,1,0:1,1,0:53,0,60,61,64,125:51 0/0:0,9,139,9,139,139:3:0:0:3,0,0,0:3,0,0:0,10,146,14,147,154:8 1/1:243,15,0,243,15,243:5:5:0:0,0,4,1:0,5,0:236,9,0,241,16,251:8 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/1:55,1,0,58,3,57:2:1:0:1,0,1,0:1,1,0:54,1,7,62,11,72:5 0/0:0,9,170,9,170,170:3:0:0:1,2,0,0:3,0,0:0,10,177,14,178,185:8 0/1:60,3,0,60,3,60:1:1:0:0,0,1,0:0,1,0:58,2,5,63,9,73:4 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/1:60,3,0,60,3,60:1:1:0:0,0,0,1:0,1,0:58,2,5,63,9,73:4 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/1:60,3,0,60,3,60:1:1:0:0,0,1,0:0,1,0:58,2,5,63,9,73:4 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/0:0,15,206,15,206,206:5:0:0:4,1,0,0:5,0,0:0,15,213,19,214,221:14 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 1/2:134,105,96,40,0,34:5:5:0:0,0,5,0:0,3,2:125,97,95,36,0,41:35 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/1:58,6,0,58,6,58:2:2:0:0,0,2,0:0,2,0:54,3,3,59,10,69:2 0/0:0,6,102,6,102,102:2:0:0:1,1,0,0:2,0,0:1,7,110,11,111,118:6 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/1:98,0,88,104,94,191:4:2:0:1,1,2,0:2,2,0:97,0,94,108,101,205:92 0/2:35,35,35,3,3,0:1:1:0:0,0,1,0:0,0,1:29,30,36,2,5,9:4 0/0:0,6,110,6,110,110:2:0:0:2,0,0,0:2,0,0:1,7,118,11,119,126:6 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/2:45,45,45,3,3,0:1:1:0:0,0,1,0:0,0,1:39,40,46,2,5,9:4 0/0:0,3,60,3,60,60:1:0:0:0,1,0,0:1,0,0:2,5,69,9,70,77:4 0/0:0,6,110,6,110,110:2:0:0:2,0,0,0:2,0,0:1,7,118,11,119,126:6 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/0:0,6,120,6,120,120:2:0:0:1,1,0,0:2,0,0:1,7,128,11,129,136:6 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 diff --git a/vcf/test/gatk_26_meta.vcf b/vcf/test/gatk_26_meta.vcf new file mode 100644 index 0000000..1dd2e56 --- /dev/null +++ b/vcf/test/gatk_26_meta.vcf @@ -0,0 +1,4 @@ +##fileformat=VCFv4.1 +##GATKCommandLine= +##GATKCommandLine= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT diff --git a/vcf/test/gonl.chr20.release4.gtc.vcf b/vcf/test/gonl.chr20.release4.gtc.vcf new file mode 100644 index 0000000..03588bf --- /dev/null +++ b/vcf/test/gonl.chr20.release4.gtc.vcf @@ -0,0 +1,120 @@ +##fileformat=VCFv4.1 +##ApplyRecalibration="analysis_type=ApplyRecalibration input_file=[] read_buffer_size=null phone_home=STANDARD gatk_key=null read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/target/gpfs2/gcc/resources/hg19/indices/human_g1k_v37.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false BQSR=null defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false input=[(RodBinding name=input source=/target/gpfs2/gcc/home/lfrancioli/gonl/projects/trio-analysis/results/snps/UG_raw_biallelic/gonl.biallelic.vcf)] recal_file=/target/gpfs2/gcc/home/lfrancioli/gonl/projects/trio-analysis/intermediate/snps/vqsr_1kg_phase1/gonl.biallelic.vcf.1kg_phase1.2.recal tranches_file=/target/gpfs2/gcc/home/lfrancioli/gonl/projects/trio-analysis/intermediate/snps/vqsr_1kg_phase1/gonl.biallelic.vcf.1kg_phase1.2.tranches out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub ts_filter_level=99.5 ignore_filter=null mode=SNP filter_mismatching_base_and_quals=false" +##CombineVariants="analysis_type=CombineVariants input_file=[] sample_metadata=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=[1:123000001-126000000] excludeIntervals=null reference_sequence=/humgen/1kg/reference/human_g1k_v37.fasta rodBind=[/humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/AFR/AFR.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/ASN/ASN.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/AMR/AMR.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/EUR/EUR.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/AFR.admix/AFR.admix.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/ASN.admix/ASN.admix.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/AMR.admix/AMR.admix.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/EUR.admix/EUR.admix.phase1.chr1.42.raw.snps.vcf, /humgen/1kg/processing/production_wgs_phase1/consensus_wgs/v2b/calls/chr1/ALL/ALL.phase1.chr1.42.raw.snps.vcf] rodToIntervalTrackName=null BTI_merge_rule=UNION nonDeterministicRandomSeed=false DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub genotypemergeoption=PRIORITIZE filteredrecordsmergetype=KEEP_IF_ANY_UNFILTERED rod_priority_list=ALL,AFR.admix,AMR.admix,EUR.admix,ASN.admix,AFR,AMR,EUR,ASN printComplexMerges=false filteredAreUncalled=false minimalVCF=false setKey=pop assumeIdenticalSamples=false minimumN=1 masterMerge=false mergeInfoWithMaxAC=true" +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##INFO= +##INFO= +##SelectVariants="analysis_type=SelectVariants input_file=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=[1:1-5000001] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/target/gpfs2/gcc/home/lfrancioli/gonl/resources/hg19/indices/human_g1k_v37.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=/target/gpfs2/gcc/home/lfrancioli/results/trio-analysis/ug_initial/gonl.1_1-5000001.vcf) discordance=(RodBinding name= source=UNBOUND) concordance=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sample_name=[] sample_expressions=null sample_file=null exclude_sample_name=[] exclude_sample_file=[] select_expressions=[] excludeNonVariants=false excludeFiltered=false restrictAllelesTo=BIALLELIC keepOriginalAC=false mendelianViolation=false mendelianViolationQualThreshold=0.0 select_random_number=0 select_random_fraction=0.0 remove_fraction_genotypes=0.0 selectTypeToInclude=[] keepIDs=null outMVFile=null filter_mismatching_base_and_quals=false" +##SetFilterPASS="analysis_type=SetFilterPASS input_file=[] read_buffer_size=null phone_home=STANDARD gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/target/gpfs2/gcc/resources/hg19/indices/human_g1k_v37.fa nonDeterministicRandomSeed=false disableRandomization=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 enable_experimental_downsampling=false baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 defaultBaseQualities=-1 validation_strictness=SILENT remove_program_records=false keep_program_records=false unsafe=null num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=/target/gpfs2/gcc/home/lfrancioli/gonl/projects/trio-analysis/intermediate/snps/vqsr_1kg_phase1/gonl.biallelic.vqsr_1kg_phase1.2.99.5.vcf) sites=[(RodBinding name=sites source=/target/gpfs2/gcc/home/lfrancioli/resources/1000GP_hg19/EUR.wgs.project_consensus_vqsr2b.20101123.snps.low_coverage.sites.vcf)] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub filter_mismatching_base_and_quals=false" +##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[/humgen/1kg/phase1_cleaned_bams/bams/chr1/CHB.phase1.chr1.42.cleaned.bam, /humgen/1kg/phase1_cleaned_bams/bams/chr1/CHS.phase1.chr1.42.cleaned.bam, /humgen/1kg/phase1_cleaned_bams/bams/chr1/CLM.phase1.chr1.42.cleaned.bam, /humgen/1kg/phase1_cleaned_bams/bams/chr1/JPT.phase1.chr1.42.cleaned.bam, /humgen/1kg/phase1_cleaned_bams/bams/chr1/MXL.phase1.chr1.42.cleaned.bam, /humgen/1kg/phase1_cleaned_bams/bams/chr1/PUR.phase1.chr1.42.cleaned.bam] sample_metadata=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=[1:123000001-126000000] excludeIntervals=null reference_sequence=/humgen/1kg/reference/human_g1k_v37.fasta rodBind=[/humgen/1kg/processing/production_wgs_phase1/consensus/ALL.phase1.wgs.unionBC1.pass.sites.vcf, /humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf] rodToIntervalTrackName=null BTI_merge_rule=UNION nonDeterministicRandomSeed=false DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=50 baq=CALCULATE_AS_NECESSARY baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=SNP p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=GENOTYPE_GIVEN_ALLELES output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=4.0 standard_min_confidence_threshold_for_emitting=4.0 noSLOD=false assume_single_sample_reads=null abort_at_too_much_coverage=-1 min_base_quality_score=17 min_mapping_quality_score=20 max_deletion_fraction=0.05 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 doContextDependentGapPenalties=true getGapPenaltiesFromData=false indel_recal_file=indel.recal_data.csv indelDebug=false dovit=false GSA_PRODUCTION_ONLY=false exactCalculation=LINEAR_EXPERIMENTAL ignoreSNPAlleles=false output_all_callable_bases=false genotype=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null metrics_file=null annotation=[]" +##VariantAnnotator="analysis_type=VariantAnnotator input_file=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=[1:1-5000001] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/target/gpfs2/gcc/home/lfrancioli/gonl/resources/hg19/indices/human_g1k_v37.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[/target/gpfs2/gcc/home/lfrancioli/gonl/resources/UnifiedGenotyper/GoNL.ped] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=/target/gpfs2/gcc/home/lfrancioli/results/trio-analysis/snps/gonl.1_1-5000001.biallelic.vcf) snpEffFile=(RodBinding name= source=UNBOUND) dbsnp=(RodBinding name= source=UNBOUND) comp=[] resource=[] out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub annotation=[TransmissionDisequilibriumTest, InbreedingCoeff, AlleleDosage, ChromosomeCounts] excludeAnnotation=[] group=[] expression=[] useAllAnnotations=false list=false vcfContainsOnlyIndels=false MendelViolationGenotypeQualityThreshold=0.0 requireStrictAlleleMatch=false filter_mismatching_base_and_quals=false" +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##reference=file:///target/gpfs2/gcc/resources/hg19/indices/human_g1k_v37.fa +##source=SelectVariants +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO +20 60309 . G T 991.76 PASS AC=4;AN=996;GTC=494,4,0 +20 60573 . T C 124.17 PASS AC=1;AN=996;GTC=497,1,0 +20 60828 . T G 807.71 PASS AC=6;AN=996;GTC=492,6,0 +20 61098 rs6078030 C T 51254.56 PASS AC=225;AN=996;GTC=304,163,31 +20 61270 . A C 2414.84 PASS AC=20;AN=992;GTC=476,20,0 +20 61289 . A C 419.41 TruthSensitivityTranche99.70to99.80 AC=71;AN=960;GTC=411,67,2 +20 61682 . C T 12.27 PASS AC=1;AN=996;GTC=497,1,0 diff --git a/vcf/test/info-type-character.vcf b/vcf/test/info-type-character.vcf new file mode 100644 index 0000000..77b24a7 --- /dev/null +++ b/vcf/test/info-type-character.vcf @@ -0,0 +1,8 @@ +##fileformat=VCFv4.1 +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample +chr1 100 id1 G A . . FLOAT_1=123.456;CHAR_1=Y;FLOAT_N=123.456;CHAR_N=Y GT 0/1 diff --git a/vcf/test/issue-140-file1.vcf b/vcf/test/issue-140-file1.vcf new file mode 100644 index 0000000..8ee2de2 --- /dev/null +++ b/vcf/test/issue-140-file1.vcf @@ -0,0 +1,35 @@ +##fileformat=VCFv4.1 +##source=VarScan2 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL +chr1 10 . G GGT . PASS DP=91;SS=1;SSC=2;GPV=3.0109E-23;SPV=5.8324E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/1:.:36:13:22:62.86%:2,11,1,21 +chr1 20 . GT G . PASS DP=77;SS=1;SSC=2;GPV=2.4504E-29;SPV=6.0772E-1 GT:GQ:DP:RD:AD:FREQ:DP4 1/1:.:28:5:22:81.48%:0,5,1,21 +chr2 30 . AC A . PASS DP=22;SS=1;SSC=7;GPV=1.3117E-10;SPV=1.9481E-1 GT:GQ:DP:RD:AD:FREQ:DP4 1/1:.:10:2:8:80%:0,2,0,8 +chr2 40 . AAAC A . PASS DP=42;SS=1;SSC=12;GPV=7.3092E-18;SPV=6.278E-2 GT:GQ:DP:RD:AD:FREQ:DP4 0/1:.:13:4:9:69.23%:4,0,9,0 +chr3 50 . TC T . PASS DP=41;SS=1;SSC=2;GPV=9.8874E-23;SPV=5.3659E-1 GT:GQ:DP:RD:AD:FREQ:DP4 1/1:.:22:1:21:95.45%:1,0,15,6 +chr10 60 . T TTAA . PASS DP=27;SS=1;SSC=2;GPV=1.4382E-14;SPV=5.5556E-1 GT:GQ:DP:RD:AD:FREQ:DP4 1/1:.:12:0:12:100%:0,0,0,12 +chr10 70 . C CTG . PASS DP=40;SS=1;SSC=7;GPV=3.6006E-9;SPV=1.9922E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/1:.:10:6:4:40%:0,6,0,4 +chr11 80 . AGTT A . PASS DP=86;SS=1;SSC=0;GPV=4.1554E-34;SPV=8.5795E-1 GT:GQ:DP:RD:AD:FREQ:DP4 1/1:.:32:4:28:87.5%:1,3,0,28 +chr11 90 . GA G . PASS DP=41;SS=1;SSC=3;GPV=1.9197E-12;SPV=4.089E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/1:.:15:5:9:64.29%:1,4,0,9 +chr20 100 . TTTTG T . PASS DP=23;SS=1;SSC=1;GPV=2.9149E-12;SPV=6.5217E-1 GT:GQ:DP:RD:AD:FREQ:DP4 1/1:.:8:0:8:100%:0,0,7,1 +chr20 110 . GA G . PASS DP=83;SS=1;SSC=13;GPV=1E0;SPV=4.0806E-2 GT:GQ:DP:RD:AD:FREQ:DP4 1/1:.:35:5:28:84.85%:4,1,12,16 +chrX 120 . G GA . PASS DP=61;SS=1;SSC=1;GPV=1.6967E-25;SPV=7.0485E-1 GT:GQ:DP:RD:AD:FREQ:DP4 1/1:.:22:3:19:86.36%:0,3,1,18 +chrX 130 . T TAA . PASS DP=19;SS=1;SSC=1;GPV=1.1285E-5;SPV=7.2172E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/1:.:8:2:5:71.43%:0,2,0,5 +chrY 140 . G GTTT . PASS DP=62;SS=1;SSC=0;GPV=3.4914E-15;SPV=9.571E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/1:.:36:2:19:90.48%:1,1,15,4 +chrY 150 . T TGAAG . PASS DP=28;SS=1;SSC=12;GPV=1.7583E-10;SPV=5.5797E-2 GT:GQ:DP:RD:AD:FREQ:DP4 0/1:.:13:5:8:61.54%:4,1,2,6 +chrM 160 . G GTTT . PASS DP=62;SS=1;SSC=0;GPV=3.4914E-15;SPV=9.571E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/1:.:36:2:19:90.48%:1,1,15,4 +chrM 170 . T TGAAG . PASS DP=28;SS=1;SSC=12;GPV=1.7583E-10;SPV=5.5797E-2 GT:GQ:DP:RD:AD:FREQ:DP4 0/1:.:13:5:8:61.54%:4,1,2,6 diff --git a/vcf/test/issue-140-file2.vcf b/vcf/test/issue-140-file2.vcf new file mode 100644 index 0000000..7852133 --- /dev/null +++ b/vcf/test/issue-140-file2.vcf @@ -0,0 +1,34 @@ +##fileformat=VCFv4.1 +##source=VarScan2 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL +chr1 10 . G GGT . PASS DP=91;SS=1;SSC=2;GPV=3.0109E-23;SPV=5.8324E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/1:.:36:13:22:62.86%:2,11,1,21 +chr1 20 . GT G . PASS DP=77;SS=1;SSC=2;GPV=2.4504E-29;SPV=6.0772E-1 GT:GQ:DP:RD:AD:FREQ:DP4 1/1:.:28:5:22:81.48%:0,5,1,21 +chr2 30 . AC A . PASS DP=22;SS=1;SSC=7;GPV=1.3117E-10;SPV=1.9481E-1 GT:GQ:DP:RD:AD:FREQ:DP4 1/1:.:10:2:8:80%:0,2,0,8 +chr2 41 . AAAC A . PASS DP=42;SS=1;SSC=12;GPV=7.3092E-18;SPV=6.278E-2 GT:GQ:DP:RD:AD:FREQ:DP4 0/1:.:13:4:9:69.23%:4,0,9,0 +chr10 60 . T TTAA . PASS DP=27;SS=1;SSC=2;GPV=1.4382E-14;SPV=5.5556E-1 GT:GQ:DP:RD:AD:FREQ:DP4 1/1:.:12:0:12:100%:0,0,0,12 +chr10 70 . C CTG . PASS DP=40;SS=1;SSC=7;GPV=3.6006E-9;SPV=1.9922E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/1:.:10:6:4:40%:0,6,0,4 +chr11 80 . AGTT A . PASS DP=86;SS=1;SSC=0;GPV=4.1554E-34;SPV=8.5795E-1 GT:GQ:DP:RD:AD:FREQ:DP4 1/1:.:32:4:28:87.5%:1,3,0,28 +chr11 91 . GA G . PASS DP=41;SS=1;SSC=3;GPV=1.9197E-12;SPV=4.089E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/1:.:15:5:9:64.29%:1,4,0,9 +chr20 100 . TTTTG T . PASS DP=23;SS=1;SSC=1;GPV=2.9149E-12;SPV=6.5217E-1 GT:GQ:DP:RD:AD:FREQ:DP4 1/1:.:8:0:8:100%:0,0,7,1 +chr20 110 . GA G . PASS DP=83;SS=1;SSC=13;GPV=1E0;SPV=4.0806E-2 GT:GQ:DP:RD:AD:FREQ:DP4 1/1:.:35:5:28:84.85%:4,1,12,16 +chrX 120 . G GA . PASS DP=61;SS=1;SSC=1;GPV=1.6967E-25;SPV=7.0485E-1 GT:GQ:DP:RD:AD:FREQ:DP4 1/1:.:22:3:19:86.36%:0,3,1,18 +chrX 130 . T TAA . PASS DP=19;SS=1;SSC=1;GPV=1.1285E-5;SPV=7.2172E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/1:.:8:2:5:71.43%:0,2,0,5 +chrY 140 . G GTTT . PASS DP=62;SS=1;SSC=0;GPV=3.4914E-15;SPV=9.571E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/1:.:36:2:19:90.48%:1,1,15,4 +chrY 149 . T TGAAG . PASS DP=28;SS=1;SSC=12;GPV=1.7583E-10;SPV=5.5797E-2 GT:GQ:DP:RD:AD:FREQ:DP4 0/1:.:13:5:8:61.54%:4,1,2,6 +chrM 160 . G GTTT . PASS DP=62;SS=1;SSC=0;GPV=3.4914E-15;SPV=9.571E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/1:.:36:2:19:90.48%:1,1,15,4 +chrM 170 . T TGAAG . PASS DP=28;SS=1;SSC=12;GPV=1.7583E-10;SPV=5.5797E-2 GT:GQ:DP:RD:AD:FREQ:DP4 0/1:.:13:5:8:61.54%:4,1,2,6 diff --git a/vcf/test/issue-140-file3.vcf b/vcf/test/issue-140-file3.vcf new file mode 100644 index 0000000..754f6b6 --- /dev/null +++ b/vcf/test/issue-140-file3.vcf @@ -0,0 +1,25 @@ +##fileformat=VCFv4.1 +##source=VarScan2 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL +chr3 50 . TC T . PASS DP=41;SS=1;SSC=2;GPV=9.8874E-23;SPV=5.3659E-1 GT:GQ:DP:RD:AD:FREQ:DP4 1/1:.:22:1:21:95.45%:1,0,15,6 +chr10 60 . T TTAA . PASS DP=27;SS=1;SSC=2;GPV=1.4382E-14;SPV=5.5556E-1 GT:GQ:DP:RD:AD:FREQ:DP4 1/1:.:12:0:12:100%:0,0,0,12 +chr10 70 . C CTG . PASS DP=40;SS=1;SSC=7;GPV=3.6006E-9;SPV=1.9922E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/1:.:10:6:4:40%:0,6,0,4 +chr11 80 . AGTT A . PASS DP=86;SS=1;SSC=0;GPV=4.1554E-34;SPV=8.5795E-1 GT:GQ:DP:RD:AD:FREQ:DP4 1/1:.:32:4:28:87.5%:1,3,0,28 +chr11 90 . GA G . PASS DP=41;SS=1;SSC=3;GPV=1.9197E-12;SPV=4.089E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/1:.:15:5:9:64.29%:1,4,0,9 +chr20 100 . TTTTG T . PASS DP=23;SS=1;SSC=1;GPV=2.9149E-12;SPV=6.5217E-1 GT:GQ:DP:RD:AD:FREQ:DP4 1/1:.:8:0:8:100%:0,0,7,1 +chrX 120 . G GA . PASS DP=61;SS=1;SSC=1;GPV=1.6967E-25;SPV=7.0485E-1 GT:GQ:DP:RD:AD:FREQ:DP4 1/1:.:22:3:19:86.36%:0,3,1,18 diff --git a/vcf/test/issue-201.vcf.gz b/vcf/test/issue-201.vcf.gz new file mode 100644 index 0000000..2af09fa Binary files /dev/null and b/vcf/test/issue-201.vcf.gz differ diff --git a/vcf/test/issue-201.vcf.gz.tbi b/vcf/test/issue-201.vcf.gz.tbi new file mode 100644 index 0000000..23ffe4d Binary files /dev/null and b/vcf/test/issue-201.vcf.gz.tbi differ diff --git a/vcf/test/issue-214.vcf b/vcf/test/issue-214.vcf new file mode 100644 index 0000000..dbc5fac --- /dev/null +++ b/vcf/test/issue-214.vcf @@ -0,0 +1,32 @@ +##fileformat=VCFv4.1 +##ALT= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample1 Sample2 +1 456904 . T C,* 6162.77 . AC=1,1;AF=8.333e-03,8.333e-03;AN=120;DP=7693;FS=0.000;MLEAC=1,1;MLEAF=8.333e-03,8.333e-03;MQ=60.00;QD=31.36;SOR=0.976 GT:AD:DP:GQ:PL 0:106,0,0:106:99:0,1800,1800 0:110,0,0:110:99:0,1800,1800 +1 456940 . * C,T 6162.77 . AC=1,1;AF=8.333e-03,8.333e-03;AN=120;DP=7693;FS=0.000;MLEAC=1,1;MLEAF=8.333e-03,8.333e-03;MQ=60.00;QD=31.36;SOR=0.976 GT:AD:DP:GQ:PL 0:106,0,0:106:99:0,1800,1800 0:110,0,0:110:99:0,1800,1800 diff --git a/vcf/test/issue-254.vcf b/vcf/test/issue-254.vcf new file mode 100644 index 0000000..c17262d --- /dev/null +++ b/vcf/test/issue-254.vcf @@ -0,0 +1,9 @@ +##fileformat=VCFv4.1 +##fileDate=20090805 +##source=myImputationProgramV3.1 +##reference=1000GenomesPilot-NCBI36 +##phasing=partial +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +21 4242421 . T A 30 . . GT:AO 0|0:0.1 0|1:0.2 0/0:0.3 diff --git a/vcf/test/metadata-whitespace.vcf b/vcf/test/metadata-whitespace.vcf new file mode 100644 index 0000000..c163f9a --- /dev/null +++ b/vcf/test/metadata-whitespace.vcf @@ -0,0 +1,56 @@ +##fileformat=VCFv4.2 +##FILTER= +##samtoolsVersion=1.0-17-gfaf4dd6+htslib-1.0-11-g830ea73 +##samtoolsCommand=samtools mpileup -u -t DP,DPR,DV,DP4,INFO/DPR,SP -f /data/archive/reference/Anopheles-arabiensis-Dongola_SCAFFOLDS_AaraD1.fa -r KB704451:0004153102-0004172483 huge_list_of_bam_files_removed +##reference=file:///data/archive/reference/Anopheles-arabiensis-Dongola_SCAFFOLDS_AaraD1.fa +##contig= +##ALT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##bcftools_callVersion=1.0-55-gc661821+htslib-1.0-11-g830ea73 +##bcftools_callCommand=call -m -vM -f GQ,GP +##SnpSiftVersion="SnpSift 3.6c (build 2014-05-20), by Pablo Cingolani" +##SnpSiftCmd="SnpSift varType - " +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT LUPI059 MINE001 OKJ042 LUPI001 LUPI007 LUPI024 LUPI056 LUPI071 LUPI074 LUPI082 MINE040 MINE100 MINE101 MINE105 MINE111 OKJ017 OKJ045 OKJ070 SAGA066 SAGA107 SAGA131 SAGA133 SAGA134 SAGA141 2012L_LUPI_002 2012L_LUPI_015 2012L_LUPI_017 2012L_LUPI_018 2012L_LUPI_035 2012L_LUPI_062 2012L_LUPI_065 2012L_LUPI_077 2012L_LUPI_083 2012L_LUPI_116 2012L_LUPI_013 2012L_LUPI_041 2012L_LUPI_068 2012L_LUPI_096 2012L_LUPI_098 2012L_LUPI_101 2012L_LUPI_103 2012_LUPI_156 2012_LUPI_157 2012_LUPI_161 2012_LUPI_171 2012_LUPI_173 2012_LUPI_180 2012L_LUPI_010 2012L_LUPI_012 2012L_LUPI_021 2012L_LUPI_045 2012L_LUPI_047 2012L_LUPI_060 2012L_LUPI_061 2012L_LUPI_067 2012_LUPI_125 2012_LUPI_129 2012_LUPI_146 2012_LUPI_178 2012_LUPI_211 2012_LUPI_277 2012_LUPI_278 2012_LUPI_279 2012_LUPI_284 +KB704451 4157846 . N A,C 167.0 . DP=10;VDB=1.17174e-06;SGB=1.26353;MQ0F=0;DPR=0,6,4;AC=10,4;AN=14;DP4=0,0,10,0;MQ=60;SNP;VARTYPE=SNP,SNP GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 1/2:74,23,14,57,0,54:4:4:0:0,0,4,0:0,3,1:144,56,16,90,0,57:16 1/2:26,26,26,3,3,0:1:1:0:0,0,1,0:0,0,1:95,58,28,36,2,3:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 1/1:26,3,0,26,3,26:1:1:0:0,0,1,0:0,1,0:96,36,2,60,3,29:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 1/1:26,3,0,26,3,26:1:1:0:0,0,1,0:0,1,0:96,36,2,60,3,29:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 1/2:26,26,26,3,3,0:1:1:0:0,0,1,0:0,0,1:95,58,28,36,2,3:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 1/1:26,3,0,26,3,26:1:1:0:0,0,1,0:0,1,0:96,36,2,60,3,29:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 1/2:26,26,26,3,3,0:1:1:0:0,0,1,0:0,0,1:95,58,28,36,2,3:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 +KB704451 4157870 . T C 275.0 . DP=243;VDB=0.00023935;SGB=29.4468;RPB=0.0368658;MQB=0.979612;MQSB=0.268441;BQB=0.99223;MQ0F=0;DPR=213,19;ICB=0.85092;HOB=0.0287274;AC=6;AN=118;DP4=201,12,19,1;MQ=53;SNP;VARTYPE=SNP GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 0/0:0,66,255:22:0:0:20,2,0,0:22,0:0,75,279:75 0/0:0,12,120:4:0:0:4,0,0,0:4,0:0,21,144:21 1/1:193,36,0:12:12:0:0,0,12,0:0,12:168,20,0:20 0/0:0,9,95:3:0:0:3,0,0,0:3,0:0,18,119:18 0/1:78,0,110:7:3:0:3,1,3,0:4,3:68,0,125:68 0/0:0,3,40:1:0:0:1,0,0,0:1,0:0,12,64:12 0/0:0,6,72:2:0:0:2,0,0,0:2,0:0,15,96:15 0/0:0,9,90:3:0:0:3,0,0,0:3,0:0,18,114:18 0/0:0,12,122:4:0:0:4,0,0,0:4,0:0,21,146:21 0/0:0,9,97:3:0:0:3,0,0,0:3,0:0,18,121:18 0/0:0,15,122:5:0:0:5,0,0,0:5,0:0,24,146:24 0/0:0,6,71:2:0:0:2,0,0,0:2,0:0,15,95:15 0/0:0,6,58:2:0:0:2,0,0,0:2,0:0,15,82:15 0/0:0,18,155:6:0:0:6,0,0,0:6,0:0,27,179:27 0/0:0,3,39:1:0:0:1,0,0,0:1,0:0,12,63:12 0/1:35,3,0:1:1:0:0,0,1,0:0,1:23,0,12:12 0/0:0,9,87:3:0:0:3,0,0,0:3,0:0,18,111:18 0/1:47,0,104:6:2:0:4,0,2,0:4,2:37,0,119:37 0/0:0,21,160:7:0:0:7,0,0,0:7,0:0,30,184:30 0/0:0,6,35:2:0:0:2,0,0,0:2,0:0,15,59:15 0/0:0,12,98:4:0:0:4,0,0,0:4,0:0,21,122:21 0/0:0,6,70:2:0:0:2,0,0,0:2,0:0,15,94:15 0/0:0,6,66:2:0:0:2,0,0,0:2,0:0,15,90:15 0/0:0,12,122:4:0:0:4,0,0,0:4,0:0,21,146:21 0/0:0,3,29:1:0:0:0,1,0,0:1,0:0,12,53:12 0/0:0,6,72:2:0:0:2,0,0,0:2,0:0,15,96:15 0/0:0,9,76:3:0:0:3,0,0,0:3,0:0,18,100:18 0/0:0,15,136:5:0:0:5,0,0,0:5,0:0,24,160:24 0/0:0,30,182:10:0:0:10,0,0,0:10,0:0,39,206:39 0/0:0,6,66:2:0:0:2,0,0,0:2,0:0,15,90:15 0/0:0,6,69:2:0:0:2,0,0,0:2,0:0,15,93:15 0/0:0,27,152:9:0:0:9,0,0,0:9,0:0,36,176:36 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,94:2:0:0:2,0,0,0:2,0:0,15,118:15 0/0:0,21,195:7:0:0:5,2,0,0:7,0:0,30,219:30 0/0:0,9,92:3:0:0:2,1,0,0:3,0:0,18,116:18 0/1:33,0,18:2:1:0:0,1,1,0:1,1:23,0,33:23 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,12,59:12 0/0:0,9,91:3:0:0:3,0,0,0:3,0:0,18,115:18 0/0:0,3,36:1:0:0:1,0,0,0:1,0:0,12,60:12 0/0:0,30,212:10:0:0:9,1,0,0:10,0:0,39,236:39 0/0:0,9,89:3:0:0:3,0,0,0:3,0:0,18,113:18 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,21,195:7:0:0:7,0,0,0:7,0:0,30,219:30 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,97:3:0:0:2,1,0,0:3,0:0,18,121:18 0/0:0,9,93:3:0:0:3,0,0,0:3,0:0,18,117:18 0/0:0,9,116:3:0:0:3,0,0,0:3,0:0,18,140:18 0/0:0,6,71:2:0:0:1,1,0,0:2,0:0,15,95:15 0/0:0,9,89:3:0:0:3,0,0,0:3,0:0,18,113:18 0/0:0,33,175:11:0:0:11,0,0,0:11,0:0,42,199:42 0/0:0,6,63:2:0:0:2,0,0,0:2,0:0,15,87:15 0/0:0,21,145:7:0:0:7,0,0,0:7,0:0,30,169:30 0/0:0,3,39:1:0:0:0,1,0,0:1,0:0,12,63:12 0/0:0,9,84:3:0:0:3,0,0,0:3,0:0,18,108:18 0/0:0,3,13:1:0:0:1,0,0,0:1,0:0,12,37:12 0/0:0,3,23:1:0:0:1,0,0,0:1,0:0,12,47:12 0/0:0,12,106:4:0:0:4,0,0,0:4,0:0,21,130:21 0/0:0,3,36:1:0:0:1,0,0,0:1,0:0,12,60:12 0/0:0,9,94:3:0:0:3,0,0,0:3,0:0,18,118:18 0/0:0,6,67:2:0:0:2,0,0,0:2,0:0,15,91:15 0/0:2,5,27:2:1:0:1,0,0,1:1,0:0,12,49:12 +KB704451 4157877 . G A 999.0 . DP=250;VDB=6.58963e-09;SGB=31.659;RPB=0.0227135;MQB=0.410318;MQSB=0.139343;BQB=0.0767891;MQ0F=0;DPR=188,48;ICB=0.990841;HOB=0.00761276;AC=17;AN=118;DP4=176,12,45,3;MQ=55;SNP;VARTYPE=SNP GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 0/1:159,0,202:22:9:0:12,1,8,1:13,9:154,0,212:127 0/0:0,12,120:4:0:0:4,0,0,0:4,0:0,16,134:16 0/0:0,51,207:17:0:0:17,0,0,0:17,0:0,55,221:55 0/0:0,9,98:3:0:0:3,0,0,0:3,0:0,13,112:13 0/1:123,0,61:8:5:0:3,0,4,1:3,5:118,0,71:71 0/0:0,3,38:1:0:0:0,1,0,0:1,0:0,7,52:7 0/1:68,0,29:3:2:0:1,0,1,1:1,2:63,0,39:39 0/0:0,6,69:2:0:0:2,0,0,0:2,0:0,10,83:10 0/0:0,12,119:4:0:0:4,0,0,0:4,0:0,16,133:16 0/1:34,0,34:2:1:0:1,0,1,0:1,1:29,0,44:29 0/1:24,0,99:5:1:0:4,0,1,0:4,1:19,0,109:19 0/1:34,0,28:2:1:0:1,0,1,0:1,1:29,0,38:29 0/0:0,6,58:2:0:0:2,0,0,0:2,0:0,10,72:10 0/1:122,0,57:7:4:0:3,0,4,0:3,4:117,0,67:67 0/0:0,3,41:1:0:0:1,0,0,0:1,0:0,7,55:7 0/0:0,3,29:1:0:0:1,0,0,0:1,0:0,7,43:7 0/0:0,12,105:4:0:0:4,0,0,0:4,0:0,16,119:16 0/0:0,18,144:6:0:0:6,0,0,0:6,0:0,22,158:22 0/1:118,0,63:8:5:0:3,0,5,0:3,5:113,0,73:73 0/0:0,6,34:2:0:0:2,0,0,0:2,0:0,10,48:10 0/0:0,15,131:5:0:0:5,0,0,0:5,0:0,19,145:19 0/0:0,6,72:2:0:0:2,0,0,0:2,0:0,10,86:10 0/0:0,6,89:2:0:0:2,0,0,0:2,0:0,10,103:10 1/1:124,12,0:4:4:0:0,0,4,0:0,4:112,4,2:4 0/0:0,3,34:1:0:0:0,1,0,0:1,0:0,7,48:7 0/0:0,6,73:2:0:0:2,0,0,0:2,0:0,10,87:10 0/0:0,9,91:3:0:0:3,0,0,0:3,0:0,13,105:13 0/0:0,15,138:5:0:0:5,0,0,0:5,0:0,19,152:19 0/0:0,30,179:10:0:0:10,0,0,0:10,0:0,34,193:34 0/0:0,6,65:2:0:0:2,0,0,0:2,0:0,10,79:10 0/0:0,6,70:2:0:0:2,0,0,0:2,0:0,10,84:10 0/0:0,27,155:9:0:0:9,0,0,0:9,0:0,31,169:31 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,94:2:0:0:2,0,0,0:2,0:0,10,108:10 0/0:0,15,161:5:0:0:3,2,0,0:5,0:0,19,175:19 0/0:0,6,72:2:0:0:1,1,0,0:2,0:0,10,86:10 0/0:0,6,65:2:0:0:1,1,0,0:2,0:0,10,79:10 0/1:36,3,0:1:1:0:0,0,1,0:0,1:29,0,7:7 0/0:0,9,93:3:0:0:3,0,0,0:3,0:0,13,107:13 0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,7,48:7 0/1:87,0,137:10:4:0:5,1,4,0:6,4:82,0,147:82 0/1:57,0,26:3:2:0:1,0,2,0:1,2:52,0,36:35 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:139,0,73:7:4:0:3,0,4,0:3,4:134,0,83:83 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,75:2:0:0:1,1,0,0:2,0:0,10,89:10 0/1:90,9,0:3:3:0:0,0,3,0:0,3:79,2,3:3 0/0:0,9,98:3:0:0:3,0,0,0:3,0:0,13,112:13 0/0:0,6,72:2:0:0:1,1,0,0:2,0:0,10,86:10 0/0:0,9,88:3:0:0:3,0,0,0:3,0:0,13,102:13 0/0:0,33,173:11:0:0:11,0,0,0:11,0:0,37,187:37 0/0:0,6,57:2:0:0:2,0,0,0:2,0:0,10,71:10 0/0:0,15,125:5:0:0:5,0,0,0:5,0:0,19,139:19 0/0:0,6,61:2:0:0:1,1,0,0:2,0:0,10,75:10 0/1:24,0,51:3:1:0:2,0,1,0:2,1:19,0,61:19 0/0:0,3,30:1:0:0:1,0,0,0:1,0:0,7,44:7 0/0:0,3,23:1:0:0:1,0,0,0:1,0:0,7,37:7 0/0:0,12,105:4:0:0:4,0,0,0:4,0:0,16,119:16 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,7,49:7 0/1:25,0,61:3:1:0:2,0,1,0:2,1:20,0,71:20 0/0:0,6,67:2:0:0:2,0,0,0:2,0:0,10,81:10 0/0:0,3,8:1:0:0:0,1,0,0:1,0:0,7,22:7 +KB704451 4157907 . A C 278.0 . DP=295;VDB=0.241276;SGB=26.7514;RPB=0.676983;MQB=0.997838;MQSB=0.136536;BQB=0.45683;MQ0F=0;DPR=264,15;ICB=0.00518819;HOB=0.00237812;AC=4;AN=116;DP4=233,31,14,1;MQ=59;SNP;VARTYPE=SNP GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 0/0:0,90,255:30:0:0:25,5,0,0:30,0:0,101,283:101 0/0:0,30,201:10:0:0:9,1,0,0:10,0:0,41,229:41 0/1:157,0,188:18:8:0:10,0,7,1:10,8:145,0,205:127 0/1:75,0,90:5:2:0:2,1,2,0:3,2:63,0,107:63 0/0:0,30,201:10:0:0:9,1,0,0:10,0:0,41,229:41 0/0:0,6,80:2:0:0:1,1,0,0:2,0:0,17,108:17 0/0:0,12,134:4:0:0:3,1,0,0:4,0:0,23,162:23 0/0:0,3,33:1:0:0:1,0,0,0:1,0:0,14,61:14 0/0:0,21,160:7:0:0:7,0,0,0:7,0:0,32,188:32 0/0:0,12,135:4:0:0:2,2,0,0:4,0:0,23,163:23 0/0:0,15,148:5:0:0:5,0,0,0:5,0:0,26,176:26 0/0:0,9,82:3:0:0:3,0,0,0:3,0:0,20,110:20 0/1:70,0,19:4:3:0:1,0,3,0:1,3:58,0,36:36 0/0:0,24,246:8:0:0:7,1,0,0:8,0:0,35,274:35 0/0:0,18,147:6:0:0:6,0,0,0:6,0:0,29,175:29 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,82:3:0:0:3,0,0,0:3,0:0,20,110:20 0/1:59,0,62:5:2:0:3,0,2,0:3,2:47,0,79:47 0/0:0,33,192:11:0:0:11,0,0,0:11,0:0,44,220:44 0/0:0,9,94:3:0:0:3,0,0,0:3,0:0,20,122:20 0/0:0,24,198:8:0:0:7,1,0,0:8,0:0,35,226:35 0/0:0,12,120:4:0:0:4,0,0,0:4,0:0,23,148:23 0/0:0,15,165:5:0:0:3,2,0,0:5,0:0,26,193:26 0/0:0,24,172:8:0:0:8,0,0,0:8,0:0,35,200:35 0/0:0,3,31:1:0:0:0,1,0,0:1,0:0,14,59:14 0/0:0,6,64:2:0:0:2,0,0,0:2,0:0,17,92:17 0/0:0,6,66:2:0:0:2,0,0,0:2,0:0,17,94:17 0/0:0,15,118:5:0:0:5,0,0,0:5,0:0,26,146:26 0/0:0,33,178:11:0:0:11,0,0,0:11,0:0,44,206:44 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,14,63:14 0/0:0,9,74:3:0:0:2,1,0,0:3,0:0,20,102:20 0/0:0,21,168:7:0:0:5,2,0,0:7,0:0,32,196:32 0/0:0,3,40:1:0:0:1,0,0,0:1,0:0,14,68:14 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,89:2:0:0:2,0,0,0:2,0:0,17,117:17 0/0:0,12,130:4:0:0:2,2,0,0:4,0:0,23,158:23 0/0:0,9,78:3:0:0:1,2,0,0:3,0:0,20,106:20 0/0:0,6,65:2:0:0:1,1,0,0:2,0:0,17,93:17 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,14,63:14 0/0:0,6,55:2:0:0:2,0,0,0:2,0:0,17,83:17 0/0:0,3,29:1:0:0:1,0,0,0:1,0:0,14,57:14 0/0:0,36,194:12:0:0:11,1,0,0:12,0:0,47,222:47 0/0:0,12,110:4:0:0:4,0,0,0:4,0:0,23,138:23 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,18,182:6:0:0:6,0,0,0:6,0:0,29,210:29 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,3,34:1:0:0:0,1,0,0:1,0:0,14,62:14 0/0:0,6,68:2:0:0:2,0,0,0:2,0:0,17,96:17 0/0:0,9,107:3:0:0:3,0,0,0:3,0:0,20,135:20 0/0:0,6,67:2:0:0:1,1,0,0:2,0:0,17,95:17 0/0:0,6,68:2:0:0:2,0,0,0:2,0:0,17,96:17 0/0:0,27,184:9:0:0:9,0,0,0:9,0:0,38,212:38 0/0:0,9,85:3:0:0:3,0,0,0:3,0:0,20,113:20 0/0:0,12,111:4:0:0:4,0,0,0:4,0:0,23,139:23 0/0:0,6,77:2:0:0:1,1,0,0:2,0:0,17,105:17 0/0:0,12,108:4:0:0:3,1,0,0:4,0:0,23,136:23 0/0:0,3,27:1:0:0:1,0,0,0:1,0:0,14,55:14 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,64:2:0:0:2,0,0,0:2,0:0,17,92:17 0/0:0,3,33:1:0:0:1,0,0,0:1,0:0,14,61:14 0/0:0,12,125:4:0:0:4,0,0,0:4,0:0,23,153:23 0/0:0,9,98:3:0:0:2,1,0,0:3,0:0,20,126:20 0/0:0,6,46:2:0:0:2,0,0,0:2,0:0,17,74:17 +KB704451 4157909 . T G 278.0 . DP=295;VDB=0.184881;SGB=22.7413;RPB=0.646301;MQB=0.998034;MQSB=0.200514;BQB=0.321842;MQ0F=0;DPR=247,15;ICB=0.00558284;HOB=0.00255102;AC=4;AN=112;DP4=218,29,15,1;MQ=59;SNP;VARTYPE=SNP GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 0/0:0,87,255:29:0:0:24,5,0,0:29,0:0,97,282:97 0/0:0,27,183:9:0:0:9,0,0,0:9,0:0,37,210:37 0/1:156,0,167:19:8:0:11,0,7,1:11,8:145,0,183:127 0/1:75,0,107:5:2:0:2,1,2,0:3,2:64,0,123:64 0/0:0,27,191:9:0:0:8,1,0,0:9,0:0,37,218:37 0/0:0,6,80:2:0:0:1,1,0,0:2,0:0,16,107:16 0/0:0,12,119:4:0:0:3,1,0,0:4,0:0,22,146:22 0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,13,61:13 0/0:0,15,126:5:0:0:5,0,0,0:5,0:0,25,153:25 0/0:0,12,132:4:0:0:2,2,0,0:4,0:0,22,159:22 0/0:0,12,133:4:0:0:4,0,0,0:4,0:0,22,160:22 0/0:0,6,67:2:0:0:2,0,0,0:2,0:0,16,94:16 0/1:79,9,0:3:3:0:0,0,3,0:0,3:60,0,8:8 0/0:0,21,230:7:0:0:6,1,0,0:7,0:0,31,257:31 0/0:0,18,144:6:0:0:6,0,0,0:6,0:0,28,171:28 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,53:2:0:0:2,0,0,0:2,0:0,16,80:16 0/1:59,0,64:5:2:0:3,0,2,0:3,2:48,0,80:48 0/0:0,33,180:11:0:0:11,0,0,0:11,0:0,43,207:43 0/0:0,12,110:4:0:0:4,0,0,0:4,0:0,22,137:22 0/0:0,24,190:8:0:0:7,1,0,0:8,0:0,34,217:34 0/0:0,12,110:4:0:0:4,0,0,0:4,0:0,22,137:22 0/0:0,15,164:5:0:0:3,2,0,0:5,0:0,25,191:25 0/0:0,24,161:8:0:0:8,0,0,0:8,0:0,34,188:34 0/0:0,3,32:1:0:0:0,1,0,0:1,0:0,13,59:13 0/0:0,6,63:2:0:0:2,0,0,0:2,0:0,16,90:16 0/0:0,6,65:2:0:0:2,0,0,0:2,0:0,16,92:16 0/0:0,15,121:5:0:0:5,0,0,0:5,0:0,25,148:25 0/0:0,30,174:10:0:0:10,0,0,0:10,0:0,40,201:40 0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,13,61:13 0/0:0,6,63:2:0:0:2,0,0,0:2,0:0,16,90:16 0/0:0,21,164:7:0:0:5,2,0,0:7,0:0,31,191:31 0/0:0,3,37:1:0:0:1,0,0,0:1,0:0,13,64:13 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,89:2:0:0:2,0,0,0:2,0:0,16,116:16 0/0:0,12,128:4:0:0:2,2,0,0:4,0:0,22,155:22 0/0:0,9,94:3:0:0:1,2,0,0:3,0:0,19,121:19 0/0:0,6,63:2:0:0:1,1,0,0:2,0:0,16,90:16 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,56:2:0:0:2,0,0,0:2,0:0,16,83:16 0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,13,61:13 0/0:0,36,193:12:0:0:11,1,0,0:12,0:0,46,220:46 0/0:0,12,108:4:0:0:4,0,0,0:4,0:0,22,135:22 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,18,168:6:0:0:6,0,0,0:6,0:0,28,195:28 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,3,31:1:0:0:0,1,0,0:1,0:0,13,58:13 0/0:0,6,47:2:0:0:2,0,0,0:2,0:0,16,74:16 0/0:8,11,65:2:1:0:1,0,1,0:1,0:0,13,84:13 0/0:0,6,64:2:0:0:1,1,0,0:2,0:0,16,91:16 0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,13,61:13 0/0:0,27,177:9:0:0:9,0,0,0:9,0:0,37,204:37 0/0:0,6,50:2:0:0:2,0,0,0:2,0:0,16,77:16 0/0:0,12,101:4:0:0:4,0,0,0:4,0:0,22,128:22 0/0:0,6,65:2:0:0:1,1,0,0:2,0:0,16,92:16 0/0:0,12,100:4:0:0:3,1,0,0:4,0:0,22,127:22 0/0:0,3,31:1:0:0:1,0,0,0:1,0:0,13,58:13 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,84:3:0:0:3,0,0,0:3,0:0,19,111:19 0/0:0,3,32:1:0:0:1,0,0,0:1,0:0,13,59:13 0/0:0,12,104:4:0:0:4,0,0,0:4,0:0,22,131:22 0/0:0,6,66:2:0:0:1,1,0,0:2,0:0,16,93:16 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 +KB704451 4157927 . G A 4.88727 . DP=334;VDB=0.38;SGB=3.29913;RPB=0.454248;MQB=0.970588;MQSB=0.546099;BQB=0.215686;MQ0F=0;DPR=306,2;ICB=0.000310486;HOB=0.000153894;AC=1;AN=114;DP4=265,41,2,0;MQ=59;SNP;VARTYPE=SNP GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 0/0:0,105,255:35:0:0:31,4,0,0:35,0:0,122,295:122 0/0:0,45,255:15:0:0:14,1,0,0:15,0:0,62,295:62 0/0:0,60,255:20:0:0:17,3,0,0:20,0:0,77,295:77 0/0:0,21,242:7:0:0:6,1,0,0:7,0:0,38,282:38 0/0:0,24,207:8:0:0:6,2,0,0:8,0:0,41,247:41 0/0:0,6,71:2:0:0:2,0,0,0:2,0:0,23,112:23 0/0:0,24,215:8:0:0:6,2,0,0:8,0:0,41,255:41 0/0:0,6,70:2:0:0:1,1,0,0:2,0:0,23,111:23 0/0:0,30,191:10:0:0:10,0,0,0:10,0:0,47,231:47 0/0:0,15,163:5:0:0:3,2,0,0:5,0:0,32,203:32 0/0:0,15,151:5:0:0:5,0,0,0:5,0:0,32,191:32 0/0:0,6,70:2:0:0:2,0,0,0:2,0:0,23,111:23 0/0:0,12,102:4:0:0:4,0,0,0:4,0:0,29,142:29 0/0:0,24,255:8:0:0:6,2,0,0:8,0:0,41,295:41 0/0:0,21,189:7:0:0:7,0,0,0:7,0:0,38,229:38 0/0:0,3,35:1:0:0:0,1,0,0:1,0:0,20,76:20 0/0:0,3,40:1:0:0:1,0,0,0:1,0:0,20,81:20 0/0:0,12,126:4:0:0:3,1,0,0:4,0:0,29,166:29 0/0:0,39,255:13:0:0:12,1,0,0:13,0:0,56,295:56 0/0:0,21,206:7:0:0:6,1,0,0:7,0:0,38,246:38 0/0:0,30,238:10:0:0:8,2,0,0:10,0:0,47,278:47 0/0:0,18,145:6:0:0:6,0,0,0:6,0:0,35,185:35 0/0:0,24,244:8:0:0:6,2,0,0:8,0:0,41,284:41 0/0:0,24,195:8:0:0:7,1,0,0:8,0:0,41,235:41 0/0:0,3,27:1:0:0:0,1,0,0:1,0:0,20,68:20 0/0:0,6,62:2:0:0:2,0,0,0:2,0:0,23,103:23 0/0:0,6,64:2:0:0:2,0,0,0:2,0:0,23,105:23 0/0:0,15,123:5:0:0:5,0,0,0:5,0:0,32,163:32 0/0:0,33,184:11:0:0:11,0,0,0:11,0:0,50,224:50 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,20,76:20 0/0:0,3,35:1:0:0:1,0,0,0:1,0:0,20,76:20 0/0:0,18,165:6:0:0:4,2,0,0:6,0:0,35,205:35 0/0:0,3,38:1:0:0:1,0,0,0:1,0:0,20,79:20 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,117:3:0:0:3,0,0,0:3,0:0,26,157:26 0/0:0,12,121:4:0:0:2,2,0,0:4,0:0,29,161:29 0/0:0,9,95:3:0:0:1,2,0,0:3,0:0,26,135:26 0/0:0,3,41:1:0:0:1,0,0,0:1,0:0,20,82:20 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,60:2:0:0:2,0,0,0:2,0:0,23,101:23 0/0:0,3,25:1:0:0:1,0,0,0:1,0:0,20,66:20 0/0:0,36,213:12:0:0:11,1,0,0:12,0:0,53,253:53 0/0:0,15,152:5:0:0:4,1,0,0:5,0:0,32,192:32 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,21,215:7:0:0:6,1,0,0:7,0:0,38,255:38 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,3,31:1:0:0:0,1,0,0:1,0:0,20,72:20 0/0:0,6,60:2:0:0:2,0,0,0:2,0:0,23,101:23 0/0:0,9,101:3:0:0:3,0,0,0:3,0:0,26,141:26 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,60:2:0:0:2,0,0,0:2,0:0,23,101:23 0/0:0,27,179:9:0:0:9,0,0,0:9,0:0,44,219:44 0/0:0,9,92:3:0:0:3,0,0,0:3,0:0,26,132:26 0/0:0,12,112:4:0:0:4,0,0,0:4,0:0,29,152:29 0/0:0,6,58:2:0:0:1,1,0,0:2,0:0,23,99:23 0/0:0,15,123:5:0:0:4,1,0,0:5,0:0,32,163:32 0/0:0,3,25:1:0:0:1,0,0,0:1,0:0,20,66:20 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,9,78:3:0:0:3,0,0,0:3,0:0,26,118:26 0/0:0,3,34:1:0:0:1,0,0,0:1,0:0,20,75:20 0/1:47,0,51:3:2:0:1,0,2,0:1,2:29,0,74:29 0/0:0,9,80:3:0:0:2,1,0,0:3,0:0,26,120:26 0/0:0,6,53:2:0:0:2,0,0,0:2,0:0,23,94:23 +KB704451 4157938 . ATTT ATTTT 650.0 . INDEL;IDV=18;IMF=0.428571;DP=361;VDB=0.773794;SGB=32.6744;MQSB=0.993251;MQ0F=0.00831025;DPR=115,60;ICB=0.929833;HOB=0.0258;AC=23;AN=100;DP4=98,17,48,12;MQ=59;INS;VARTYPE=INS GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 0/1:124,0,7:19:16:0:3,0,13,3:3,16:123,0,13:13 0/1:14,3,0:1:1:0:0,0,1,0:0,1:12,1,5:4 0/0:0,9,55:3:0:0:3,0,0,0:3,0:0,9,62:9 0/0:0,21,146:7:0:0:5,2,0,0:7,0:0,21,153:21 0/0:0,18,126:6:0:0:4,2,0,0:6,0:0,18,133:18 0/0:0,6,56:2:0:0:2,0,0,0:2,0:0,7,63:7 0/0:0,9,87:3:0:0:2,1,0,0:3,0:0,9,94:9 1/1:46,9,0:3:3:0:0,0,1,2:0,3:40,4,1:4 0/0:0,27,155:9:0:0:8,1,0,0:9,0:0,27,162:27 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,57:2:0:0:2,0,0,0:2,0:0,7,64:7 0/1:24,3,0:1:1:0:0,0,1,0:0,1:22,1,5:5 0/0:0,3,32:1:0:0:1,0,0,0:1,0:1,5,40:5 0/0:0,9,84:3:0:0:2,1,0,0:3,0:0,9,91:9 0/0:0,15,111:5:0:0:5,0,0,0:5,0:0,15,118:15 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:32,6,0:2:2:0:0,0,1,1:0,2:28,2,3:3 0/0:0,6,55:2:0:0:2,0,0,0:2,0:0,7,62:7 0/1:63,0,61:7:4:0:3,0,3,1:3,4:62,0,67:61 0/0:0,24,154:8:0:0:7,1,0,0:8,0:0,24,161:24 0/1:16,0,94:7:2:0:5,0,2,0:5,2:15,0,100:15 0/1:61,0,101:9:4:0:4,1,3,1:5,4:60,0,107:60 0/0:0,3,31:1:0:0:1,0,0,0:1,0:1,5,39:5 0/1:13,3,0:1:1:0:0,0,0,1:0,1:11,1,5:4 0/1:16,0,47:3:1:0:2,0,1,0:2,1:15,0,53:15 0/0:0,3,32:1:0:0:1,0,0,0:1,0:1,5,40:5 0/1:46,0,57:5:2:0:3,0,2,0:3,2:45,0,63:45 0/1:50,0,12:6:5:0:1,0,5,0:1,5:49,0,18:18 0/0:0,3,30:1:0:0:1,0,0,0:1,0:1,5,38:5 0/0:0,3,4:1:0:0:1,0,0,0:1,0:1,5,12:4 0/1:18,0,98:5:1:0:2,2,1,0:4,1:17,0,104:17 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:15,0,26:2:1:0:1,0,1,0:1,1:14,0,32:14 0/1:20,0,26:2:1:0:1,0,0,1:1,1:19,0,32:19 0/0:0,6,60:2:0:0:1,1,0,0:2,0:0,7,67:7 0/0:0,3,32:1:0:0:1,0,0,0:1,0:1,5,40:5 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:33,3,0:1:1:0:0,0,1,0:0,1:31,1,5:5 0/1:82,6,0:7:6:0:1,0,5,1:1,6:78,2,3:3 0/0:0,15,126:5:0:0:4,1,0,0:5,0:0,15,133:15 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/1:34,6,0:2:2:0:0,0,2,0:0,2:30,2,3:3 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,6,50:2:0:0:1,1,0,0:2,0:0,7,57:7 0/0:0,3,28:1:0:0:1,0,0,0:1,0:1,5,36:5 0/0:0,6,57:2:0:0:2,0,0,0:2,0:0,7,64:7 0/0:0,18,124:6:0:0:5,1,0,0:6,0:0,18,131:18 0/1:53,6,0:2:2:0:0,0,2,0:0,2:49,2,3:3 0/0:0,12,96:4:0:0:4,0,0,0:4,0:0,12,103:12 0/1:25,3,0:1:1:0:0,0,1,0:0,1:23,1,5:5 1/1:61,9,0:3:3:0:0,0,2,1:0,3:55,4,1:4 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 0/0:0,3,31:1:0:0:0,1,0,0:1,0:1,5,39:5 0/0:0,6,56:2:0:0:2,0,0,0:2,0:0,7,63:7 0/0:0,3,29:1:0:0:1,0,0,0:1,0:1,5,37:5 0/0:0,3,32:1:0:0:1,0,0,0:1,0:1,5,40:5 0/0:0,9,87:3:0:0:2,1,0,0:3,0:0,9,94:9 ./.:0,0,0:0:0:0:0,0,0,0:0,0:0,0,0:0 +KB704451 4157940 . TTGTGTGTGTGTGT TTGTGTGTGTGTGTGTGT,TTTCTGTGTGTGTGTGT 999.0 . INDEL;IDV=7;IMF=0.5;DP=366;VDB=0.0431342;SGB=14.7456;MQSB=0.996953;MQ0F=0.010929;DPR=86,41,8;ICB=0.963728;HOB=0.02;AC=21,6;AN=90;DP4=70,16,39,10;MQ=58;INS;VARTYPE=INS,INS GT:PL:DP:DV:SP:DP4:DPR:GP:GQ 0/2:60,60,60,3,3,0:1:1:0:0,0,1,0:0,0,1:54,55,61,2,5,9:4 1/1:255,18,0,255,18,255:6:6:0:0,0,3,3:0,6,0:248,11,0,252,19,263:11 0/0:0,6,62,6,62,62:2:0:0:2,0,0,0:2,0,0:1,7,70,11,71,78:6 0/1:9,0,238,27,241,255:7:1:0:4,2,1,0:6,1,0:8,0,245,31,249,270:8 0/0:0,12,185,12,185,185:4:0:0:3,1,0,0:4,0,0:0,12,192,16,193,200:11 0/0:0,6,110,6,110,110:2:0:0:2,0,0,0:2,0,0:1,7,118,11,119,126:6 0/1:1,0,158,10,161,165:4:1:0:2,1,1,0:3,1,0:3,2,167,16,171,182:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/0:0,24,255,24,255,255:8:0:0:7,1,0,0:8,0,0:0,24,262,28,263,270:23 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/1:37,0,54,40,57,94:2:1:0:1,0,1,0:1,1,0:36,0,60,44,64,108:35 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/1:117,6,0,117,6,117:2:2:0:0,0,1,1:0,2,0:113,3,3,118,10,128:2 0/0:0,18,237,18,237,237:6:0:0:5,1,0,0:6,0,0:0,18,244,22,245,252:17 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/2:120,120,120,6,6,0:2:2:0:0,0,1,1:0,0,2:111,112,119,2,6,7:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 1/1:255,21,0,255,21,255:7:7:0:0,0,5,2:0,7,0:247,14,0,252,22,263:13 0/2:19,31,181,0,157,154:5:1:0:3,1,1,0:4,0,1:14,27,183,0,160,164:14 0/1:120,0,255,141,255,255:10:3:0:6,1,2,1:7,3,0:119,0,261,145,262,269:119 0/0:0,15,212,15,212,212:5:0:0:5,0,0,0:5,0,0:0,15,219,19,220,227:14 0/0:0,15,243,15,243,243:5:0:0:4,1,0,0:5,0,0:0,15,250,19,251,258:14 1/1:40,9,0,40,9,40:3:3:0:0,0,3,0:0,3,0:34,4,2,39,12,50:3 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/0:0,6,110,6,110,110:2:0:0:2,0,0,0:2,0,0:1,7,118,11,119,126:6 0/1:54,0,54,57,57,111:2:1:0:1,0,1,0:1,1,0:53,0,60,61,64,125:51 0/0:0,9,139,9,139,139:3:0:0:3,0,0,0:3,0,0:0,10,146,14,147,154:8 1/1:243,15,0,243,15,243:5:5:0:0,0,4,1:0,5,0:236,9,0,241,16,251:8 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/1:55,1,0,58,3,57:2:1:0:1,0,1,0:1,1,0:54,1,7,62,11,72:5 0/0:0,9,170,9,170,170:3:0:0:1,2,0,0:3,0,0:0,10,177,14,178,185:8 0/1:60,3,0,60,3,60:1:1:0:0,0,1,0:0,1,0:58,2,5,63,9,73:4 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/1:60,3,0,60,3,60:1:1:0:0,0,0,1:0,1,0:58,2,5,63,9,73:4 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/1:60,3,0,60,3,60:1:1:0:0,0,1,0:0,1,0:58,2,5,63,9,73:4 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/0:0,15,206,15,206,206:5:0:0:4,1,0,0:5,0,0:0,15,213,19,214,221:14 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 1/2:134,105,96,40,0,34:5:5:0:0,0,5,0:0,3,2:125,97,95,36,0,41:35 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/1:58,6,0,58,6,58:2:2:0:0,0,2,0:0,2,0:54,3,3,59,10,69:2 0/0:0,6,102,6,102,102:2:0:0:1,1,0,0:2,0,0:1,7,110,11,111,118:6 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/1:98,0,88,104,94,191:4:2:0:1,1,2,0:2,2,0:97,0,94,108,101,205:92 0/2:35,35,35,3,3,0:1:1:0:0,0,1,0:0,0,1:29,30,36,2,5,9:4 0/0:0,6,110,6,110,110:2:0:0:2,0,0,0:2,0,0:1,7,118,11,119,126:6 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/2:45,45,45,3,3,0:1:1:0:0,0,1,0:0,0,1:39,40,46,2,5,9:4 0/0:0,3,60,3,60,60:1:0:0:0,1,0,0:1,0,0:2,5,69,9,70,77:4 0/0:0,6,110,6,110,110:2:0:0:2,0,0,0:2,0,0:1,7,118,11,119,126:6 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 0/0:0,3,60,3,60,60:1:0:0:1,0,0,0:1,0,0:2,5,69,9,70,77:4 0/0:0,6,120,6,120,120:2:0:0:1,1,0,0:2,0,0:1,7,128,11,129,136:6 ./.:0,0,0,0,0,0:0:0:0:0,0,0,0:0,0,0:0,0,0,0,0,0:0 diff --git a/vcf/test/mixed-filtering.vcf b/vcf/test/mixed-filtering.vcf new file mode 100644 index 0000000..f02e839 --- /dev/null +++ b/vcf/test/mixed-filtering.vcf @@ -0,0 +1,24 @@ +##fileformat=VCFv4.1 +##fileDate=20090805 +##source=myImputationProgramV3.1 +##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta +##contig= +##phasing=partial +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. +20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3 +20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4 +20 1230237 . T . 47 . NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2 +20 1234567 microsat1 GTC G,GTCT 50 q10;q50 NS=3;DP=9;AA=G GT:GQ:DP 0/1:35:4 0/2:17:2 1/1:40:3 diff --git a/vcf/test/parse-meta-line.vcf b/vcf/test/parse-meta-line.vcf new file mode 100644 index 0000000..e3a2611 --- /dev/null +++ b/vcf/test/parse-meta-line.vcf @@ -0,0 +1,6 @@ +##fileformat=VCFv4.1 +##INFO= +##FORMAT= +##MYFIELD= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample +chr1 100 id1 G A . . NS=3 GT 0/1 diff --git a/vcf/test/prof.py b/vcf/test/prof.py index 62c72fe..953d169 100755 --- a/vcf/test/prof.py +++ b/vcf/test/prof.py @@ -1,4 +1,4 @@ -import vcf +import vcf as vcf import cProfile import timeit import pstats diff --git a/vcf/test/samples-space.vcf b/vcf/test/samples-space.vcf new file mode 100644 index 0000000..8c9bb9e --- /dev/null +++ b/vcf/test/samples-space.vcf @@ -0,0 +1,10 @@ +##fileformat=VCFv4.0 +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA 00001 NA 00002 NA 00003 +20 14370 rs6054257 G A 29 PASS . GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 ./. +20 76766 rs6054257 C T 29 PASS . GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 ./. diff --git a/vcf/test/strelka.vcf b/vcf/test/strelka.vcf new file mode 100644 index 0000000..b5aea76 --- /dev/null +++ b/vcf/test/strelka.vcf @@ -0,0 +1,57 @@ +##fileformat=VCFv4.1 +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##content=strelka somatic indel calls +##fileDate=20151113 +##germlineIndelTheta=0.0001 +##germlineSnvTheta=0.001 +##priorSomaticIndelRate=1e-06 +##priorSomaticSnvRate=1e-06 +##reference=file:///b37.fasta +##source=strelka +##source_version=2.0.17.strelka1 +##startTime=Fri Nov 13 19:38:43 2015 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL.variant NORMAL.variant2 TUMOR.variant TUMOR.variant2 +1 1666175 . C T . PASS AC=0;AF=0.00;AN=0;NT=ref;QSS=28;QSS_NT=28;SGT=CC->CT;SOMATIC;TQSS=1;TQSS_NT=1;set=variant AU:CU:DP:FDP:GU:SDP:SUBDP:TU 0,0:42,42:43:0:0,0:0:0:1,1 0,0:45,45:59:0:0,0:0:0:14,14 +1 3750492 . G A . PASS AC=0;AF=0.00;AN=0;NT=ref;QSS=38;QSS_NT=38;SGT=GG->AG;SOMATIC;TQSS=2;TQSS_NT=2;set=variant AU:CU:DP:FDP:GU:SDP:SUBDP:TU 0,0:0,0:116:0:116,116:0:0:0,0 6,6:0,0:96:0:90,91:0:0:0,0 +1 9117626 . G A . PASS AC=0;AF=0.00;AN=0;NT=ref;QSS=32;QSS_NT=32;SGT=GG->AG;SOMATIC;TQSS=1;TQSS_NT=1;set=variant AU:CU:DP:FDP:GU:SDP:SUBDP:TU 0,0:0,0:165:0:165,166:0:0:0,0 6,6:0,0:132:0:126,127:0:0:0,0 \ No newline at end of file diff --git a/vcf/test/string_as_flag.vcf b/vcf/test/string_as_flag.vcf new file mode 100644 index 0000000..afa3b0d --- /dev/null +++ b/vcf/test/string_as_flag.vcf @@ -0,0 +1,8 @@ +##fileformat=VCFv4.1 +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT test +chr2 21 . A G . . GT . +chr2 24 . G T . . AB GT . +chr2 48 . C T . . CD GT . +chr2 75 . T C . . AB;CD GT . diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py index e4c3426..f04c8b1 100644 --- a/vcf/test/test_vcf.py +++ b/vcf/test/test_vcf.py @@ -1,12 +1,27 @@ from __future__ import print_function import unittest +try: + unittest.skip +except AttributeError: + import unittest2 as unittest import doctest import os import commands +import cPickle from StringIO import StringIO +import subprocess +import sys + +try: + import pysam +except ImportError: + pysam = None import vcf -from vcf import utils +from vcf import model, utils + +IS_PYTHON2 = sys.version_info[0] == 2 +IS_NOT_PYPY = 'PyPy' not in sys.version suite = doctest.DocTestSuite(vcf) @@ -19,7 +34,7 @@ class TestVcfSpecs(unittest.TestCase): def test_vcf_4_0(self): reader = vcf.Reader(fh('example-4.0.vcf')) - assert reader.metadata['fileformat'] == 'VCFv4.0' + self.assertEqual(reader.metadata['fileformat'], 'VCFv4.0') # test we can walk the file at least for r in reader: @@ -48,10 +63,7 @@ def test_vcf_4_1(self): self.assertEqual(reader.metadata['fileformat'], 'VCFv4.1') # contigs were added in vcf4.1 - # probably need to add a reader.contigs attribute - assert 'contig' in reader.metadata - assert 'ID' in reader.metadata['contig'][0] - assert reader.metadata['contig'][0]['ID'] == '20' + self.assertEqual(reader.contigs['20'].length, 62435964) # test we can walk the file at least for r in reader: @@ -83,25 +95,56 @@ def test_vcf_4_1_bnd(self): for a in r.ALT: print(a) if r.ID == "bnd1": - assert len(r.ALT) == 1 - assert r.ALT[0].type == "BND" - assert r.ALT[0].chr == "2" - assert r.ALT[0].pos == 3 - assert r.ALT[0].orientation == False - assert r.ALT[0].remoteOrientation == True - assert r.ALT[0].connectingSequence == "T" + self.assertEqual(len(r.ALT), 1) + self.assertEqual(r.ALT[0].type, "BND") + self.assertEqual(r.ALT[0].chr, "2") + self.assertEqual(r.ALT[0].pos, 3) + self.assertEqual(r.ALT[0].orientation, False) + self.assertEqual(r.ALT[0].remoteOrientation, True) + self.assertEqual(r.ALT[0].connectingSequence, "T") if r.ID == "bnd4": - assert len(r.ALT) == 1 - assert r.ALT[0].type == "BND" - assert r.ALT[0].chr == "1" - assert r.ALT[0].pos == 2 - assert r.ALT[0].orientation == True - assert r.ALT[0].remoteOrientation == False - assert r.ALT[0].connectingSequence == "G" + self.assertEqual(len(r.ALT), 1) + self.assertEqual(r.ALT[0].type, "BND") + self.assertEqual(r.ALT[0].chr, "1") + self.assertEqual(r.ALT[0].pos, 2) + self.assertEqual(r.ALT[0].orientation, True) + self.assertEqual(r.ALT[0].remoteOrientation, False) + self.assertEqual(r.ALT[0].connectingSequence, "G") for c in r: print(c) assert c + def test_vcf_4_2(self): + reader = vcf.Reader(fh('example-4.2.vcf')) + self.assertEqual(reader.metadata['fileformat'], 'VCFv4.2') + + # If INFO contains no Source and Version keys, they should be None. + self.assertEqual(reader.infos['DP'].source, None) + self.assertEqual(reader.infos['DP'].version, None) + + # According to spec, INFO Version key is required to be double quoted, + # but at least SAMtools 1.0 does not quote it. So we want to be + # forgiving here. + self.assertEqual(reader.infos['VDB'].source, None) + self.assertEqual(reader.infos['VDB'].version, '3') + + # test we can walk the file at least + for r in reader: + for c in r: + assert c + + def test_contig_idonly(self): + """Test VCF inputs with ##contig inputs containing only IDs. produced by bcftools 1.2+ + """ + reader = vcf.Reader(fh("contig_idonly.vcf")) + for cid, contig in reader.contigs.items(): + if cid == "1": + assert contig.length is None + elif cid == "2": + assert contig.length == 2000 + elif cid == "3": + assert contig.length == 3000 + class TestGatkOutput(unittest.TestCase): filename = 'gatk.vcf' @@ -167,7 +210,7 @@ def testParse(self): n+=1 for x in r: assert x - assert n == self.n_calls + self.assertEqual(n, self.n_calls) class TestSamtoolsOutput(unittest.TestCase): @@ -186,12 +229,42 @@ def testParse(self): for s in r.samples: s.phased +class TestIssue214(unittest.TestCase): + """ See https://github.com/jamescasbon/PyVCF/issues/214 """ + + def test_issue_214_is_snp(self): + reader=vcf.Reader(fh('issue-214.vcf')) + r=next(reader) + self.assertTrue(r.is_snp) + + def test_issue_214_var_type(self): + reader=vcf.Reader(fh('issue-214.vcf')) + r=next(reader) + self.assertEqual(r.var_type,'snp') + + # Can the ref even be a spanning deletion? + # Note, this does not trigger issue 214, but I've added it here for completeness + def test_issue_214_ref_is_del_is_snp(self): + reader=vcf.Reader(fh('issue-214.vcf')) + next(reader) + r=next(reader) + self.assertTrue(r.is_snp) + + # Can the ref even be a spanning deletion? + # Note, this does not trigger issue 214, but I've added it here for completeness + def test_issue_214_ref_is_del_var_type(self): + reader=vcf.Reader(fh('issue-214.vcf')) + next(reader) + r=next(reader) + self.assertEqual(r.var_type,'snp') class Test1kg(unittest.TestCase): def testParse(self): reader = vcf.Reader(fh('1kg.vcf.gz', 'rb')) + assert 'FORMAT' in reader._column_headers + self.assertEqual(len(reader.samples), 629) for _ in reader: pass @@ -205,6 +278,171 @@ def test_issue_49(self): pass +class Test1kgSites(unittest.TestCase): + + def test_reader(self): + """The samples attribute should be the empty list.""" + reader = vcf.Reader(fh('1kg.sites.vcf', 'r')) + + assert 'FORMAT' not in reader._column_headers + + self.assertEqual(reader.samples, []) + for record in reader: + self.assertEqual(record.samples, []) + + def test_writer(self): + """FORMAT should not be written if not present in the template and no + extra tab character should be printed if there are no FORMAT fields.""" + reader = vcf.Reader(fh('1kg.sites.vcf', 'r')) + out = StringIO() + writer = vcf.Writer(out, reader, lineterminator='\n') + + for record in reader: + writer.write_record(record) + out.seek(0) + out_str = out.getvalue() + for line in out_str.split('\n'): + if line.startswith('##'): + continue + if line.startswith('#CHROM'): + assert 'FORMAT' not in line + assert not line.endswith('\t') + + +class TestGoNL(unittest.TestCase): + + def testParse(self): + reader = vcf.Reader(fh('gonl.chr20.release4.gtc.vcf')) + for _ in reader: + pass + + def test_contig_line(self): + reader = vcf.Reader(fh('gonl.chr20.release4.gtc.vcf')) + self.assertEqual(reader.contigs['1'].length, 249250621) + + +class TestStringAsFlag(unittest.TestCase): + + def test_string_as_flag(self): + """A flag INFO field is declared as string (not allowed by the spec, + but seen in practice).""" + reader = vcf.Reader(fh('string_as_flag.vcf', 'r')) + for _ in reader: + pass + + +class TestInfoOrder(unittest.TestCase): + + def _assert_order(self, definitions, fields): + """ + Elements common to both lists should be in the same order. Elements + only in `fields` should be last and in alphabetical order. + """ + used_definitions = [d for d in definitions if d in fields] + self.assertEqual(used_definitions, fields[:len(used_definitions)]) + self.assertEqual(fields[len(used_definitions):], + sorted(fields[len(used_definitions):])) + + def test_writer(self): + """ + Order of INFO fields should be compatible with the order of their + definition in the header and undefined fields should be last and in + alphabetical order. + """ + reader = vcf.Reader(fh('1kg.sites.vcf', 'r')) + out = StringIO() + writer = vcf.Writer(out, reader, lineterminator='\n') + + for record in reader: + writer.write_record(record) + out.seek(0) + out_str = out.getvalue() + + definitions = [] + for line in out_str.split('\n'): + if line.startswith('##INFO='): + definitions.append(line.split('ID=')[1].split(',')[0]) + if not line or line.startswith('#'): + continue + fields = [f.split('=')[0] for f in line.split('\t')[7].split(';')] + self._assert_order(definitions, fields) + + +class TestInfoTypeCharacter(unittest.TestCase): + def test_parse(self): + reader = vcf.Reader(fh('info-type-character.vcf')) + record = next(reader) + self.assertEqual(record.INFO['FLOAT_1'], 123.456) + self.assertEqual(record.INFO['CHAR_1'], 'Y') + self.assertEqual(record.INFO['FLOAT_N'], [123.456]) + self.assertEqual(record.INFO['CHAR_N'], ['Y']) + + def test_write(self): + reader = vcf.Reader(fh('info-type-character.vcf')) + out = StringIO() + writer = vcf.Writer(out, reader) + + records = list(reader) + + for record in records: + writer.write_record(record) + out.seek(0) + reader2 = vcf.Reader(out) + + for l, r in zip(records, reader2): + self.assertEquals(l.INFO, r.INFO) + + +class TestBadInfoFields(unittest.TestCase): + def test_parse(self): + reader = vcf.Reader(fh('bad-info-character.vcf')) + record = next(reader) + self.assertEquals(record.INFO['DOT_1'], None) + self.assertEquals(record.INFO['DOT_3'], [None, None, None]) + self.assertEquals(record.INFO['DOT_N'], [None]) + self.assertEquals(record.INFO['EMPTY_1'], None) + # Perhaps EMPTY_3 should yield [None, None, None] but this is really a + # cornercase of unspecified behaviour. + self.assertEquals(record.INFO['EMPTY_3'], [None]) + self.assertEquals(record.INFO['EMPTY_N'], [None]) + self.assertEquals(record.INFO['NOTEMPTY_1'], 1) + self.assertEquals(record.INFO['NOTEMPTY_3'], [1, 2, 3]) + self.assertEquals(record.INFO['NOTEMPTY_N'], [1]) + pass + + +class TestParseMetaLine(unittest.TestCase): + def test_parse(self): + reader = vcf.Reader(fh('parse-meta-line.vcf')) + f = reader.metadata['MYFIELD'][0] + self.assertEqual(f['ID'], 'SomeField') + self.assertEqual(f['Version'], '3.4-0-g7e26428') + self.assertEqual(f['Date'], '"Wed Oct 07 09:11:47 CEST 2015"') + self.assertEqual(f['Options'], '"< 4 and > 3"') + next(reader) + + def test_write(self): + reader = vcf.Reader(fh('parse-meta-line.vcf')) + out = StringIO() + writer = vcf.Writer(out, reader) + + records = list(reader) + + for record in records: + writer.write_record(record) + out.seek(0) + reader2 = vcf.Reader(out) + + f = reader2.metadata['MYFIELD'][0] + self.assertEqual(f['ID'], 'SomeField') + self.assertEqual(f['Version'], '3.4-0-g7e26428') + self.assertEqual(f['Date'], '"Wed Oct 07 09:11:47 CEST 2015"') + self.assertEqual(f['Options'], '"< 4 and > 3"') + + for l, r in zip(records, reader2): + self.assertEquals(l.INFO, r.INFO) + + class TestGatkOutputWriter(unittest.TestCase): def testWrite(self): @@ -218,16 +456,26 @@ def testWrite(self): for record in records: writer.write_record(record) out.seek(0) - print (out.getvalue()) + out_str = out.getvalue() + for line in out_str.split("\n"): + if line.startswith("##contig"): + assert line.startswith('##contig=<'), "Found dictionary in contig line: {0}".format(line) + print (out_str) reader2 = vcf.Reader(out) self.assertEquals(reader.samples, reader2.samples) self.assertEquals(reader.formats, reader2.formats) self.assertEquals(reader.infos, reader2.infos) + self.assertEquals(reader.contigs, reader2.contigs) for l, r in zip(records, reader2): self.assertEquals(l.samples, r.samples) + # test for call data equality, since equality on the sample calls + # may not always mean their data are all equal + for l_call, r_call in zip(l.samples, r.samples): + self.assertEqual(l_call.data, r_call.data) + class TestBcfToolsOutputWriter(unittest.TestCase): @@ -252,6 +500,75 @@ def testWrite(self): for l, r in zip(records, reader2): self.assertEquals(l.samples, r.samples) + # test for call data equality, since equality on the sample calls + # may not always mean their data are all equal + for l_call, r_call in zip(l.samples, r.samples): + self.assertEqual(l_call.data, r_call.data) + + +class TestWriterDictionaryMeta(unittest.TestCase): + + def testWrite(self): + + reader = vcf.Reader(fh('example-4.1-bnd.vcf')) + out = StringIO() + writer = vcf.Writer(out, reader) + + records = list(reader) + + for record in records: + writer.write_record(record) + out.seek(0) + out_str = out.getvalue() + for line in out_str.split("\n"): + if line.startswith("##PEDIGREE"): + self.assertEquals(line, '##PEDIGREE=') + if line.startswith("##SAMPLE"): + assert line.startswith('##SAMPLE=<'), "Found dictionary in meta line: {0}".format(line) + + +class TestSamplesSpace(unittest.TestCase): + filename = 'samples-space.vcf' + samples = ['NA 00001', 'NA 00002', 'NA 00003'] + def test_samples(self): + self.reader = vcf.Reader(fh(self.filename), strict_whitespace=True) + self.assertEqual(self.reader.samples, self.samples) + + +class TestMetadataWhitespace(unittest.TestCase): + filename = 'metadata-whitespace.vcf' + def test_metadata_whitespace(self): + """ + Test parsing metadata header lines with whitespace. + """ + self.reader = vcf.Reader(fh(self.filename)) + + # Pick one INFO line and assert that we parsed it correctly. + info_indel = self.reader.infos['INDEL'] + assert info_indel.id == 'INDEL' + assert info_indel.num == 0 + assert info_indel.type == 'Flag' + assert info_indel.desc == 'Indicates that the variant is an INDEL.' + + # Test we can walk the file at least. + for r in self.reader: + for c in r: + pass + + +class TestMixedFiltering(unittest.TestCase): + filename = 'mixed-filtering.vcf' + def test_mixed_filtering(self): + """ + Test mix of FILTER values (pass, filtered, no filtering). + """ + reader = vcf.Reader(fh(self.filename)) + self.assertEqual(next(reader).FILTER, []) + self.assertEqual(next(reader).FILTER, ['q10']) + self.assertEqual(next(reader).FILTER, []) + self.assertEqual(next(reader).FILTER, None) + self.assertEqual(next(reader).FILTER, ['q10', 'q50']) + class TestRecord(unittest.TestCase): @@ -262,6 +579,11 @@ def test_num_calls(self): var.num_het + var.num_unknown) self.assertEqual(len(var.samples), num_calls) + def test_dunder_eq(self): + rec = next(vcf.Reader(fh('example-4.0.vcf'))) + self.assertFalse(rec == None) + self.assertFalse(None == rec) + def test_call_rate(self): reader = vcf.Reader(fh('example-4.0.vcf')) for var in reader: @@ -274,6 +596,8 @@ def test_call_rate(self): self.assertEqual(3.0/3.0, call_rate) if var.POS == 1230237: self.assertEqual(3.0/3.0, call_rate) + if var.POS == 1231234: + self.assertEqual(3.0/3.0, call_rate) elif var.POS == 1234567: self.assertEqual(2.0/3.0, call_rate) @@ -282,15 +606,24 @@ def test_aaf(self): for var in reader: aaf = var.aaf if var.POS == 14370: - self.assertEqual(3.0/6.0, aaf) + self.assertEqual([3.0/6.0], aaf) if var.POS == 17330: - self.assertEqual(1.0/6.0, aaf) + self.assertEqual([1.0/6.0], aaf) if var.POS == 1110696: - self.assertEqual(None, aaf) + self.assertEqual([2.0/6.0, 4.0/6.0], aaf) if var.POS == 1230237: - self.assertEqual(0.0/6.0, aaf) + self.assertEqual([0.0/6.0], aaf) + if var.POS == 1231234: + self.assertEqual([2.0/6.0], aaf) elif var.POS == 1234567: - self.assertEqual(None, aaf) + self.assertEqual([2.0/4.0, 1.0/4.0], aaf) + reader = vcf.Reader(fh('example-4.1-ploidy.vcf')) + for var in reader: + aaf = var.aaf + if var.POS == 60034: + self.assertEqual([4.0/6.0], aaf) + elif var.POS == 60387: + self.assertEqual([1.0/3.0], aaf) def test_pi(self): reader = vcf.Reader(fh('example-4.0.vcf')) @@ -304,9 +637,28 @@ def test_pi(self): self.assertEqual(None, pi) if var.POS == 1230237: self.assertEqual(0.0/6.0, pi) + if var.POS == 1231234: + self.assertEqual((6.0/(6.0-1))*(2.0*(1.0/3.0)*(2.0/3.0)) , pi) elif var.POS == 1234567: self.assertEqual(None, pi) + def test_heterozygosity(self): + reader = vcf.Reader(fh('example-4.0.vcf')) + for var in reader: + het = var.heterozygosity + if var.POS == 14370: + self.assertEqual(0.5, het) + if var.POS == 17330: + self.assertEqual(1-((1.0/6)**2 + (5.0/6)**2), het) + if var.POS == 1110696: + self.assertEqual(4.0/9.0, het) + if var.POS == 1230237: + self.assertEqual(0.0, het) + if var.POS == 1231234: + self.assertEqual(4.0/9.0, het) + elif var.POS == 1234567: + self.assertEqual(5.0/8.0, het) + def test_is_snp(self): reader = vcf.Reader(fh('example-4.0.vcf')) for r in reader: @@ -324,9 +676,29 @@ def test_is_snp(self): self.assertEqual(True, is_snp) if var.POS == 1230237: self.assertEqual(False, is_snp) + if var.POS == 1231234: + self.assertEqual(False, is_snp) elif var.POS == 1234567: self.assertEqual(False, is_snp) + + def test_is_snp_for_n_alt(self): + record = model._Record( + '1', + 10, + 'id1', + 'C', + [model._Substitution('N')], + None, + None, + {}, + None, + {}, + None + ) + self.assertTrue(record.is_snp) + + def test_is_indel(self): reader = vcf.Reader(fh('example-4.0.vcf')) for var in reader: @@ -338,6 +710,8 @@ def test_is_indel(self): if var.POS == 1110696: self.assertEqual(False, is_indel) if var.POS == 1230237: + self.assertEqual(False, is_indel) + if var.POS == 1231234: self.assertEqual(True, is_indel) elif var.POS == 1234567: self.assertEqual(True, is_indel) @@ -354,6 +728,8 @@ def test_is_transition(self): self.assertEqual(False, is_trans) if var.POS == 1230237: self.assertEqual(False, is_trans) + if var.POS == 1231234: + self.assertEqual(False, is_trans) elif var.POS == 1234567: self.assertEqual(False, is_trans) @@ -368,6 +744,8 @@ def test_is_deletion(self): if var.POS == 1110696: self.assertEqual(False, is_del) if var.POS == 1230237: + self.assertEqual(False, is_del) + if var.POS == 1231234: self.assertEqual(True, is_del) elif var.POS == 1234567: self.assertEqual(False, is_del) @@ -383,6 +761,8 @@ def test_var_type(self): if var.POS == 1110696: self.assertEqual("snp", type) if var.POS == 1230237: + self.assertEqual("unknown", type) + if var.POS == 1231234: self.assertEqual("indel", type) elif var.POS == 1234567: self.assertEqual("indel", type) @@ -415,6 +795,8 @@ def test_var_subtype(self): if var.POS == 1110696: self.assertEqual("unknown", subtype) if var.POS == 1230237: + self.assertEqual("unknown", subtype) + if var.POS == 1231234: self.assertEqual("del", subtype) elif var.POS == 1234567: self.assertEqual("unknown", subtype) @@ -463,6 +845,8 @@ def test_is_sv(self): self.assertEqual(False, is_sv) if var.POS == 1230237: self.assertEqual(False, is_sv) + if var.POS == 1231234: + self.assertEqual(False, is_sv) elif var.POS == 1234567: self.assertEqual(False, is_sv) @@ -494,6 +878,8 @@ def test_is_sv_precise(self): self.assertEqual(False, is_precise) if var.POS == 1230237: self.assertEqual(False, is_precise) + if var.POS == 1231234: + self.assertEqual(False, is_precise) elif var.POS == 1234567: self.assertEqual(False, is_precise) @@ -525,6 +911,8 @@ def test_sv_end(self): self.assertEqual(None, sv_end) if var.POS == 1230237: self.assertEqual(None, sv_end) + if var.POS == 1231234: + self.assertEqual(None, sv_end) elif var.POS == 1234567: self.assertEqual(None, sv_end) @@ -541,14 +929,277 @@ def test_qual(self): expected = 1e+03 if var.POS == 1230237: expected = 47 + if var.POS == 1231234: + expected = 46 elif var.POS == 1234567: expected = None self.assertEqual(expected, qual) self.assertEqual(type(expected), qtype) + def test_info_multiple_values(self): + reader = vcf.Reader(fh('example-4.1-info-multiple-values.vcf')) + var = next(reader) + # check Float type INFO field with multiple values + expected = [19.3, 47.4, 14.0] + actual = var.INFO['RepeatCopies'] + self.assertEqual(expected, actual) + # check Integer type INFO field with multiple values + expected = [42, 14, 56] + actual = var.INFO['RepeatSize'] + self.assertEqual(expected, actual) + # check String type INFO field with multiple values + expected = ['TCTTATCTTCTTACTTTTCATTCCTTACTCTTACTTACTTAC', 'TTACTCTTACTTAC', 'TTACTCTTACTTACTTACTCTTACTTACTTACTCTTACTTACTTACTCTTATCTTC'] + actual = var.INFO['RepeatConsensus'] + self.assertEqual(expected, actual) + + def test_pickle(self): + reader = vcf.Reader(fh('example-4.0.vcf')) + for var in reader: + self.assertEqual(cPickle.loads(cPickle.dumps(var)), var) + + + def assert_has_expected_coordinates( + self, + record, + expected_coordinates, + expected_affected_coordinates + ): + self.assertEqual( + (record.start, record.end), + expected_coordinates + ) + self.assertEqual( + (record.affected_start, record.affected_end), + expected_affected_coordinates + ) + + + def test_coordinates_for_snp(self): + record = model._Record( + '1', + 10, + 'id1', + 'C', + [model._Substitution('A')], + None, + None, + {}, + None, + {}, + None + ) + self.assert_has_expected_coordinates(record, (9, 10), (9, 10)) + + + def test_coordinates_for_insertion(self): + record = model._Record( + '1', + 10, + 'id2', + 'C', + [model._Substitution('CTA')], + None, + None, + {}, + None, + {}, + None + ) + self.assert_has_expected_coordinates(record, (9, 10), (10, 10)) + + + def test_coordinates_for_deletion(self): + record = model._Record( + '1', + 10, + 'id3', + 'CTA', + [model._Substitution('C')], + None, + None, + {}, + None, + {}, + None + ) + self.assert_has_expected_coordinates(record, (9, 12), (10, 12)) + + + def test_coordinates_for_None_alt(self): + record = model._Record( + '1', + 10, + 'id4', + 'C', + [None], + None, + None, + {}, + None, + {}, + None + ) + self.assert_has_expected_coordinates(record, (9, 10), (9, 10)) + + + def test_coordinates_for_multiple_snps(self): + record = model._Record( + '1', + 10, + 'id5', + 'C', + [ + model._Substitution('A'), + model._Substitution('G'), + model._Substitution('T') + ], + None, + None, + {}, + None, + {}, + None + ) + self.assert_has_expected_coordinates(record, (9, 10), (9, 10)) + + + def test_coordinates_for_insert_and_snp(self): + record = model._Record( + '1', + 10, + 'id6', + 'C', + [ + model._Substitution('GTA'), + model._Substitution('G'), + ], + None, + None, + {}, + None, + {}, + None + ) + self.assert_has_expected_coordinates(record, (9, 10), (9, 10)) + record = model._Record( + '1', + 10, + 'id7', + 'C', + [ + model._Substitution('G'), + model._Substitution('GTA'), + ], + None, + None, + {}, + None, + {}, + None + ) + self.assert_has_expected_coordinates(record, (9, 10), (9, 10)) + + + def test_coordinates_for_snp_and_deletion(self): + record = model._Record( + '1', + 10, + 'id8', + 'CTA', + [ + model._Substitution('C'), + model._Substitution('CTG'), + ], + None, + None, + {}, + None, + {}, + None + ) + self.assert_has_expected_coordinates(record, (9, 12), (10, 12)) + record = model._Record( + '1', + 10, + 'id9', + 'CTA', + [ + model._Substitution('CTG'), + model._Substitution('C'), + ], + None, + None, + {}, + None, + {}, + None + ) + self.assert_has_expected_coordinates(record, (9, 12), (10, 12)) + + + def test_coordinates_for_insertion_and_deletion(self): + record = model._Record( + '1', + 10, + 'id10', + 'CT', + [ + model._Substitution('CA'), + model._Substitution('CTT'), + ], + None, + None, + {}, + None, + {}, + None + ) + self.assert_has_expected_coordinates(record, (9, 11), (10, 11)) + record = model._Record( + '1', + 10, + 'id11', + 'CT', + [ + model._Substitution('CTT'), + model._Substitution('CA'), + ], + None, + None, + {}, + None, + {}, + None + ) + self.assert_has_expected_coordinates(record, (9, 11), (10, 11)) + + + def test_coordinates_for_breakend(self): + record = model._Record( + '1', + 10, + 'id12', + 'CTA', + [model._Breakend('1', 500, False, True, 'GGTC', True)], + None, + None, + {}, + None, + {}, + None + ) + self.assert_has_expected_coordinates(record, (9, 12), (9, 12)) + class TestCall(unittest.TestCase): + def test_dunder_eq(self): + reader = vcf.Reader(fh('example-4.0.vcf')) + var = next(reader) + example_call = var.samples[0] + self.assertFalse(example_call == None) + self.assertFalse(None == example_call) + def test_phased(self): reader = vcf.Reader(fh('example-4.0.vcf')) for var in reader: @@ -561,6 +1212,8 @@ def test_phased(self): self.assertEqual([True, True, False], phases) if var.POS == 1230237: self.assertEqual([True, True, False], phases) + if var.POS == 1231234: + self.assertEqual([True, True, True], phases) elif var.POS == 1234567: self.assertEqual([False, False, False], phases) @@ -576,6 +1229,8 @@ def test_gt_bases(self): self.assertEqual(['G|T', 'T|G', 'T/T'], gt_bases) elif var.POS == 1230237: self.assertEqual(['T|T', 'T|T', 'T/T'], gt_bases) + elif var.POS == 1231234: + self.assertEqual(['A|A', 'AT|AT', 'AT|AT'], gt_bases) elif var.POS == 1234567: self.assertEqual([None, 'GTCT/GTACT', 'G/G'], gt_bases) @@ -593,43 +1248,173 @@ def test_gt_types(self): self.assertEqual([1,1,2], gt_types) elif var.POS == 1230237: self.assertEqual([0,0,0], gt_types) + elif var.POS == 1231234: + self.assertEqual([2,0,0], gt_types) elif var.POS == 1234567: self.assertEqual([None,1,2], gt_types) -class TestTabix(unittest.TestCase): + +@unittest.skipUnless(pysam, "test requires installation of PySAM.") +class TestFetch(unittest.TestCase): def setUp(self): self.reader = vcf.Reader(fh('tb.vcf.gz', 'rb')) - self.run = vcf.parser.pysam is not None + + def assertFetchedExpectedPositions( + self, fetched_variants, expected_positions): + fetched_positions = [var.POS for var in fetched_variants] + self.assertEqual(fetched_positions, expected_positions) + + + def testNoVariantsInRange(self): + fetched_variants = self.reader.fetch('20', 14370, 17329) + self.assertFetchedExpectedPositions(fetched_variants, []) + + + def testNoVariantsForZeroLengthInterval(self): + fetched_variants = self.reader.fetch('20', 14369, 14369) + self.assertFetchedExpectedPositions(fetched_variants, []) def testFetchRange(self): - if not self.run: - return - lines = list(self.reader.fetch('20', 14370, 14370)) - self.assertEquals(len(lines), 1) - self.assertEqual(lines[0].POS, 14370) + fetched_variants = self.reader.fetch('20', 14369, 14370) + self.assertFetchedExpectedPositions(fetched_variants, [14370]) + + fetched_variants = self.reader.fetch('20', 14369, 17330) + self.assertFetchedExpectedPositions( + fetched_variants, [14370, 17330]) - lines = list(self.reader.fetch('20', 14370, 17330)) - self.assertEquals(len(lines), 2) - self.assertEqual(lines[0].POS, 14370) - self.assertEqual(lines[1].POS, 17330) + fetched_variants = self.reader.fetch('20', 1110695, 1234567) + self.assertFetchedExpectedPositions( + fetched_variants, [1110696, 1230237, 1234567]) - lines = list(self.reader.fetch('20', 1110695, 1234567)) - self.assertEquals(len(lines), 3) + def testFetchesFromStartIfStartOnlySpecified(self): + fetched_variants = self.reader.fetch('20', 1110695) + self.assertFetchedExpectedPositions( + fetched_variants, [1110696, 1230237, 1234567]) - def testFetchSite(self): - if not self.run: - return - site = self.reader.fetch('20', 14370) - assert site.POS == 14370 - site = self.reader.fetch('20', 14369) - assert site is None + def testFetchesAllFromChromIfOnlyChromSpecified(self): + fetched_variants = self.reader.fetch('20') + self.assertFetchedExpectedPositions( + fetched_variants, + [14370, 17330, 1110696, 1230237, 1234567] + ) +@unittest.skipUnless(pysam, "test requires installation of PySAM.") +class TestIssue201(unittest.TestCase): + def setUp(self): + # This file contains some non-ASCII characters in a UTF-8 encoding. + # https://github.com/jamescasbon/PyVCF/issues/201 + self.reader = vcf.Reader(fh('issue-201.vcf.gz', 'rb'), + encoding='utf-8') + + def testIterate(self): + for record in self.reader: + # Should not raise decoding errors. + pass + + def testFetch(self): + for record in self.reader.fetch(chrom='17'): + # Should not raise decoding errors. + pass + + +class TestIssue234(unittest.TestCase): + """ See https://github.com/jamescasbon/PyVCF/issues/234 """ + + def test_vcf_metadata_parser_doesnt_break_with_empty_number_tags(self): + parser = vcf.parser._vcf_metadata_parser() + num_str = '##INFO= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT AB00001 AB00002 AB00003 +20 14370 rs6054257 G A 29 PASS NS=3 GT 0|0 ./. 1/1 +20 17330 . T A 3 q10 NS=3 GT ././. 0|1 0/0 +20 1234567 microsat1 GTC G,GTCT 50 PASS NS=3 GT 0/1 0/2 . +20 1234568 . GTC G,GTCT 50 PASS NS=3 GT ./1 0/. . diff --git a/vcf/test/walk_left.vcf b/vcf/test/walk_left.vcf index c910432..aafb82b 100644 --- a/vcf/test/walk_left.vcf +++ b/vcf/test/walk_left.vcf @@ -21,4 +21,5 @@ 19 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3:65,3 20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4:65,4 20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2:65,3 +20 1231234 . AT A 46 PASS NS=3;DP=15;AA=A GT:GQ:DP:HQ 1|1:23:7:26,30 0|0:27:9:56,60 0|0:31:10:65,71 21 1234567 microsat1 GTCT G,GTACT 50 PASS NS=3;DP=9;AA=G GT:GQ:DP ./.:35:4 0/2:17:2 1/1:40:3 diff --git a/vcf/test/walk_refcall.vcf b/vcf/test/walk_refcall.vcf new file mode 100644 index 0000000..e93aeff --- /dev/null +++ b/vcf/test/walk_refcall.vcf @@ -0,0 +1,22 @@ +##fileformat=VCFv4.0 +##fileDate=20090805 +##source=myImputationProgramV3.1 +##reference=1000GenomesPilot-NCBI36 +##phasing=partial +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +20 14370 rs6054257 G . 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 0|0:48:8:51,51 0/0:43:5:.,. +20 17330 . T . 3.0 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|0:3:5:65,3 0/0:41:3 +20 1110696 rs6040355 A . 1e+03 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 0|0:21:6:23,27 0|0:2:0:18,2 0/0:35:4 +20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2 diff --git a/vcf/utils.py b/vcf/utils.py index ec6f686..2881dc2 100644 --- a/vcf/utils.py +++ b/vcf/utils.py @@ -2,43 +2,56 @@ Utilities for VCF files. """ +def walk_together(*readers, **kwargs): + """ + Simultaneously iteratate over two or more VCF readers. For each + genomic position with a variant, return a list of size equal to the number + of VCF readers. This list contains the VCF record from readers that have + this variant, and None for readers that don't have it. + The caller must make sure that inputs are sorted in the same way and use the + same reference otherwise behaviour is undefined. -def walk_together(*readers): - """ Simultaneously iteratate two or more VCF readers and return - lists of concurrent records from each - reader, with None if no record present. Caller must check the - inputs are sorted in the same way and use the same reference - otherwise behaviour is undefined. + Args: + vcf_record_sort_key: function that takes a VCF record and returns a + tuple that can be used as a key for comparing and sorting VCF + records across all readers. This tuple defines what it means for two + variants to be equal (eg. whether it's only their position or also + their allele values), and implicitly determines the chromosome + ordering since the tuple's 1st element is typically the chromosome + name (or calculated from it). """ - # if one of the VCFs has no records, StopIteration is - # raised immediately, so we need to check for that and - # deal appropriately + if 'vcf_record_sort_key' in kwargs: + get_key = kwargs['vcf_record_sort_key'] + else: + get_key = lambda r: (r.CHROM, r.POS) #, r.REF, r.ALT) + nexts = [] for reader in readers: try: - nexts.append(reader.next()) + nexts.append(next(reader)) except StopIteration: nexts.append(None) - while True: - min_next = min([x for x in nexts if x is not None]) - - # this line uses equality on Records, which checks the ALTs - # not sure what to do with records that have overlapping but different - # variation - yield [x if x is None or x == min_next else None for x in nexts] + min_k = (None,) # keep track of the previous min key's contig + while any([r is not None for r in nexts]): + next_idx_to_k = dict( + (i, get_key(r)) for i, r in enumerate(nexts) if r is not None) + keys_with_prev_contig = [ + k for k in next_idx_to_k.values() if k[0] == min_k[0]] - # update nexts that we just yielded - for i, n in enumerate(nexts): + if any(keys_with_prev_contig): + min_k = min(keys_with_prev_contig) # finish previous contig + else: + min_k = min(next_idx_to_k.values()) # move on to next contig - if n is not None and n == min_next: - try: - nexts[i] = readers[i].next() - except StopIteration: - nexts[i] = None + min_k_idxs = set([i for i, k in next_idx_to_k.items() if k == min_k]) + yield [nexts[i] if i in min_k_idxs else None for i in range(len(nexts))] - if all([x is None for x in nexts]): - break + for i in min_k_idxs: + try: + nexts[i] = readers[i].next() + except StopIteration: + nexts[i] = None def trim_common_suffix(*sequences):